forked from DebaucheryLibrarian/traxxx
Added support for Family Strokes.
@@ -3,6 +3,7 @@
 const util = require('util');
 const winston = require('winston');
 const args = require('./argv');
+require('winston-daily-rotate-file');
 
 const logger = winston.createLogger({
   format: winston.format.combine(
@@ -19,6 +20,11 @@ const logger = winston.createLogger({
       ),
       timestamp: true,
     }),
+    new winston.transports.DailyRotateFile({
+      datePattern: 'YYYY-MM-DD',
+      filename: 'log/%DATE%.log',
+      level: 'silly',
+    }),
   ],
 });
 
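For context, a minimal sketch (not part of the commit) of what the new transport does: requiring winston-daily-rotate-file registers the DailyRotateFile transport on winston as a side effect, and the %DATE% placeholder in filename is expanded using datePattern, so each day gets its own file.

    const winston = require('winston');
    require('winston-daily-rotate-file'); // side effect: registers winston.transports.DailyRotateFile

    const logger = winston.createLogger({
      transports: [
        new winston.transports.DailyRotateFile({
          datePattern: 'YYYY-MM-DD',
          filename: 'log/%DATE%.log', // e.g. log/2020-01-15.log
          level: 'silly', // the file transport captures every level
        }),
      ],
    });

    logger.silly('written to today\'s log file');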
@@ -320,7 +320,6 @@ async function storeReleaseAssets(releases) {
 
     await createMediaDirectory('releases', subpath);
 
-    try {
       // don't use Promise.all to prevent concurrency issues with duplicate detection
       if (release.poster) {
         await storePhotos([release.poster], {
@@ -346,9 +345,6 @@ async function storeReleaseAssets(releases) {
         targetId: release.id,
         subpath,
       }, identifier);
-    } catch (error) {
-      console.log(release.url, error);
-    }
   }, {
     concurrency: 10,
   });
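The `}, { concurrency: 10 })` trailer suggests Bluebird's Promise.map rather than native Promise.all, which fits the comment about duplicate detection: a bounded pool keeps two copies of the same photo from being checked and stored at the same instant. A minimal sketch, assuming Bluebird and a hypothetical storeAssetsFor helper:

    const Promise = require('bluebird');

    async function storeReleaseAssets(releases) {
      await Promise.map(releases, async (release) => {
        await storeAssetsFor(release); // hypothetical per-release worker
      }, {
        concurrency: 10, // at most 10 releases in flight at once
      });
    }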
@@ -409,7 +405,7 @@ async function storeReleases(releases) {
         ...releaseWithChannelSite,
       };
     } catch (error) {
-      logger.error(error);
+      logger.error(error.message);
 
       return null;
     }
@@ -79,7 +79,7 @@ async function deepFetchReleases(baseReleases) {
         deep: true,
       };
     } catch (error) {
-      logger.error(error);
+      logger.error(error.message);
 
       return {
         ...release,
@@ -6,15 +6,6 @@ const { JSDOM } = require('jsdom');
 const cheerio = require('cheerio');
 const moment = require('moment');
 
-const { matchTags } = require('../tags');
-
-const defaultTags = {
-  swallowed: ['blowjob', 'deepthroat', 'facefuck'],
-  trueanal: ['anal'],
-  allanal: ['anal', 'fmf'],
-  nympho: [],
-};
-
 const descriptionTags = {
   'anal cream pie': 'anal creampie',
   'ass to mouth': 'ass to mouth',
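deriveTagsFromDescription itself is outside this hunk; a plausible sketch of how it uses the descriptionTags map above (hypothetical reconstruction, not the committed code):

    function deriveTagsFromDescription(description) {
      const haystack = description.toLowerCase();

      // collect the canonical tag for every phrase that appears in the description
      return Object.entries(descriptionTags)
        .filter(([phrase]) => haystack.includes(phrase))
        .map(([, tag]) => tag);
    }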
@@ -55,7 +46,7 @@ async function scrapeLatestA(html, site) {
   const actors = Array.from(element.querySelectorAll('h4.models a'), actorElement => actorElement.textContent);
 
   const durationString = element.querySelector('.total-time').textContent.trim();
-  // timestamp is somethines 00:00, sometimes 0:00:00
+  // timestamp is sometimes 00:00, sometimes 0:00:00
   const duration = durationString.split(':').length === 3
     ? moment.duration(durationString).asSeconds()
     : moment.duration(`00:${durationString}`).asSeconds();
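Worked example of the branch above: moment.duration reads a two-segment string as hours:minutes, so a 25:13 runtime must be padded to 00:25:13 to mean 25 minutes and 13 seconds.

    const moment = require('moment');

    moment.duration('1:02:03').asSeconds(); // 3723: already h:mm:ss
    moment.duration('00:25:13').asSeconds(); // 1513: padded mm:ss
    moment.duration('25:13').asSeconds(); // 90780: read as 25 hours, the bug being avoided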
@@ -70,7 +61,7 @@ async function scrapeLatestA(html, site) {
     .map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
 
   const photos = [...primaryPhotos, ...secondaryPhotos];
-  const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
+  const tags = deriveTagsFromDescription(description);
 
   const scene = {
     url,
@@ -124,7 +115,7 @@ async function scrapeLatestB(html, site) {
     .map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
 
   const photos = [...primaryPhotos, ...secondaryPhotos];
-  const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
+  const tags = deriveTagsFromDescription(description);
 
   return {
     url,
@@ -155,7 +146,7 @@ async function scrapeSceneA(html, url, site) {
   const actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);
 
   const durationString = element.querySelector('.total-time').textContent.trim();
-  // timestamp is somethines 00:00, sometimes 0:00:00
+  // timestamp is sometimes 00:00, sometimes 0:00:00
   const duration = durationString.split(':').length === 3
     ? moment.duration(durationString).asSeconds()
     : moment.duration(`00:${durationString}`).asSeconds();
@@ -163,7 +154,7 @@ async function scrapeSceneA(html, url, site) {
   const { poster } = document.querySelector('.content-page-header video');
   const { src, type } = document.querySelector('.content-page-header source');
 
-  const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
+  const tags = deriveTagsFromDescription(description);
 
   return {
     url,
@@ -204,7 +195,7 @@ async function scrapeSceneB(html, url, site) {
   const { poster } = document.querySelector('.content-page-header-inner video');
   const { src, type } = document.querySelector('.content-page-header-inner source');
 
-  const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
+  const tags = deriveTagsFromDescription(description);
 
   const scene = {
     url,
@@ -30,7 +30,7 @@ async function scrapeProfile(html, _url, actorName) {
 
   if (descriptionString) profile.description = descriptionString.textContent;
 
-  if (bio.Birthday) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
+  if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birth year is sometimes -0001, see Spencer Bradley as of January 2020
   if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
 
   profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
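The new guard works because the site renders an unknown birth year as -0001, which the old code happily parsed into a nonsense date:

    /-0001/.test('Jan 1, -0001'); // true: placeholder year, skip it
    /-0001/.test('Mar 8, 1994'); // false: parse as before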
@@ -15,7 +15,7 @@ function extractTitle(pathname) {
 
 function extractActors(str) {
   return str
-    .split(/,|\band/)
+    .split(/,|\band\b/ig)
     .filter(actor => !/\.{3}/.test(actor))
     .map(actor => actor.trim())
     .filter(actor => actor.length > 0);
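The regex fix matters for names that begin with "and": the added i flag catches a capitalized "And" used as a separator, while the trailing \b stops the pattern from matching inside a following name.

    'Emma Hix And Andi Rose'.split(/,|\band\b/ig).map(s => s.trim());
    // ['Emma Hix', 'Andi Rose']
    // with /\band/i instead, 'Andi Rose' would be split right after its 'And'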
@@ -81,7 +81,54 @@ function scrapeScene(html, site) {
   return release;
 }
 
-async function fetchLatest(site, page = 1) {
+function scrapeSceneA(html, site, sceneX, url) {
+  const scene = sceneX || new JSDOM(html).window.document;
+  const release = { site };
+
+  release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();
+
+  release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
+  release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();
+
+  const durationString = scene.querySelector('.time').textContent.trim();
+  const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
+  release.duration = moment.duration(duration).asSeconds();
+
+  if (sceneX) {
+    const titleEl = scene.querySelector(':scope > a');
+
+    release.url = titleEl.href;
+    release.entryId = titleEl.id;
+    release.title = titleEl.title;
+
+    const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
+    release.poster = [poster.replace('bio_big', 'video'), poster];
+    release.photos = photos;
+  }
+
+  if (!sceneX) {
+    release.title = scene.querySelector('.title span').textContent;
+    release.url = url;
+
+    release.poster = scene.querySelector('video').poster;
+    release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
+  }
+
+  const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
+  release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;
+
+  return release;
+}
+
+function scrapeLatestA(html, site) {
+  const { document } = new JSDOM(html).window;
+
+  const scenes = Array.from(document.querySelectorAll('.scenewrapper'));
+
+  return scenes.map(scene => scrapeSceneA(null, site, scene));
+}
+
+async function fetchLatestTeamSkeet(site, page = 1) {
   const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
   const res = await bhttp.get(url);
 
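Worked example of the "ensure hh:mm:ss" line in scrapeSceneA above: prepending '00' and keeping the last three segments normalizes both mm:ss and h:mm:ss timestamps.

    ['00'].concat('25:13'.split(':')).slice(-3).join(':'); // '00:25:13'
    ['00'].concat('1:02:03'.split(':')).slice(-3).join(':'); // '1:02:03', padding dropped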
@@ -92,10 +139,37 @@ async function fetchLatest(site, page = 1) {
   return null;
 }
 
+async function fetchLatestA(site) {
+  const url = `${site.url}/scenes`;
+  const res = await bhttp.get(url);
+
+  if (res.statusCode === 200) {
+    return scrapeLatestA(res.body.toString(), site);
+  }
+
+  return null;
+}
+
+async function fetchLatest(site, page = 1) {
+  if (site.parameters.id) {
+    return fetchLatestTeamSkeet(site, page);
+  }
+
+  if (site.parameters.scraper === 'A') {
+    return fetchLatestA(site, page);
+  }
+
+  return null;
+}
+
 async function fetchScene(url, site) {
   const session = bhttp.session(); // resolve redirects
   const res = await session.get(url);
 
+  if (site.parameters.scraper === 'A') {
+    return scrapeSceneA(res.body.toString(), site, null, url);
+  }
+
   return scrapeScene(res.body.toString(), site);
 }
 
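fetchLatest now dispatches on per-site parameters stored in the database (exposed via curateSite below). The exact stored shapes are an assumption, but judging from the branches above they would look roughly like:

    // sites.parameters column (illustrative JSON values)
    // { "id": "23" }     -> fetchLatestTeamSkeet, hosted TeamSkeet updates API
    // { "scraper": "A" } -> fetchLatestA, self-hosted tour site at `${site.url}/scenes`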
@@ -8,6 +8,11 @@ const knex = require('./knex');
 const whereOr = require('./utils/where-or');
 
 async function curateSite(site, includeParameters = false) {
+  const tags = await knex('sites_tags')
+    .select('tags.*', 'sites_tags.inherit')
+    .where('site_id', site.id)
+    .join('tags', 'tags.id', 'sites_tags.tag_id');
+
   const parameters = JSON.parse(site.parameters);
 
   return {
@@ -16,6 +21,7 @@ async function curateSite(site, includeParameters = false) {
     url: site.url,
     description: site.description,
     slug: site.slug,
+    tags,
     independent: !!parameters && parameters.independent,
     parameters: includeParameters ? parameters : null,
     network: {
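With the join above, each curated site now carries its own tags, and the inherit flag from the sites_tags join table rides along on every tag row, so associateTags (further down) can pick out the inheritable ones. A curated site then looks roughly like this, with illustrative values:

    {
      name: 'Family Strokes',
      slug: 'familystrokes',
      tags: [{ id: 21, name: 'family', inherit: true }],
      independent: false,
      // ...
    }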
@@ -55,7 +61,7 @@ function destructConfigNetworks(networks) {
 
 async function findSiteByUrl(url) {
   const { hostname } = new URL(url);
-  const domain = hostname.replace(/^www./, '');
+  const domain = hostname.replace(/www.|tour./, '');
 
   const site = await knex('sites')
     .leftJoin('networks', 'sites.network_id', 'networks.id')
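The widened pattern lets tour subdomains resolve to the same site row as www. Note that the dots in /www.|tour./ are unescaped, so each matches any character; harmless here, but the pattern is looser than it reads.

    new URL('https://tour.familystrokes.com/scenes/123').hostname; // 'tour.familystrokes.com' (illustrative URL)
    'tour.familystrokes.com'.replace(/www.|tour./, ''); // 'familystrokes.com'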
src/tags.js
@@ -42,6 +42,7 @@ async function matchTags(rawTags) {
   const tagEntries = await knex('tags')
     .pluck('aliases.id')
     .whereIn('tags.name', tags)
+    .orWhereIn('tags.slug', tags)
     .where(function where() {
       this
         .whereNull('tags.alias_for')
@@ -58,15 +59,20 @@ async function matchTags(rawTags) {
 }
 
 async function associateTags(release, releaseId) {
-  if (!release.tags || release.tags.length === 0) {
+  const siteTags = release.site.tags.filter(tag => tag.inherit === true).map(tag => tag.id);
+
+  const rawReleaseTags = release.tags || [];
+  const releaseTags = rawReleaseTags.some(tag => typeof tag === 'string')
+    ? await matchTags(release.tags) // scraper returned raw tags
+    : rawReleaseTags; // tags already matched by (outdated) scraper
+
+  const tags = releaseTags.concat(siteTags);
+
+  if (tags.length === 0) {
     logger.info(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
     return;
   }
 
-  const tags = release.tags.some(tag => typeof tag === 'string')
-    ? await matchTags(release.tags) // scraper returned raw tags
-    : release.tags; // tags already matched by (outdated) scraper
-
   const associationEntries = await knex('releases_tags')
     .where('release_id', releaseId)
     .whereIn('tag_id', tags);
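Net effect of the rework: inheritable site-level tags are always applied, and scraper tags are run through matchTags only when they are still raw strings. With illustrative values, assuming matchTags resolves names to ids:

    // release.site.tags: [{ id: 21, inherit: true }, { id: 34, inherit: false }]
    // release.tags: ['anal', 'facial'] -> await matchTags(...) -> [3, 17]
    // tags: [3, 17].concat([21]) -> [3, 17, 21]; tag 34 is not inherited, so it is left out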