Added support for Family Strokes.

2020-01-13 23:45:09 +01:00
parent 48b37a509e
commit 859cb7e1f3
58 changed files with 2130 additions and 33 deletions


@@ -6,15 +6,6 @@ const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
const { matchTags } = require('../tags');
const defaultTags = {
swallowed: ['blowjob', 'deepthroat', 'facefuck'],
trueanal: ['anal'],
allanal: ['anal', 'fmf'],
nympho: [],
};
const descriptionTags = {
'anal cream pie': 'anal creampie',
'ass to mouth': 'ass to mouth',
@@ -55,7 +46,7 @@ async function scrapeLatestA(html, site) {
const actors = Array.from(element.querySelectorAll('h4.models a'), actorElement => actorElement.textContent);
const durationString = element.querySelector('.total-time').textContent.trim();
// timestamp is somethines 00:00, sometimes 0:00:00
// timestamp is sometimes 00:00, sometimes 0:00:00
const duration = durationString.split(':').length === 3
? moment.duration(durationString).asSeconds()
: moment.duration(`00:${durationString}`).asSeconds();
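The ternary above normalizes the two timestamp formats before handing them to moment. As a standalone sketch of the same idea (illustrative only, assuming moment is in scope), padding the string to h:mm:ss handles both shapes; the scrapeSceneA added further down in this commit uses the same padding trick:

// Illustrative helper, not part of this diff: normalize 'mm:ss' or
// 'h:mm:ss' into seconds by padding to three segments first.
function parseDurationSeconds(durationString) {
  const padded = ['00'].concat(durationString.trim().split(':')).slice(-3).join(':');
  return moment.duration(padded).asSeconds();
}

// parseDurationSeconds('38:15')   -> 2295
// parseDurationSeconds('1:02:05') -> 3725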
@@ -70,7 +61,7 @@ async function scrapeLatestA(html, site) {
.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
const photos = [...primaryPhotos, ...secondaryPhotos];
const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
const tags = deriveTagsFromDescription(description);
const scene = {
url,
@@ -124,7 +115,7 @@ async function scrapeLatestB(html, site) {
.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
const photos = [...primaryPhotos, ...secondaryPhotos];
const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
const tags = deriveTagsFromDescription(description);
return {
url,
@@ -155,7 +146,7 @@ async function scrapeSceneA(html, url, site) {
const actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);
const durationString = element.querySelector('.total-time').textContent.trim();
// timestamp is somethines 00:00, sometimes 0:00:00
// timestamp is sometimes 00:00, sometimes 0:00:00
const duration = durationString.split(':').length === 3
? moment.duration(durationString).asSeconds()
: moment.duration(`00:${durationString}`).asSeconds();
@@ -163,7 +154,7 @@ async function scrapeSceneA(html, url, site) {
const { poster } = document.querySelector('.content-page-header video');
const { src, type } = document.querySelector('.content-page-header source');
const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
const tags = deriveTagsFromDescription(description);
return {
url,
@@ -204,7 +195,7 @@ async function scrapeSceneB(html, url, site) {
const { poster } = document.querySelector('.content-page-header-inner video');
const { src, type } = document.querySelector('.content-page-header-inner source');
const tags = await matchTags([...defaultTags[site.slug], ...deriveTagsFromDescription(description)]);
const tags = deriveTagsFromDescription(description);
const scene = {
url,

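deriveTagsFromDescription itself is not visible in these hunks; a plausible minimal sketch, assuming it simply looks the descriptionTags phrases up in the scene description, would be:

// Sketch only; the real implementation is outside this diff. descriptionTags
// maps phrases found in a description to canonical tag names, e.g.
// 'anal cream pie' -> 'anal creampie'.
function deriveTagsFromDescription(description = '') {
  const lowered = description.toLowerCase();

  return Object.entries(descriptionTags)
    .filter(([phrase]) => lowered.includes(phrase))
    .map(([, tag]) => tag);
}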

@@ -30,7 +30,7 @@ async function scrapeProfile(html, _url, actorName) {
if (descriptionString) profile.description = descriptionString.textContent;
if (bio.Birthday) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
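The added guard skips the placeholder year -0001 that some profiles report; a hedged sketch of the same check as a helper, with illustrative inputs:

// Sketch only: return null for the -0001 placeholder year instead of a
// bogus date; otherwise parse the birthday string as before.
function parseBirthdate(birthdayString) {
  if (!birthdayString || /-0001/.test(birthdayString)) return null;
  return moment.utc(birthdayString, 'MMM D, YYYY').toDate();
}

// parseBirthdate('Mar 1, 1995')   -> 1995-03-01T00:00:00.000Z
// parseBirthdate('Nov 30, -0001') -> null (placeholder year)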


@@ -15,7 +15,7 @@ function extractTitle(pathname) {
function extractActors(str) {
return str
.split(/,|\band/)
.split(/,|\band\b/ig)
.filter(actor => !/\.{3}/.test(actor))
.map(actor => actor.trim())
.filter(actor => actor.length > 0);
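The word boundary after 'and' plus the i flag change what gets split; an illustration with made-up input (the names are only placeholders):

// With /,|\band\b/ig, a capitalised 'And' between names now splits, while
// the trailing \b keeps names that merely start with 'and' (e.g. 'Andi') intact.
'Alexis Tae And Andi Rose, Jill Kassidy'
  .split(/,|\band\b/ig)
  .map(actor => actor.trim())
  .filter(actor => actor.length > 0);
// -> ['Alexis Tae', 'Andi Rose', 'Jill Kassidy']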
@@ -81,7 +81,54 @@ function scrapeScene(html, site) {
return release;
}
async function fetchLatest(site, page = 1) {
function scrapeSceneA(html, site, sceneX, url) {
const scene = sceneX || new JSDOM(html).window.document;
const release = { site };
release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();
release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();
const durationString = scene.querySelector('.time').textContent.trim();
const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
release.duration = moment.duration(duration).asSeconds();
if (sceneX) {
const titleEl = scene.querySelector(':scope > a');
release.url = titleEl.href;
release.entryId = titleEl.id;
release.title = titleEl.title;
const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
release.poster = [poster.replace('bio_big', 'video'), poster];
release.photos = photos;
}
if (!sceneX) {
release.title = scene.querySelector('.title span').textContent;
release.url = url;
release.poster = scene.querySelector('video').poster;
release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
}
const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;
return release;
}
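The entryId derivation at the end covers both URL shapes the scraper sees; a worked example with a placeholder host and slug:

// Both /scenes/<slug> and /<slug> URLs yield <slug> as the entry ID.
// Host and slug below are placeholders.
const { pathname } = new URL('https://www.example.com/scenes/some-scene-title');
const [, entryIdA, entryIdB] = pathname.split('/'); // ['', 'scenes', 'some-scene-title']
const entryId = entryIdA === 'scenes' ? entryIdB : entryIdA; // 'some-scene-title'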
function scrapeLatestA(html, site) {
const { document } = new JSDOM(html).window;
const scenes = Array.from(document.querySelectorAll('.scenewrapper'));
return scenes.map(scene => scrapeSceneA(null, site, scene));
}
async function fetchLatestTeamSkeet(site, page = 1) {
const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
const res = await bhttp.get(url);
@@ -92,10 +139,37 @@ async function fetchLatest(site, page = 1) {
return null;
}
async function fetchLatestA(site) {
const url = `${site.url}/scenes`;
const res = await bhttp.get(url);
if (res.statusCode === 200) {
return scrapeLatestA(res.body.toString(), site);
}
return null;
}
async function fetchLatest(site, page = 1) {
if (site.parameters.id) {
return fetchLatestTeamSkeet(site, page);
}
if (site.parameters.scraper === 'A') {
return fetchLatestA(site, page);
}
return null;
}
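fetchLatest now dispatches on the site's parameters; a sketch of the two shapes it expects, with placeholder slugs, URLs and IDs:

// Placeholder site objects, only to show which branch each one takes;
// real values come from the site configuration, not from this diff.
const siteWithApiId = { slug: 'site-one', url: 'https://www.example.com', parameters: { id: '42' } };
const siteWithNewLayout = { slug: 'site-two', url: 'https://www.example.org', parameters: { scraper: 'A' } };

fetchLatest(siteWithApiId);     // -> fetchLatestTeamSkeet, paged updates endpoint
fetchLatest(siteWithNewLayout); // -> fetchLatestA, scrapes `${site.url}/scenes`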
async function fetchScene(url, site) {
const session = bhttp.session(); // resolve redirects
const res = await session.get(url);
if (site.parameters.scraper === 'A') {
return scrapeSceneA(res.body.toString(), site, null, url);
}
return scrapeScene(res.body.toString(), site);
}
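A usage sketch for the reworked fetchScene, again with placeholder values; the bhttp session resolves any redirect before the body reaches the matching scraper:

// Placeholder URL and site object; scraper 'A' routes the page to scrapeSceneA.
const site = { slug: 'site-two', url: 'https://www.example.org', parameters: { scraper: 'A' } };

fetchScene('https://www.example.org/scenes/some-scene-title', site)
  .then(release => console.log(release.title, release.duration));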