Added Vivid network. Added ASMR Fantasy to Adult Time. Storing deep URL in database. Added href to header links.

This commit is contained in:
2020-02-11 04:58:18 +01:00
parent 114e2e03b2
commit dd6a1d9bfd
44 changed files with 1322 additions and 35 deletions

View File

@@ -259,6 +259,18 @@ async function storeActor(actor, scraped = false, scrapeSuccess = false) {
await storeSocialLinks(actor.social, actorEntry.id);
if (actor.avatars) {
await createMediaDirectory('actors', `${actorEntry.slug}/`);
await storePhotos(actor.avatars, {
domain: 'actor',
role: 'photo',
primaryRole: 'avatar',
targetId: actorEntry.id,
subpath: `${actorEntry.slug}/`,
naming: 'timestamp',
}, actorEntry.name);
}
logger.info(`Added new entry for actor '${actor.name}'`);
return actorEntry;
@@ -425,17 +437,7 @@ async function scrapeActors(actorNames) {
return profile;
}
const newActorEntry = await storeActor(profile, true, true);
await createMediaDirectory('actors', `${newActorEntry.slug}/`);
await storePhotos(profile.avatars, {
domain: 'actor',
role: 'photo',
primaryRole: 'avatar',
targetId: newActorEntry.id,
subpath: `${newActorEntry.slug}/`,
naming: 'timestamp',
}, newActorEntry.name);
await storeActor(profile, true, true);
}
return profile;

View File

@@ -81,12 +81,21 @@ const { argv } = yargs
type: 'string',
default: config.fetchAfter.join(' '),
})
.option('last', {
describe: 'Get the latest x releases, no matter the date range',
type: 'number',
})
.option('null-date-limit', {
describe: 'Limit amount of scenes when dates are missing.',
type: 'number',
default: config.nullDateLimit,
alias: 'limit',
})
.option('page', {
describe: 'Page to start scraping at',
type: 'number',
default: 1,
})
.option('save', {
describe: 'Save fetched releases to database',
type: 'boolean',

View File

@@ -64,8 +64,9 @@ async function createThumbnail(buffer) {
return thumbnail;
} catch (error) {
logger.error(`Failed to create thumbnail: ${error.message}`);
throw error;
}
return null;
}
async function createMediaDirectory(domain, subpath) {

View File

@@ -227,6 +227,7 @@ async function curateReleaseEntry(release) {
// dislikes: release.rating && release.rating.dislikes,
// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
deep: typeof release.deep === 'boolean' ? release.deep : false,
deep_url: release.deepUrl,
};
return curatedRelease;
@@ -296,11 +297,16 @@ function accumulateActors(releases) {
name: actorName,
slug: actorSlug,
releaseIds: new Set(),
avatars: [],
};
}
if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
acc[actorSlug].releaseIds.add(release.id);
if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
if (actor.avatar) {
acc[actorSlug].avatars = acc[actorSlug].avatars.concat(actor.avatar);
}
});
return acc;

View File

@@ -11,7 +11,10 @@ const { findNetworkByUrl } = require('./networks');
const { storeReleases } = require('./releases');
async function findSite(url, release) {
const site = (release && release.site) || await findSiteByUrl(url);
if (release?.site) return release.site;
if (!url) return null;
const site = await findSiteByUrl(url);
if (site) {
return site;
@@ -33,7 +36,7 @@ async function findSite(url, release) {
async function scrapeRelease(source, basicRelease = null, type = 'scene') {
// profile scraper may return either URLs or pre-scraped scenes
const sourceIsUrl = typeof source === 'string';
const url = sourceIsUrl ? source : source.url;
const url = sourceIsUrl ? source : source?.url;
const release = sourceIsUrl ? basicRelease : source;
const site = await findSite(url, release);

View File

@@ -37,7 +37,7 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
.concat(accReleases.map(release => String(release.entryId))));
}
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = argv.page) {
if (!argv.latest || !scraper.fetchLatest) {
return [];
}
@@ -53,22 +53,27 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
const uniqueReleases = latestReleases
.filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
&& (!release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
&& (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
logger.info(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);
const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
if (
uniqueReleases.length > 0
// && (oldestReleaseOnPage || page < argv.pages)
&& (oldestReleaseOnPage
&& ((oldestReleaseOnPage
? moment(oldestReleaseOnPage).isAfter(afterDate)
: accReleases.length + uniqueReleases.length < argv.nullDateLimit)
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
) {
// oldest release on page is newer that specified limit, fetch next page
return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
// oldest release on page is newer that specified date range, or latest count has not yet been met, fetch next page
return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
}
const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
if (argv.latest && uniqueReleases.length >= argv.latest) {
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.last);
}
if (oldestReleaseOnPage) {
return accReleases.concat(uniqueReleasesWithSite);
@@ -81,7 +86,9 @@ async function scrapeUpcomingReleases(scraper, site) {
if (argv.upcoming && scraper.fetchUpcoming) {
const upcomingReleases = await scraper.fetchUpcoming(site);
return upcomingReleases.map(release => ({ ...release, upcoming: true }));
return upcomingReleases
? upcomingReleases.map(release => ({ ...release, site, upcoming: true }))
: [];
}
return [];

View File

@@ -13,8 +13,8 @@ function curateRelease(release, site) {
return release;
}
async function networkFetchScene(url, site) {
const scene = await fetchScene(url, site);
async function networkFetchScene(url, site, release) {
const scene = await fetchScene(url, site, release);
return curateRelease(scene, site);
}

View File

@@ -184,7 +184,7 @@ async function scrapeScene(html, url, site, scrapedRelease) {
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
const [data, data2] = json ? JSON.parse(json) : [];
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
[release.entryId] = (scrapedRelease?.path || new URL(url).pathname).split('/').slice(-1);
release.title = videoData?.playerOptions?.sceneInfos.sceneTitle || data?.name;
@@ -217,14 +217,14 @@ async function scrapeScene(html, url, site, scrapedRelease) {
}));
}
const hasTrans = release.actors.some(actor => actor.gender === 'shemale');
const hasTrans = release.actors?.some(actor => actor.gender === 'shemale');
const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', ');
release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim();
if (channel) release.channel = slugify(channel, { delimiter: '' });
release.poster = videoData.picPreview;
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
const photoLink = $('.picturesItem a').attr('href');
if (photoLink) release.photos = await getPhotos(photoLink, site);
@@ -472,7 +472,9 @@ async function fetchScene(url, site, release) {
const res = await bhttp.get(deepUrl);
if (res.statusCode === 200) {
return scrapeScene(res.body.toString(), url, site, release);
const scene = await scrapeScene(res.body.toString(), url, site, release);
return { ...scene, deepUrl };
}
return null;

View File

@@ -2,7 +2,6 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchScene,

View File

@@ -44,6 +44,7 @@ const sextury = require('./21sextury');
const teamskeet = require('./teamskeet');
const transangels = require('./transangels');
const twistys = require('./twistys');
const vivid = require('./vivid');
const vixen = require('./vixen');
const vogov = require('./vogov');
const wicked = require('./wicked');
@@ -90,6 +91,7 @@ module.exports = {
score,
teamskeet,
twistys,
vivid,
vixen,
vogov,
wicked,

114
src/scrapers/vivid.js Normal file
View File

@@ -0,0 +1,114 @@
'use strict';
/* eslint-disable no-unused-vars */
const bhttp = require('bhttp');
const { get, date } = require('../utils/q');
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
// Curate raw scene objects from Vivid's native JSON API into release objects.
// `scenes` is the API's responseData array; `site` supplies the base URL.
function scrapeLatestNative(scenes, site) {
  return scenes.map((scene) => {
    const release = {};

    release.entryId = scene.id;
    release.url = `${site.url}${scene.url}`;
    release.title = scene.name;

    release.date = date(scene.release_date, 'YYYY-MM-DD');
    release.duration = parseInt(scene.runtime, 10) * 60; // API reports runtime in minutes

    // gender is occasionally absent from cast entries; don't crash on it
    release.actors = scene.cast?.map(actor => ({
      name: actor.stagename,
      gender: actor.gender?.toLowerCase(),
      avatar: actor.placard,
    })) || [];

    release.stars = Number(scene.rating);
    release.poster = scene.placard_800 || scene.placard;

    return release;
  });
}
// Scrape a scene page from a native (non-Gamma) Vivid site.
// `q`/`qa` are the query helpers from utils/q; `html` is the raw page source.
function scrapeSceneNative({ html, q, qa }, url, _site) {
  const release = { url };

  release.entryId = new URL(url).pathname.split('/')[2]; // eslint-disable-line prefer-destructuring

  release.title = q('.scene-h2-heading', true);
  release.description = q('.indie-model-p', true);

  // Guard with ?. — the Released/Runtime headings are not always present
  const dateString = qa('h5').find(el => /Released/.test(el.textContent))?.textContent;
  if (dateString) release.date = date(dateString, 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

  const duration = qa('h5').find(el => /Runtime/.test(el.textContent))?.textContent;
  if (duration) {
    const [hours, minutes] = duration.match(/\d+/g);
    if (minutes) release.duration = (hours * 3600) + (minutes * 60);
    else release.duration = hours * 60; // scene shorter than 1 hr, hour match is actually minutes
  }

  release.actors = qa('h4 a[href*="/stars"], h4 a[href*="/celebs"]', true);
  release.tags = qa('h5 a[href*="/categories"]', true);

  // Non-greedy (.*?) so the poster (.jpg) and trailer (.mp4) come out as two
  // separate matches instead of one greedy span covering both URLs; dots escaped.
  const media = html.match(/https:\/\/content\.vivid\.com(.*?)(\.jpg|\.mp4)/g) || [];
  const [poster, trailer] = media;

  if (poster) release.poster = poster;

  if (trailer) {
    release.trailer = {
      src: trailer,
    };
  }

  const channel = q('h5 a[href*="/sites"]', true);
  if (channel) release.channel = channel.replace(/\.\w+/, ''); // strip TLD, e.g. 'site.com' -> 'site'

  return release;
}
/**
 * Fetch the latest scenes for a Vivid site. Sites flagged with
 * parameters.useGamma are delegated to the shared Gamma API scraper;
 * otherwise the native Vivid JSON endpoint is paginated directly.
 */
async function fetchLatestNative(site, page = 1) {
  if (site.parameters?.useGamma) return fetchApiLatest(site, page);

  const pageSize = 50;
  const apiUrl = `${site.url}/videos/api/?limit=${pageSize}&offset=${(page - 1) * pageSize}&sort=datedesc`;

  const res = await bhttp.get(apiUrl, { decodeJSON: true });

  const success = res.statusCode === 200 && res.body.code === 200;
  return success ? scrapeLatestNative(res.body.responseData, site) : null;
}
// Vivid's native API has no upcoming feed; only Gamma-backed sites provide one.
async function fetchUpcomingNative(site) {
  return site.parameters?.useGamma ? fetchApiUpcoming(site) : null;
}
// Fetch and scrape a single scene: Gamma-backed sites go through the shared
// Gamma scraper, otherwise the page is fetched and parsed natively.
async function fetchSceneNative(url, site, release) {
  if (site.parameters?.useGamma) {
    return fetchScene(url, site, release);
  }

  const qScene = await get(url);
  if (!qScene) return qScene;

  return scrapeSceneNative(qScene, url, site);
}
// Exported scene fetcher; currently delegates straight to the Gamma scraper.
async function fetchSceneWrapper(url, site, release) {
  return fetchScene(url, site, release);
}
// NOTE(review): the native implementations above (fetchLatestNative,
// fetchUpcomingNative, fetchSceneNative, scrapeSceneNative) are defined but
// not exported — Vivid is currently scraped entirely through the Gamma API
// (hence the no-unused-vars disable at the top of the file).
module.exports = {
  fetchLatest: fetchApiLatest,
  fetchProfile: fetchApiProfile,
  fetchUpcoming: fetchApiUpcoming,
  fetchScene: fetchSceneWrapper,
};

View File

@@ -119,11 +119,11 @@ function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https
return attr ? trailers.map(trailer => prefixProtocol(trailer, protocol)) : trailers;
}
function qlength(context, selector, attr = 'textContent') {
function qlength(context, selector, match, attr = 'textContent') {
const durationString = q(context, selector, attr);
if (!durationString) return null;
const duration = durationString.match(/(\d+:)?\d+:\d+/);
const duration = durationString.match(match || /(\d+:)?\d+:\d+/);
if (duration) {
const segments = ['00'].concat(duration[0].split(':')).slice(-3);