Added Vivid network. Added ASMR Fantasy to Adult Time. Storing deep URL in database. Added href to header links.
This commit is contained in:
@@ -259,6 +259,18 @@ async function storeActor(actor, scraped = false, scrapeSuccess = false) {
|
||||
|
||||
await storeSocialLinks(actor.social, actorEntry.id);
|
||||
|
||||
if (actor.avatars) {
|
||||
await createMediaDirectory('actors', `${actorEntry.slug}/`);
|
||||
await storePhotos(actor.avatars, {
|
||||
domain: 'actor',
|
||||
role: 'photo',
|
||||
primaryRole: 'avatar',
|
||||
targetId: actorEntry.id,
|
||||
subpath: `${actorEntry.slug}/`,
|
||||
naming: 'timestamp',
|
||||
}, actorEntry.name);
|
||||
}
|
||||
|
||||
logger.info(`Added new entry for actor '${actor.name}'`);
|
||||
|
||||
return actorEntry;
|
||||
@@ -425,17 +437,7 @@ async function scrapeActors(actorNames) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
const newActorEntry = await storeActor(profile, true, true);
|
||||
|
||||
await createMediaDirectory('actors', `${newActorEntry.slug}/`);
|
||||
await storePhotos(profile.avatars, {
|
||||
domain: 'actor',
|
||||
role: 'photo',
|
||||
primaryRole: 'avatar',
|
||||
targetId: newActorEntry.id,
|
||||
subpath: `${newActorEntry.slug}/`,
|
||||
naming: 'timestamp',
|
||||
}, newActorEntry.name);
|
||||
await storeActor(profile, true, true);
|
||||
}
|
||||
|
||||
return profile;
|
||||
|
||||
@@ -81,12 +81,21 @@ const { argv } = yargs
|
||||
type: 'string',
|
||||
default: config.fetchAfter.join(' '),
|
||||
})
|
||||
.option('last', {
|
||||
describe: 'Get the latest x releases, no matter the date range',
|
||||
type: 'number',
|
||||
})
|
||||
.option('null-date-limit', {
|
||||
describe: 'Limit amount of scenes when dates are missing.',
|
||||
type: 'number',
|
||||
default: config.nullDateLimit,
|
||||
alias: 'limit',
|
||||
})
|
||||
.option('page', {
|
||||
describe: 'Page to start scraping at',
|
||||
type: 'number',
|
||||
default: 1,
|
||||
})
|
||||
.option('save', {
|
||||
describe: 'Save fetched releases to database',
|
||||
type: 'boolean',
|
||||
|
||||
@@ -64,8 +64,9 @@ async function createThumbnail(buffer) {
|
||||
return thumbnail;
|
||||
} catch (error) {
|
||||
logger.error(`Failed to create thumbnail: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function createMediaDirectory(domain, subpath) {
|
||||
|
||||
@@ -227,6 +227,7 @@ async function curateReleaseEntry(release) {
|
||||
// dislikes: release.rating && release.rating.dislikes,
|
||||
// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
|
||||
deep: typeof release.deep === 'boolean' ? release.deep : false,
|
||||
deep_url: release.deepUrl,
|
||||
};
|
||||
|
||||
return curatedRelease;
|
||||
@@ -296,11 +297,16 @@ function accumulateActors(releases) {
|
||||
name: actorName,
|
||||
slug: actorSlug,
|
||||
releaseIds: new Set(),
|
||||
avatars: [],
|
||||
};
|
||||
}
|
||||
|
||||
if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
|
||||
acc[actorSlug].releaseIds.add(release.id);
|
||||
|
||||
if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
|
||||
if (actor.avatar) {
|
||||
acc[actorSlug].avatars = acc[actorSlug].avatars.concat(actor.avatar);
|
||||
}
|
||||
});
|
||||
|
||||
return acc;
|
||||
|
||||
@@ -11,7 +11,10 @@ const { findNetworkByUrl } = require('./networks');
|
||||
const { storeReleases } = require('./releases');
|
||||
|
||||
async function findSite(url, release) {
|
||||
const site = (release && release.site) || await findSiteByUrl(url);
|
||||
if (release?.site) return release.site;
|
||||
if (!url) return null;
|
||||
|
||||
const site = await findSiteByUrl(url);
|
||||
|
||||
if (site) {
|
||||
return site;
|
||||
@@ -33,7 +36,7 @@ async function findSite(url, release) {
|
||||
async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
||||
// profile scraper may return either URLs or pre-scraped scenes
|
||||
const sourceIsUrl = typeof source === 'string';
|
||||
const url = sourceIsUrl ? source : source.url;
|
||||
const url = sourceIsUrl ? source : source?.url;
|
||||
const release = sourceIsUrl ? basicRelease : source;
|
||||
|
||||
const site = await findSite(url, release);
|
||||
|
||||
@@ -37,7 +37,7 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
|
||||
.concat(accReleases.map(release => String(release.entryId))));
|
||||
}
|
||||
|
||||
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
|
||||
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = argv.page) {
|
||||
if (!argv.latest || !scraper.fetchLatest) {
|
||||
return [];
|
||||
}
|
||||
@@ -53,22 +53,27 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
|
||||
|
||||
const uniqueReleases = latestReleases
|
||||
.filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
|
||||
&& (!release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
|
||||
&& (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
|
||||
|
||||
logger.info(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);
|
||||
|
||||
const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
|
||||
|
||||
if (
|
||||
uniqueReleases.length > 0
|
||||
// && (oldestReleaseOnPage || page < argv.pages)
|
||||
&& (oldestReleaseOnPage
|
||||
&& ((oldestReleaseOnPage
|
||||
? moment(oldestReleaseOnPage).isAfter(afterDate)
|
||||
: accReleases.length + uniqueReleases.length < argv.nullDateLimit)
|
||||
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
|
||||
) {
|
||||
// oldest release on page is newer that specified limit, fetch next page
|
||||
return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
|
||||
// oldest release on page is newer than the specified date range, or the requested latest count has not yet been met; fetch next page
|
||||
return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
|
||||
}
|
||||
|
||||
const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
|
||||
if (argv.latest && uniqueReleases.length >= argv.latest) {
|
||||
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.last);
|
||||
}
|
||||
|
||||
if (oldestReleaseOnPage) {
|
||||
return accReleases.concat(uniqueReleasesWithSite);
|
||||
@@ -81,7 +86,9 @@ async function scrapeUpcomingReleases(scraper, site) {
|
||||
if (argv.upcoming && scraper.fetchUpcoming) {
|
||||
const upcomingReleases = await scraper.fetchUpcoming(site);
|
||||
|
||||
return upcomingReleases.map(release => ({ ...release, upcoming: true }));
|
||||
return upcomingReleases
|
||||
? upcomingReleases.map(release => ({ ...release, site, upcoming: true }))
|
||||
: [];
|
||||
}
|
||||
|
||||
return [];
|
||||
|
||||
@@ -13,8 +13,8 @@ function curateRelease(release, site) {
|
||||
return release;
|
||||
}
|
||||
|
||||
async function networkFetchScene(url, site) {
|
||||
const scene = await fetchScene(url, site);
|
||||
async function networkFetchScene(url, site, release) {
|
||||
const scene = await fetchScene(url, site, release);
|
||||
|
||||
return curateRelease(scene, site);
|
||||
}
|
||||
|
||||
@@ -184,7 +184,7 @@ async function scrapeScene(html, url, site, scrapedRelease) {
|
||||
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
|
||||
|
||||
const [data, data2] = json ? JSON.parse(json) : [];
|
||||
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
|
||||
const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
|
||||
|
||||
[release.entryId] = (scrapedRelease?.path || new URL(url).pathname).split('/').slice(-1);
|
||||
release.title = videoData?.playerOptions?.sceneInfos.sceneTitle || data?.name;
|
||||
@@ -217,14 +217,14 @@ async function scrapeScene(html, url, site, scrapedRelease) {
|
||||
}));
|
||||
}
|
||||
|
||||
const hasTrans = release.actors.some(actor => actor.gender === 'shemale');
|
||||
const hasTrans = release.actors?.some(actor => actor.gender === 'shemale');
|
||||
const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', ');
|
||||
release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
|
||||
|
||||
const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim();
|
||||
if (channel) release.channel = slugify(channel, { delimiter: '' });
|
||||
|
||||
release.poster = videoData.picPreview;
|
||||
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
|
||||
|
||||
const photoLink = $('.picturesItem a').attr('href');
|
||||
if (photoLink) release.photos = await getPhotos(photoLink, site);
|
||||
@@ -472,7 +472,9 @@ async function fetchScene(url, site, release) {
|
||||
const res = await bhttp.get(deepUrl);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), url, site, release);
|
||||
const scene = await scrapeScene(res.body.toString(), url, site, release);
|
||||
|
||||
return { ...scene, deepUrl };
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
|
||||
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
|
||||
@@ -44,6 +44,7 @@ const sextury = require('./21sextury');
|
||||
const teamskeet = require('./teamskeet');
|
||||
const transangels = require('./transangels');
|
||||
const twistys = require('./twistys');
|
||||
const vivid = require('./vivid');
|
||||
const vixen = require('./vixen');
|
||||
const vogov = require('./vogov');
|
||||
const wicked = require('./wicked');
|
||||
@@ -90,6 +91,7 @@ module.exports = {
|
||||
score,
|
||||
teamskeet,
|
||||
twistys,
|
||||
vivid,
|
||||
vixen,
|
||||
vogov,
|
||||
wicked,
|
||||
|
||||
114
src/scrapers/vivid.js
Normal file
114
src/scrapers/vivid.js
Normal file
@@ -0,0 +1,114 @@
|
||||
'use strict';
|
||||
|
||||
/* eslint-disable no-unused-vars */
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
const { get, date } = require('../utils/q');
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
/**
 * Convert scene entries from the native videos API response into release objects.
 *
 * Fields that cannot be derived from a scene (missing runtime, rating or actor
 * gender) are left unset instead of being stored as NaN or throwing.
 *
 * @param {Object[]} scenes - Scene entries as returned in the API's `responseData`.
 * @param {Object} site - Site the scenes belong to; `site.url` is prefixed to each scene URL.
 * @returns {Object[]} One release per scene; an empty array when `scenes` is not an array.
 */
function scrapeLatestNative(scenes, site) {
  if (!Array.isArray(scenes)) return [];

  return scenes.map((scene) => {
    const release = {};

    release.entryId = scene.id;
    release.url = `${site.url}${scene.url}`;

    release.title = scene.name;
    release.date = date(scene.release_date, 'YYYY-MM-DD');

    // NOTE(review): runtime is presumably expressed in minutes, hence the conversion to seconds — confirm against the API
    const runtime = parseInt(scene.runtime, 10);
    if (!Number.isNaN(runtime)) release.duration = runtime * 60;

    release.actors = scene.cast?.map(actor => ({
      name: actor.stagename,
      gender: actor.gender?.toLowerCase(), // gender may be absent; do not throw on it
      avatar: actor.placard,
    })) || [];

    // a missing rating must not be recorded as NaN (or as a 0-star rating for null)
    const stars = Number(scene.rating);
    if (scene.rating != null && !Number.isNaN(stars)) release.stars = stars;

    release.poster = scene.placard_800 || scene.placard;

    return release;
  });
}
|
||||
|
||||
/**
 * Scrape a release from a native (non-Gamma) scene page.
 *
 * Date, duration, poster and trailer are only set when the page actually
 * provides them; a page lacking any of them no longer makes the scraper throw.
 *
 * @param {Object} context - Query context for the fetched page.
 * @param {string} context.html - Raw page HTML, searched for content.vivid.com media URLs.
 * @param {Function} context.q - Single-element query helper; called as `q(selector, true)`
 *   (presumably returns the element's text — confirm in utils/q).
 * @param {Function} context.qa - Multi-element query helper; `qa(selector)` is used as an
 *   array of elements, `qa(selector, true)` presumably as an array of texts — confirm in utils/q.
 * @param {string} url - Absolute scene URL; its third path segment is used as the entry ID.
 * @param {Object} _site - Unused.
 * @returns {Object} The scraped release.
 */
function scrapeSceneNative({ html, q, qa }, url, _site) {
  const release = { url };

  release.entryId = new URL(url).pathname.split('/')[2]; // eslint-disable-line prefer-destructuring

  release.title = q('.scene-h2-heading', true);
  release.description = q('.indie-model-p', true);

  const headings = qa('h5');

  const dateString = headings.find(el => /Released/.test(el.textContent))?.textContent;
  if (dateString) release.date = date(dateString, 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

  const durationString = headings.find(el => /Runtime/.test(el.textContent))?.textContent;
  const [hours, minutes] = durationString?.match(/\d+/g) || [];

  if (minutes) release.duration = (Number(hours) * 3600) + (Number(minutes) * 60);
  else if (hours) release.duration = Number(hours) * 60; // scene shorter than 1hr, the 'hours' match is actually minutes

  release.actors = qa('h4 a[href*="/stars"], h4 a[href*="/celebs"]', true);
  release.tags = qa('h5 a[href*="/categories"]', true);

  // Dots are escaped and the path match is lazy and stops at quotes/whitespace, so two
  // URLs on the same line are matched separately instead of being merged into one.
  const mediaUrls = html?.match(/https:\/\/content\.vivid\.com[^\s"'<>]*?\.(?:jpg|mp4)/g) || [];

  // pick media by extension rather than by position in the page
  const poster = mediaUrls.find(src => src.endsWith('.jpg'));
  const trailer = mediaUrls.find(src => src.endsWith('.mp4'));

  if (poster) release.poster = poster;

  if (trailer) {
    release.trailer = {
      src: trailer,
    };
  }

  const channel = q('h5 a[href*="/sites"]', true);
  if (channel) release.channel = channel.replace(/\.\w+/, ''); // strip the TLD, e.g. 'vivid.com' -> 'vivid'

  return release;
}
|
||||
|
||||
/**
 * Fetch one page of the latest releases for a site.
 *
 * Sites flagged with `parameters.useGamma` are delegated to the Gamma API
 * scraper; all others are read from the site's own videos API, 50 per page.
 *
 * @param {Object} site - Site to fetch; reads `site.url` and `site.parameters`.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the request
 *   or the API response did not report status 200.
 */
async function fetchLatestNative(site, page = 1) {
  const useGamma = site.parameters?.useGamma;

  if (useGamma) return fetchApiLatest(site, page);

  // the API pages by offset, 50 scenes at a time
  const offset = (page - 1) * 50;
  const { statusCode, body } = await bhttp.get(`${site.url}/videos/api/?limit=50&offset=${offset}&sort=datedesc`, {
    decodeJSON: true,
  });

  // both the HTTP status and the status code embedded in the payload must be OK
  const succeeded = statusCode === 200 && body.code === 200;

  return succeeded ? scrapeLatestNative(body.responseData, site) : null;
}
|
||||
|
||||
/**
 * Fetch upcoming releases for a site.
 *
 * Only sites flagged with `parameters.useGamma` are handled, by delegating to
 * the Gamma API scraper; for any other site this resolves to null.
 *
 * @param {Object} site - Site to fetch; reads `site.parameters`.
 * @returns {Promise<Object[]|null>} Result of the Gamma scraper, or null.
 */
async function fetchUpcomingNative(site) {
  return site.parameters?.useGamma
    ? fetchApiUpcoming(site)
    : null;
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene.
 *
 * Sites flagged with `parameters.useGamma` are delegated to the Gamma scene
 * scraper; all others are fetched with `get` and scraped natively.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site the scene belongs to; reads `site.parameters`.
 * @param {Object} [release] - Release passed through to the Gamma scraper.
 * @returns {Promise<Object>} The scraped release, or the falsy value returned
 *   by `get` when the page could not be loaded.
 */
async function fetchSceneNative(url, site, release) {
  if (site.parameters?.useGamma) return fetchScene(url, site, release);

  const page = await get(url);

  // pass a failed fetch through unchanged
  if (!page) return page;

  return scrapeSceneNative(page, url, site);
}
|
||||
|
||||
/**
 * Thin wrapper around the Gamma scene scraper, exported as this scraper's `fetchScene`.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site the scene belongs to.
 * @param {Object} [release] - Release passed through to the Gamma scraper.
 * @returns {Promise<Object|null>} Whatever the Gamma `fetchScene` resolves to.
 */
async function fetchSceneWrapper(url, site, release) {
  return fetchScene(url, site, release);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
};
|
||||
@@ -119,11 +119,11 @@ function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https
|
||||
return attr ? trailers.map(trailer => prefixProtocol(trailer, protocol)) : trailers;
|
||||
}
|
||||
|
||||
function qlength(context, selector, attr = 'textContent') {
|
||||
function qlength(context, selector, match, attr = 'textContent') {
|
||||
const durationString = q(context, selector, attr);
|
||||
|
||||
if (!durationString) return null;
|
||||
const duration = durationString.match(/(\d+:)?\d+:\d+/);
|
||||
const duration = durationString.match(match || /(\d+:)?\d+:\d+/);
|
||||
|
||||
if (duration) {
|
||||
const segments = ['00'].concat(duration[0].split(':')).slice(-3);
|
||||
|
||||
Reference in New Issue
Block a user