Added preflight method to scrapers. Added Assylum.
This commit is contained in:
parent
bec26ee072
commit
2f70de8e11
Binary file not shown.
After Width: | Height: | Size: 41 KiB |
Binary file not shown.
After Width: | Height: | Size: 1.6 KiB |
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
Binary file not shown.
After Width: | Height: | Size: 96 KiB |
Binary file not shown.
After Width: | Height: | Size: 41 KiB |
Binary file not shown.
After Width: | Height: | Size: 81 KiB |
|
@ -26,6 +26,12 @@ const networks = [
|
||||||
url: 'https://www.adulttime.com',
|
url: 'https://www.adulttime.com',
|
||||||
description: 'Adult Time is a premium streaming service for adults! Watch adult movies, series, and channels from the top names in the industry.',
|
description: 'Adult Time is a premium streaming service for adults! Watch adult movies, series, and channels from the top names in the industry.',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
slug: 'assylum',
|
||||||
|
name: 'Assylum',
|
||||||
|
url: 'https://www.assylum.com',
|
||||||
|
description: 'At Assylum, submissive girls get dominated with rough anal sex, ass to mouth, hard BDSM, and sexual humiliation and degradation.',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
slug: 'babes',
|
slug: 'babes',
|
||||||
name: 'Babes',
|
name: 'Babes',
|
||||||
|
|
|
@ -294,6 +294,27 @@ const sites = [
|
||||||
description: 'LadyGonzo.com is a new Adult Time porn series featuring Joanna Angel shooting hardcore sex and gonzo porn movies the way she\'d like to see it!',
|
description: 'LadyGonzo.com is a new Adult Time porn series featuring Joanna Angel shooting hardcore sex and gonzo porn movies the way she\'d like to see it!',
|
||||||
network: 'adulttime',
|
network: 'adulttime',
|
||||||
},
|
},
|
||||||
|
// ASSYLUM
|
||||||
|
{
|
||||||
|
slug: 'assylum',
|
||||||
|
name: 'Assylum',
|
||||||
|
url: 'https://www.assylum.com',
|
||||||
|
description: 'At Assylum, submissive girls get dominated with rough anal sex, ass to mouth, hard BDSM, and sexual humiliation and degradation.',
|
||||||
|
network: 'assylum',
|
||||||
|
parameters: {
|
||||||
|
a: 68,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
slug: 'slavemouth',
|
||||||
|
name: 'Slave Mouth',
|
||||||
|
url: 'https://www.slavemouth.com',
|
||||||
|
description: 'Submissive girls get their mouths punished hard by Dr. Mercies, with facefucking, gagging, frozen cum bukkake, face bondage, ass eating, and sexual degradation.',
|
||||||
|
network: 'assylum',
|
||||||
|
parameters: {
|
||||||
|
a: 183,
|
||||||
|
},
|
||||||
|
},
|
||||||
// BABES
|
// BABES
|
||||||
{
|
{
|
||||||
name: 'Babes',
|
name: 'Babes',
|
||||||
|
@ -1255,8 +1276,8 @@ const sites = [
|
||||||
network: 'digitalplayground',
|
network: 'digitalplayground',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'rawcuts',
|
slug: 'rawcut',
|
||||||
name: 'Raw Cuts',
|
name: 'Raw Cut',
|
||||||
url: 'https://www.digitalplayground.com/scenes?site=208',
|
url: 'https://www.digitalplayground.com/scenes?site=208',
|
||||||
description: '',
|
description: '',
|
||||||
network: 'digitalplayground',
|
network: 'digitalplayground',
|
||||||
|
|
|
@ -362,7 +362,9 @@ async function scrapeActors(actorNames) {
|
||||||
const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
|
const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
|
||||||
const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
|
const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
|
||||||
|
|
||||||
const profiles = await Promise.map(sources, async (source) => {
|
const finalSources = argv.withReleases ? sources.flat() : sources; // ignore race-to-success grouping when scenes are requested
|
||||||
|
|
||||||
|
const profiles = await Promise.map(finalSources, async (source) => {
|
||||||
// const [scraperSlug, scraper] = source;
|
// const [scraperSlug, scraper] = source;
|
||||||
const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
|
const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ async function findSite(url, release) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
async function scrapeRelease(source, basicRelease = null, type = 'scene', preflight) {
|
||||||
// profile scraper may return either URLs or pre-scraped scenes
|
// profile scraper may return either URLs or pre-scraped scenes
|
||||||
const sourceIsUrl = typeof source === 'string';
|
const sourceIsUrl = typeof source === 'string';
|
||||||
const url = sourceIsUrl ? source : source?.url;
|
const url = sourceIsUrl ? source : source?.url;
|
||||||
|
@ -64,8 +64,8 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
||||||
}
|
}
|
||||||
|
|
||||||
const scrapedRelease = type === 'scene'
|
const scrapedRelease = type === 'scene'
|
||||||
? await scraper.fetchScene(url, site, release)
|
? await scraper.fetchScene(url, site, release, preflight)
|
||||||
: await scraper.fetchMovie(url, site, release);
|
: await scraper.fetchMovie(url, site, release, preflight);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...release,
|
...release,
|
||||||
|
@ -77,8 +77,8 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeReleases(sources, release = null, type = 'scene') {
|
async function scrapeReleases(sources, release = null, type = 'scene', preflight = null) {
|
||||||
const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type), {
|
const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type, preflight), {
|
||||||
concurrency: 5,
|
concurrency: 5,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -37,12 +37,12 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
|
||||||
.concat(accReleases.map(release => String(release.entryId))));
|
.concat(accReleases.map(release => String(release.entryId))));
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = argv.page) {
|
async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAfterDate(), accReleases = [], page = argv.page) {
|
||||||
if (!argv.latest || !scraper.fetchLatest) {
|
if (!argv.latest || !scraper.fetchLatest) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const latestReleases = await scraper.fetchLatest(site, page);
|
const latestReleases = await scraper.fetchLatest(site, page, preflight);
|
||||||
|
|
||||||
if (latestReleases.length === 0) {
|
if (latestReleases.length === 0) {
|
||||||
return accReleases;
|
return accReleases;
|
||||||
|
@ -68,7 +68,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
|
||||||
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
|
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
|
||||||
) {
|
) {
|
||||||
// oldest release on page is newer than the specified date range, or latest count has not yet been met, fetch next page
|
// oldest release on page is newer than the specified date range, or latest count has not yet been met, fetch next page
|
||||||
return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
|
return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argv.latest && uniqueReleases.length >= argv.latest) {
|
if (argv.latest && uniqueReleases.length >= argv.latest) {
|
||||||
|
@ -82,9 +82,9 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
|
||||||
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.nullDateLimit);
|
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.nullDateLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeUpcomingReleases(scraper, site) {
|
async function scrapeUpcomingReleases(scraper, site, preflight) {
|
||||||
if (argv.upcoming && scraper.fetchUpcoming) {
|
if (argv.upcoming && scraper.fetchUpcoming) {
|
||||||
const upcomingReleases = await scraper.fetchUpcoming(site);
|
const upcomingReleases = await scraper.fetchUpcoming(site, 1, preflight);
|
||||||
|
|
||||||
return upcomingReleases
|
return upcomingReleases
|
||||||
? upcomingReleases.map(release => ({ ...release, site, upcoming: true }))
|
? upcomingReleases.map(release => ({ ...release, site, upcoming: true }))
|
||||||
|
@ -94,11 +94,11 @@ async function scrapeUpcomingReleases(scraper, site) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function deepFetchReleases(baseReleases) {
|
async function deepFetchReleases(baseReleases, preflight) {
|
||||||
return Promise.map(baseReleases, async (release) => {
|
return Promise.map(baseReleases, async (release) => {
|
||||||
if (release.url || (release.path && release.site)) {
|
if (release.url || (release.path && release.site)) {
|
||||||
try {
|
try {
|
||||||
const fullRelease = await scrapeRelease(release.url || release.path, release, 'scene');
|
const fullRelease = await scrapeRelease(release.url || release.path, release, 'scene', preflight);
|
||||||
|
|
||||||
if (fullRelease) {
|
if (fullRelease) {
|
||||||
return {
|
return {
|
||||||
|
@ -128,9 +128,11 @@ async function deepFetchReleases(baseReleases) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeSiteReleases(scraper, site) {
|
async function scrapeSiteReleases(scraper, site) {
|
||||||
|
const preflight = await scraper.preflight?.(site);
|
||||||
|
|
||||||
const [newReleases, upcomingReleases] = await Promise.all([
|
const [newReleases, upcomingReleases] = await Promise.all([
|
||||||
scrapeUniqueReleases(scraper, site), // fetch basic release info from scene overview
|
scrapeUniqueReleases(scraper, site, preflight), // fetch basic release info from scene overview
|
||||||
scrapeUpcomingReleases(scraper, site), // fetch basic release info from upcoming overview
|
scrapeUpcomingReleases(scraper, site, preflight), // fetch basic release info from upcoming overview
|
||||||
]);
|
]);
|
||||||
|
|
||||||
if (argv.upcoming) {
|
if (argv.upcoming) {
|
||||||
|
@ -141,7 +143,7 @@ async function scrapeSiteReleases(scraper, site) {
|
||||||
|
|
||||||
if (argv.deep) {
|
if (argv.deep) {
|
||||||
// follow URL for every release
|
// follow URL for every release
|
||||||
return deepFetchReleases(baseReleases);
|
return deepFetchReleases(baseReleases, preflight);
|
||||||
}
|
}
|
||||||
|
|
||||||
return baseReleases;
|
return baseReleases;
|
||||||
|
|
|
@ -0,0 +1,131 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const { get, geta, ctxa } = require('../utils/q');
|
||||||
|
|
||||||
|
/**
 * Derive performer names from a free-form actor string when no known model matched.
 *
 * @param {?string} actorString - raw actor text, e.g. 'Patient: Jane Doe (01/02/20) and Mary Sue'
 * @returns {string[]} cleaned names; empty when the input is missing or yields no names
 */
function extractActors(actorString) {
	if (typeof actorString !== 'string') {
		return [];
	}

	return actorString
		// remove Patient:, (date) and other nonsense; each parenthesised group is removed
		// on its own so that names standing between two groups survive
		.replace(/.*:|\([^)]*\)|\d+(-|\s)year(-|\s)old|nurses?|tangled/ig, '')
		.split(/\band\b|\bvs\b|\/|,|&/i)
		.map(actor => actor.trim())
		// drop empty fragments and fragments that are evidently title words rather than names
		.filter(actor => actor !== '' && !/\banal\b|\bschool\b|\bgamer\b|\breturn\b|\bfor\b|\bare\b|\bpart\b|realdoll|bimbo|p\d+/i.test(actor));
}
|
||||||
|
|
||||||
|
/**
 * Select the known models whose name occurs (case-insensitively) in an actor string.
 *
 * Names are matched literally: regex metacharacters in a name are escaped, so
 * 'Dr. Gray' does not match 'DrX Gray' and a name containing '(' cannot throw.
 * Models without a usable name are ignored, because an empty pattern would match
 * every string.
 *
 * @param {?string} actorString - raw actor text from the page
 * @param {?Array<{name: string}>} models - known models
 * @returns {Array<{name: string}>} the matching model objects, in their original order
 */
function matchActors(actorString, models) {
	if (typeof actorString !== 'string' || !Array.isArray(models)) {
		return [];
	}

	return models.filter((model) => {
		const name = typeof model?.name === 'string' ? model.name.trim() : '';

		if (name === '') {
			return false;
		}

		const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

		return new RegExp(literalName, 'i').test(actorString);
	});
}
|
||||||
|
|
||||||
|
/**
 * Build basic release objects from the items of an overview page.
 *
 * @param {Array<Object>} scenes - one query context per item, exposing q, qd, qu and qi
 * @param {{url: string}} site - site the page belongs to; its url prefixes all paths
 * @param {?Array<{name: string}>} models - known models; treated as empty when not an array
 * @returns {Array<Object>} one release per item
 */
function scrapeLatest(scenes, site, models) {
	// a missing model list should fall through to name extraction, not abort the page
	const knownModels = Array.isArray(models) ? models : [];

	return scenes.map(({ q, qd, qu, qi }) => {
		const release = {};

		// drop the first character of the link before appending it to the site URL
		// NOTE(review): presumably the links are relative ('./?...') — confirm against the site markup
		const pathname = qu('a.itemimg').slice(1);
		[release.entryId] = pathname.split('/').slice(-1);
		release.url = `${site.url}${pathname}`;

		release.title = q('.itemimg img', 'alt') || q('h4 a', true);
		release.description = q('.mas_longdescription', true);
		release.date = qd('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);

		// prefer known models, fall back to names parsed from the actor string
		const actorString = q('.mas_description', true);
		const actors = matchActors(actorString, knownModels);
		release.actors = actors.length > 0 ? actors : extractActors(actorString);

		// only set a poster when the item has an image, to avoid a '<site>/null' URL
		const posterPath = qi('.itemimg img');
		if (posterPath) release.poster = `${site.url}/${posterPath}`;

		return release;
	});
}
|
||||||
|
|
||||||
|
/**
 * Cut a media path out of raw HTML.
 *
 * The search starts at `anchor` when the page contains it and at the top of the
 * document otherwise; the path runs from `prefix` up to and including `extension`.
 *
 * @returns {?string} the path, or null when prefix or extension cannot be found
 */
function findMediaPath(html, anchor, prefix, extension) {
	const from = Math.max(html.indexOf(anchor), 0);
	const start = html.indexOf(prefix, from);

	if (start === -1) {
		return null;
	}

	// look for the extension after the path start, so an earlier occurrence cannot truncate it
	const end = html.indexOf(extension, start);

	if (end === -1) {
		return null;
	}

	return html.slice(start, end + extension.length);
}

/**
 * Build a full release object from a scene page.
 *
 * @param {Object} context - query context of the page, exposing html, q, qa, qd and qis
 * @param {string} url - scene URL; its last path segment becomes the entry ID
 * @param {{url: string}} site - site the scene belongs to; its url prefixes all paths
 * @param {?Array<{name: string}>} models - known models to match the actor string against
 * @returns {Object} the scraped release
 */
function scrapeScene({ html, q, qa, qd, qis }, url, site, models) {
	const release = { url };

	[release.entryId] = url.split('/').slice(-1);
	release.title = q('.mas_title', true);
	release.description = q('.mas_longdescription', true);
	release.date = qd('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	// the description element holds the date alongside the performers; strip the date
	const actorString = (q('.mas_description', true) || '').replace(/\w+ \d{1,2}, \d{4}/, '');
	const actors = matchActors(actorString, models);
	release.actors = actors.length > 0 ? actors : extractActors(actorString);

	release.tags = qa('.tags a', true);
	release.photos = qis('.stills img').map(photoPath => `${site.url}/${photoPath}`);

	// the poster path follows 'splash:' in the page source
	// NOTE(review): presumably part of the inline video player setup — confirm against the site markup
	const poster = findMediaPath(html, 'splash:', 'faceimages/', '.jpg');
	if (poster) release.poster = `${site.url}/${poster}`;

	const trailer = findMediaPath(html, 'video/mp4', '/content', '.mp4');
	if (trailer) release.trailer = { src: `${site.url}${trailer}` };

	return release;
}
|
||||||
|
|
||||||
|
/**
 * Build model objects from the items of a model overview page.
 *
 * Items without an avatar image, or whose name is empty after clean-up, are skipped:
 * they cannot be matched against actor strings, and one malformed item should not
 * abort the whole page.
 *
 * @param {{el: Object}} context - query context of the page; only its root element is used
 * @param {{url: string}} site - site the page belongs to; its url prefixes all paths
 * @returns {Array<{gender: string, name: string, avatar: (string|undefined), url: (string|undefined)}>}
 */
function extractModels({ el }, site) {
	return ctxa(el, '.item').reduce((actors, { q, qu }) => {
		const avatar = q('.itemimg img');

		// the alt text may carry a 'Label: Name' prefix and site-specific titles
		const name = avatar?.alt
			?.split(':').slice(-1)[0]
			.replace(/xtreme girl|nurse/ig, '')
			.trim();

		if (!name) {
			return actors;
		}

		const actor = { gender: 'female', name };

		if (avatar.src) actor.avatar = `${site.url}/${avatar.src}`;

		// drop the first character of the link before appending it to the site URL
		const actorPath = qu('.itemimg');
		if (actorPath) actor.url = `${site.url}${actorPath.slice(1)}`;

		actors.push(actor);

		return actors;
	}, []);
}
|
||||||
|
|
||||||
|
/**
 * Collect the site's models by walking the paginated model overview.
 *
 * Exported as the scraper's preflight step, so the list can be fetched once and
 * reused for every scene.
 *
 * @param {{url: string}} site - site to fetch models for
 * @param {number} [page=1] - overview page to start from
 * @param {Array<Object>} [accModels=[]] - models gathered from earlier pages
 * @returns {Promise<Array<Object>>} all gathered models plus the hard-coded 'Dr. Gray'
 *   entry; empty when not even the first page could be fetched
 */
async function fetchModels(site, page = 1, accModels = []) {
	const url = `${site.url}/?models/${page}`;
	const qModels = await get(url);

	if (!qModels) {
		// a failing page must not discard the models gathered from earlier pages
		return accModels.length > 0
			? accModels.concat({ name: 'Dr. Gray' })
			: [];
	}

	const models = accModels.concat(extractModels(qModels, site));

	const hasNextPage = qModels.qa('.pagenumbers', true)
		.map(pageX => Number(pageX))
		.filter(Boolean) // remove << and >>
		.includes(page + 1);

	if (hasNextPage) {
		return fetchModels(site, page + 1, models);
	}

	return models.concat({ name: 'Dr. Gray' });
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of the site's latest scenes.
 *
 * @param {{url: string, parameters: {a: number}}} site - site to fetch; parameters.a is
 *   inserted into the overview URL
 * @param {number} [page=1] - overview page number
 * @param {?Array<Object>} models - model list from the preflight step; fetched on demand
 *   when absent, as fetchScene does
 * @returns {Promise<Array<Object>>} basic releases; an empty array when the page could
 *   not be fetched, so callers can safely read `.length`
 */
async function fetchLatest(site, page = 1, models) {
	const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
	const qLatest = await geta(url, '.item');

	if (!qLatest) {
		return [];
	}

	const knownModels = models || await fetchModels(site);

	return scrapeLatest(qLatest, site, knownModels);
}
|
||||||
|
|
||||||
|
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site the scene belongs to
 * @param {Object} release - basic release supplied by the caller; not used here
 * @param {?Array<Object>} preflight - model list from the preflight step, if one ran
 * @returns {Promise<?Object>} the scraped release, or the falsy fetch result when the
 *   page could not be retrieved
 */
async function fetchScene(url, site, release, preflight) {
	// reuse the preflight model list; only fetch the models when none was handed in
	const knownModels = preflight || await fetchModels(site);
	const scenePage = await get(url);

	if (!scenePage) {
		return scenePage;
	}

	return scrapeScene(scenePage, url, site, knownModels);
}
|
||||||
|
|
||||||
|
// Scraper interface: latest-scene overview, single-scene scraping, and the preflight
// step, which supplies the model list that the two fetchers accept as their last argument.
module.exports = { fetchLatest, fetchScene, preflight: fetchModels };
|
|
@ -1,6 +1,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const adulttime = require('./adulttime');
|
const adulttime = require('./adulttime');
|
||||||
|
const assylum = require('./assylum');
|
||||||
const babes = require('./babes');
|
const babes = require('./babes');
|
||||||
const bang = require('./bang');
|
const bang = require('./bang');
|
||||||
const bangbros = require('./bangbros');
|
const bangbros = require('./bangbros');
|
||||||
|
@ -52,10 +53,11 @@ const xempire = require('./xempire');
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
releases: {
|
releases: {
|
||||||
adulttime,
|
|
||||||
'21naturals': naturals,
|
'21naturals': naturals,
|
||||||
'21sextreme': sextreme,
|
'21sextreme': sextreme,
|
||||||
'21sextury': sextury,
|
'21sextury': sextury,
|
||||||
|
adulttime,
|
||||||
|
assylum,
|
||||||
babes,
|
babes,
|
||||||
bang,
|
bang,
|
||||||
bangbros,
|
bangbros,
|
||||||
|
|
|
@ -149,6 +149,7 @@ const funcs = {
|
||||||
qurl,
|
qurl,
|
||||||
qurls,
|
qurls,
|
||||||
qa: qall,
|
qa: qall,
|
||||||
|
qs: qall,
|
||||||
qd: qdate,
|
qd: qdate,
|
||||||
qi: qimage,
|
qi: qimage,
|
||||||
qis: qimages,
|
qis: qimages,
|
||||||
|
@ -175,6 +176,7 @@ function init(element, window) {
|
||||||
|
|
||||||
return {
|
return {
|
||||||
element,
|
element,
|
||||||
|
el: element,
|
||||||
html: element.outerHTML || element.body.outerHTML,
|
html: element.outerHTML || element.body.outerHTML,
|
||||||
...(window && {
|
...(window && {
|
||||||
window,
|
window,
|
||||||
|
|
Loading…
Reference in New Issue