diff --git a/config/default.js b/config/default.js index 87a5cfe1..1f5aff98 100644 --- a/config/default.js +++ b/config/default.js @@ -91,6 +91,10 @@ module.exports = { 'burningangel', 'brazzers', 'milehighmedia', + [ + 'devilsfilm', + 'roccosiffredi', + ], [ 'vixen', 'tushy', @@ -112,6 +116,7 @@ module.exports = { ], '21sextury', 'julesjordan', + 'peternorth', 'naughtyamerica', 'cherrypimps', 'pimpxxx', @@ -143,6 +148,10 @@ module.exports = { 'private', 'ddfnetwork', 'bangbros', + [ + 'silverstonedvd', + 'silviasaint', + ], 'kellymadison', 'gangbangcreampie', 'gloryholesecrets', diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 2adf6a7b..95aff319 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -1899,6 +1899,7 @@ const sites = [ parameters: { latest: '/en/All/scenes/0/latest/', upcoming: '/en/All/scenes/0/upcoming', + classic: true, }, }, { @@ -1910,6 +1911,7 @@ const sites = [ parameters: { latest: '/en/scenes/All/0/', upcoming: '/en/scenes/All/0/1/upcoming', + classic: true, }, }, { diff --git a/src/actors.js b/src/actors.js index d885f7a0..3f3a8525 100644 --- a/src/actors.js +++ b/src/actors.js @@ -137,11 +137,10 @@ async function curateProfile(profile) { name: profile.name, avatar: profile.avatar, scraper: profile.scraper, + site: profile.site, + network: profile.network, }; - curatedProfile.site = profile.site.isNetwork ? null : profile.site; - curatedProfile.network = profile.site.isNetwork ? profile.site : null; - curatedProfile.description = profile.description?.trim() || null; curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available curatedProfile.ethnicity = profile.ethnicity?.trim() || null; @@ -288,7 +287,7 @@ async function interpolateProfiles(actors) { profile.tattoos = getLongest(valuesByProperty.tattoos); profile.piercings = getLongest(valuesByProperty.piercings); - profile.avatar_media_id = avatars.sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0].id; + profile.avatar_media_id = avatars.sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null; return profile; }); @@ -368,21 +367,25 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) { try { return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => { const scraper = scrapers[scraperSlug]; - const siteOrNetwork = networksBySlug[scraperSlug] || sitesBySlug[scraperSlug]; + const context = { + site: sitesBySlug[scraperSlug] || null, + network: networksBySlug[scraperSlug] || null, + scraper: scraperSlug, + }; if (!scraper?.fetchProfile) { logger.warn(`No profile profile scraper available for ${scraperSlug}`); throw new Error(`No profile profile scraper available for ${scraperSlug}`); } - if (!siteOrNetwork) { + if (!context.site && !context.network) { logger.warn(`No site or network found for ${scraperSlug}`); throw new Error(`No site or network found for ${scraperSlug}`); } logger.verbose(`Searching profile for '${actor.name}' on '${scraperSlug}'`); - const profile = await scraper.fetchProfile(actor.name, scraperSlug, siteOrNetwork, include); + const profile = await scraper.fetchProfile(actor.name, context, include); if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure logger.verbose(`Profile for '${actor.name}' not available on ${scraperSlug}, scraper returned ${profile}`); @@ -392,8 +395,7 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) { return { ...actor, ...profile, - scraper: scraperSlug, - site: siteOrNetwork, + ...context, }; }), Promise.reject(new Error())); } catch (error) { @@ -424,7 +426,8 @@ async function scrapeActors(actorNames) { ]); const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {}); - const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: { ...network, isNetwork: true } }), {}); + + const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: network }), {}); const sitesBySlug = sites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {}); const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]); @@ -456,6 +459,8 @@ async function scrapeActors(actorNames) { await upsertProfiles(profilesWithAvatarIds); await interpolateProfiles(actors); } + + return profiles; } async function getOrCreateActors(baseActors, batchId) { diff --git a/src/app.js b/src/app.js index 11b55c64..adeaf70c 100644 --- a/src/app.js +++ b/src/app.js @@ -20,7 +20,13 @@ async function init() { } if (argv.actors) { - await scrapeActors(argv.actors); + const actors = await scrapeActors(argv.actors); + + if (argv.actorScenes) { + const actorReleases = actors.map(actor => actor.releases).flat().filter(Boolean); + + await storeReleases(actorReleases); + } } const updateBaseScenes = (argv.scrape || argv.sites || argv.networks) && await fetchUpdates(); diff --git a/src/argv.js b/src/argv.js index 1d838384..c56887c9 100644 --- a/src/argv.js +++ b/src/argv.js @@ -32,13 +32,13 @@ const { argv } = yargs .option('actor-scenes', { describe: 'Fetch all scenes for an actor', type: 'boolean', - alias: 'with-releases', + alias: 'with-scenes', default: false, }) .option('movie-scenes', { describe: 'Fetch all scenes for a movie', type: 'boolean', - alias: 'with-releases', + alias: 'with-scenes', default: false, }) .option('scene-movies', { diff --git a/src/scrapers/aziani.js b/src/scrapers/aziani.js index 0773ad23..67932c44 100644 --- a/src/scrapers/aziani.js +++ b/src/scrapers/aziani.js @@ -126,7 +126,7 @@ async function fetchScene(url, site) { return res.status; } -async function fetchProfile(actorName, scraperSlug, site) { +async function fetchProfile(actorName, { site }) { const actorSlug = slugify(actorName, ''); const url = `${site.url}/tour/models/${actorSlug}.html`; const res = await get(url, '.page-content .row'); diff --git a/src/scrapers/bamvisions.js b/src/scrapers/bamvisions.js index 658432db..8f6a6c0e 100644 --- a/src/scrapers/bamvisions.js +++ b/src/scrapers/bamvisions.js @@ -132,7 +132,7 @@ async function fetchScene(url, site) { return res.ok ? scrapeScene(res.item, url, site) : res.status; } -async function fetchProfile(actorName, scraperSlug, site, include) { +async function fetchProfile(actorName, { site }, include) { const actorSlugA = slugify(actorName, ''); const actorSlugB = slugify(actorName); diff --git a/src/scrapers/bang.js b/src/scrapers/bang.js index df8246da..46811fe9 100644 --- a/src/scrapers/bang.js +++ b/src/scrapers/bang.js @@ -80,11 +80,69 @@ function scrapeScene(scene, site) { return release; } -function scrapeLatest(scenes, site) { +function scrapeAll(scenes, site) { return scenes.map(({ _source: scene }) => scrapeScene(scene, site)); } -function scrapeProfile(actor) { +async function fetchActorReleases(actor, site) { + const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, { + size: 50, + query: { + bool: { + must: [ + { + match: { + status: 'ok', + }, + }, + { + nested: { + path: 'actors', + query: { + bool: { + must: [ + { + match: { + 'actors.mongoId': { + operator: 'AND', + query: actor.id, + }, + }, + }, + ], + }, + }, + }, + }, + ], + must_not: [ + { + match: { + type: 'trailer', + }, + }, + ], + }, + }, + sort: [ + { + releaseDate: { + order: 'desc', + }, + }, + ], + }, { + encodeJSON: true, + headers: { + Authorization: `Basic ${authKey}`, + }, + }); + + return scrapeAll(res.body.hits.hits, site); +} + + +async function scrapeProfile(actor, site, include) { const profile = {}; profile.aliases = actor.aliases; @@ -115,7 +173,9 @@ function scrapeProfile(actor) { if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`]; if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`; - // TODO: get releases + if (include.releases) { + profile.releases = await fetchActorReleases(actor, site); + } return profile; } @@ -204,7 +264,7 @@ async function fetchLatest(site, page = 1) { }, }); - return scrapeLatest(res.body.hits.hits, site); + return scrapeAll(res.body.hits.hits, site); } async function fetchScene(url, site) { @@ -220,7 +280,7 @@ async function fetchScene(url, site) { return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle } -async function fetchProfile(actorName) { +async function fetchProfile(actorName, actorSlug, site, include) { const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, { size: 5, sort: [{ @@ -255,7 +315,7 @@ async function fetchProfile(actorName) { const actor = res.body.hits.hits.find(hit => hit._source.name.toLowerCase() === actorName.toLowerCase()); if (actor) { - return scrapeProfile(actor._source); + return scrapeProfile(actor._source, site, include); } return null; diff --git a/src/scrapers/blowpass.js b/src/scrapers/blowpass.js index 683baa11..f56fd144 100644 --- a/src/scrapers/blowpass.js +++ b/src/scrapers/blowpass.js @@ -25,8 +25,8 @@ function getActorReleasesUrl(actorPath, page = 1) { return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`; } -async function networkFetchProfile(actorName, scraperSlug, site, include) { - return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include); +async function networkFetchProfile(actorName, context, include) { + return fetchProfile(actorName, context, null, getActorReleasesUrl, include); } module.exports = { diff --git a/src/scrapers/brazzers.js b/src/scrapers/brazzers.js index 95cd7711..80a409ac 100644 --- a/src/scrapers/brazzers.js +++ b/src/scrapers/brazzers.js @@ -200,7 +200,7 @@ async function fetchScene(url, site) { return scrapeScene(res.body.toString(), url, site); } -async function fetchProfile(actorName, scraperSlug, siteOrNetwork, include) { +async function fetchProfile(actorName, context, include) { const searchUrl = 'https://brazzers.com/pornstars-search/'; const searchRes = await bhttp.get(searchUrl, { headers: { diff --git a/src/scrapers/cherrypimps.js b/src/scrapers/cherrypimps.js index b65be552..2341d42e 100644 --- a/src/scrapers/cherrypimps.js +++ b/src/scrapers/cherrypimps.js @@ -120,13 +120,15 @@ async function fetchScene(url, site, release) { return res.ok ? scrapeScene(res.item, url, site, release) : res.status; } -async function fetchProfile(actorName, scraperSlug) { +async function fetchProfile(actorName, { site, network, scraper }) { const actorSlug = slugify(actorName); const actorSlug2 = slugify(actorName, ''); - const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraperSlug) - ? [`https://${scraperSlug}.com/models/${actorSlug}.html`, `https://${scraperSlug}.com/models/${actorSlug2}.html`] - : [`https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug}.html`, `https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug2}.html`]; + const origin = site?.url || network.url; + + const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraper) + ? [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`] + : [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`]; const res = await get(url); if (res.ok) return scrapeProfile(res.item); diff --git a/src/scrapers/famedigital.js b/src/scrapers/famedigital.js index ac6e5a74..97edf33c 100644 --- a/src/scrapers/famedigital.js +++ b/src/scrapers/famedigital.js @@ -10,7 +10,7 @@ const { fetchApiProfile, scrapeAll, } = require('./gamma'); -const { get } = require('../utils/q'); +const { get } = require('../utils/qu'); const slugify = require('../utils/slugify'); function extractLowArtActors(release) { @@ -57,10 +57,19 @@ function getActorReleasesUrl(actorPath, page = 1) { return `https://www.peternorth.com/en/videos/All-Categories/0${actorPath}/All-Dvds/0/latest/${page}`; } -async function fetchClassicProfile(actorName, siteSlug) { +function scrapeClassicProfile({ qu, html }, site) { + const profile = {}; + + profile.avatar = qu.img('.actorPicture'); + profile.releases = scrapeAll(html, null, site.url, false); + + return profile; +} + +async function fetchClassicProfile(actorName, { site }) { const actorSlug = slugify(actorName); - const url = `https://${siteSlug}.com/en/pornstars`; + const url = `${site.url}/en/pornstars`; const pornstarsRes = await get(url); if (!pornstarsRes.ok) return null; @@ -70,41 +79,23 @@ async function fetchClassicProfile(actorName, siteSlug) { ?.value; if (actorPath) { - const actorUrl = `https://${siteSlug}.com${actorPath}`; + const actorUrl = `${site.url}${actorPath}`; const res = await get(actorUrl); if (res.ok) { - const releases = scrapeAll(res.item, null, `https://www.${siteSlug}.com`, false); - - return { releases }; + return scrapeClassicProfile(res.item, site); } } return null; } -async function networkFetchProfile(actorName, scraperSlug, site, include) { - // not all Fame Digital sites offer Gamma actors - const [devils, rocco, peter, silvia] = await Promise.all([ - fetchApiProfile(actorName, 'devilsfilm', true), - fetchApiProfile(actorName, 'roccosiffredi'), - include.scenes ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : [], - include.scenes ? fetchClassicProfile(actorName, 'silviasaint') : [], - include.scenes ? fetchClassicProfile(actorName, 'silverstonedvd') : [], - ]); +async function networkFetchProfile(actorName, context, include) { + const profile = await ((context.site.parameters.api && fetchApiProfile(actorName, context, include)) + || (context.site.parameters.classic && include.scenes && fetchClassicProfile(actorName, context, include)) // classic profiles only have scenes, no bio + || fetchProfile(actorName, context, true, getActorReleasesUrl, include)); - if (devils || rocco || peter) { - const releases = [].concat(devils?.releases || [], rocco?.releases || [], peter?.releases || [], silvia?.releases || []); - - return { - ...peter, - ...rocco, - ...devils, - releases, - }; - } - - return null; + return profile; } module.exports = { diff --git a/src/scrapers/fullpornnetwork.js b/src/scrapers/fullpornnetwork.js index 1347fe74..cd3aaeb0 100644 --- a/src/scrapers/fullpornnetwork.js +++ b/src/scrapers/fullpornnetwork.js @@ -78,12 +78,10 @@ async function fetchScene(url, site) { return res.ok ? scrapeScene(res.item, url, site) : res.status; } -async function fetchProfile(actorName, scraperSlug) { +async function fetchProfile(actorName, { site }) { const actorSlug = slugify(actorName, ''); - const url = scraperSlug === 'povperverts' - ? `https://povperverts.net/models/${actorSlug}.html` - : `https://${scraperSlug}.com/models/${actorSlug}.html`; + const url = `${site.url}/models/${actorSlug}.html`; const res = await get(url); return res.ok ? scrapeProfile(res.item, actorName) : res.status; diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index e02ca336..0e36cc4f 100644 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -552,7 +552,9 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) { return []; } -async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesUrl, include) { +async function fetchProfile(actorName, context, altSearchUrl, getActorReleasesUrl, include) { + const siteSlug = context.site?.slug || context.network.slug; + const actorSlug = actorName.toLowerCase().replace(/\s+/, '+'); const searchUrl = altSearchUrl ? `https://www.${siteSlug}.com/en/search/${actorSlug}/1/actor` @@ -579,7 +581,9 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesU return null; } -async function fetchApiProfile(actorName, siteSlug, site, include) { +async function fetchApiProfile(actorName, context, include) { + const siteSlug = context.site?.slug || context.network.slug; + const actorSlug = encodeURI(actorName); const referer = `https://www.${siteSlug}.com/en/search`; diff --git a/src/scrapers/hush.js b/src/scrapers/hush.js index 28983710..04750ea6 100644 --- a/src/scrapers/hush.js +++ b/src/scrapers/hush.js @@ -381,7 +381,7 @@ async function fetchScene(url, site, baseRelease, beforeFetchLatest) { return scrapeScene(res.item, site, url, baseRelease); } -async function fetchProfile(actorName, scraperSlug, site) { +async function fetchProfile(actorName, { site }) { const actorSlugA = slugify(actorName, ''); const actorSlugB = slugify(actorName); diff --git a/src/scrapers/mikeadriano.js b/src/scrapers/mikeadriano.js index 5847581a..cc2d586f 100644 --- a/src/scrapers/mikeadriano.js +++ b/src/scrapers/mikeadriano.js @@ -235,7 +235,7 @@ async function fetchScene(url, site) { } /* API protected -async function fetchProfile(actorName, scraperSlug, site) { +async function fetchProfile(actorName, context , site) { const session = bhttp.session(); await session.get(`https://tour.${site.slug}.com`); diff --git a/src/scrapers/mindgeek.js b/src/scrapers/mindgeek.js index ea5c5b19..270ed5b8 100644 --- a/src/scrapers/mindgeek.js +++ b/src/scrapers/mindgeek.js @@ -222,8 +222,8 @@ async function fetchScene(url, site) { return null; } -async function fetchProfile(actorName, networkName, actorPath = 'model') { - const url = `https://www.${networkName}.com`; +async function fetchProfile(actorName, networkSlug, actorPath = 'model') { + const url = `https://www.${networkSlug}.com`; const { session, instanceToken } = await getSession(url); const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, { @@ -236,7 +236,7 @@ async function fetchProfile(actorName, networkName, actorPath = 'model') { const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase()); if (actorData) { - const actorUrl = `https://www.${networkName}.com/${actorPath}/${actorData.id}/`; + const actorUrl = `https://www.${networkSlug}.com/${actorPath}/${actorData.id}/`; const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`; const [actorRes, actorReleasesRes] = await Promise.all([ @@ -249,11 +249,11 @@ async function fetchProfile(actorName, networkName, actorPath = 'model') { ]); if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) { - return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkName); + return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkSlug); } if (actorRes.statusCode === 200) { - return scrapeProfile(actorData, actorRes.body.toString(), null, networkName); + return scrapeProfile(actorData, actorRes.body.toString(), null, networkSlug); } } } diff --git a/src/scrapers/nubiles.js b/src/scrapers/nubiles.js index a084ef8f..e78f128b 100644 --- a/src/scrapers/nubiles.js +++ b/src/scrapers/nubiles.js @@ -136,9 +136,9 @@ async function fetchScene(url, site) { return res.ok ? scrapeScene(res.item, url, site) : res.status; } -async function fetchProfile(actorName, siteSlug) { +async function fetchProfile(actorName, { site }) { const firstLetter = actorName.charAt(0).toLowerCase(); - const origin = slugUrlMap[siteSlug] || `https://www.${siteSlug}.com`; + const origin = slugUrlMap[site.slug] || site.url; const url = `${origin}/model/alpha/${firstLetter}`; const resModels = await get(url); diff --git a/src/scrapers/score.js b/src/scrapers/score.js index 79c85022..7ce8527e 100644 --- a/src/scrapers/score.js +++ b/src/scrapers/score.js @@ -217,7 +217,7 @@ async function fetchScene(url, site) { return null; } -async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) { +async function fetchProfile(actorName, context, include, page = 1, source = 0) { const letter = actorName.charAt(0).toUpperCase(); const sources = [ @@ -244,11 +244,11 @@ async function fetchProfile(actorName, scraperSlug, site, include, page = 1, sou return null; } - return fetchProfile(actorName, scraperSlug, site, include, page + 1, source); + return fetchProfile(actorName, context, include, page + 1, source); } if (sources[source + 1]) { - return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1); + return fetchProfile(actorName, context, include, 1, source + 1); } return null; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 9715648d..5f06fa81 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -154,12 +154,12 @@ module.exports = { ddfnetwork, deeper: vixen, deeplush: nubiles, + devilsfilm: famedigital, digitalplayground, dtfsluts: fullpornnetwork, evilangel, eyeontheguy: hush, fakehub, - famedigital, freeones, gangbangcreampie: aziani, girlfaction: fullpornnetwork, @@ -189,15 +189,19 @@ module.exports = { nympho: mikeadriano, onlyprince: fullpornnetwork, pervertgallery: fullpornnetwork, + peternorth: famedigital, pimpxxx: cherrypimps, pornhub, povperverts: fullpornnetwork, povpornstars: hush, private: privateNetwork, realitykings, + roccosiffredi: famedigital, score, seehimfuck: hush, sexyhub: mindgeek, + silverstonedvd: famedigital, + silviasaint: famedigital, swallowed: mikeadriano, thatsitcomshow: nubiles, transangels, @@ -207,6 +211,7 @@ module.exports = { twistys, vixen, wicked, + wildoncam: cherrypimps, xempire, }, }; diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js index cef49b38..71df674c 100644 --- a/src/scrapers/vixen.js +++ b/src/scrapers/vixen.js @@ -232,8 +232,8 @@ async function fetchScene(url, site, baseRelease) { return res.code; } -async function fetchProfile(actorName, scraperSlug, site, include) { - const origin = `https://www.${scraperSlug}.com`; +async function fetchProfile(actorName, { site }, include) { + const origin = site.url; const actorSlug = slugify(actorName); const url = `${origin}/api/${actorSlug}`; const res = await get(url); diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js index f970ba6d..e976754f 100644 --- a/src/scrapers/xempire.js +++ b/src/scrapers/xempire.js @@ -23,8 +23,8 @@ function getActorReleasesUrl(actorPath, page = 1) { return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`; } -async function networkFetchProfile(actorName, scraperSlug, site, include) { - return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include); +async function networkFetchProfile(actorName, context, include) { + return fetchProfile(actorName, context, null, getActorReleasesUrl, include); } module.exports = {