Compare commits

..

5 Commits

Author SHA1 Message Date
DebaucheryLibrarian 46c0b269c3 1.160.4 2021-02-01 20:49:13 +01:00
DebaucheryLibrarian 4b5cd50122 Fixed slug lookup in Perfect Gonzo scraper. 2021-02-01 20:49:08 +01:00
DebaucheryLibrarian aade7490f8 Querying infinite parent depth for deep release entities. 2021-02-01 01:45:30 +01:00
DebaucheryLibrarian 97c088cfb4 Added Anal Only and upcoming scraping to Mike Adriano. Fixed profile expand arrow color. 2021-01-30 17:43:33 +01:00
DebaucheryLibrarian bfb5006e95 Added actor scene URL parameter to Gamma scraper to phase out release URL function. 2021-01-30 01:12:42 +01:00
53 changed files with 204 additions and 197 deletions

View File

@@ -51,7 +51,7 @@
 <Expand
 v-if="bioExpanded"
 :expanded="bioExpanded"
-class="expand expand-dark"
+class="expand expand-light"
 @expand="(state) => bioExpanded = state"
 />
@@ -310,7 +310,7 @@
 <Expand
 :expanded="bioExpanded"
-class="expand expand-dark"
+class="expand expand-light"
 @expand="(state) => bioExpanded = state"
 />
 </div>

package-lock.json (generated), 2 lines changed
View File

@@ -1,6 +1,6 @@
 {
 "name": "traxxx",
-"version": "1.160.3",
+"version": "1.160.4",
 "lockfileVersion": 1,
 "requires": true,
 "dependencies": {

View File

@@ -1,6 +1,6 @@
 {
 "name": "traxxx",
-"version": "1.160.3",
+"version": "1.160.4",
 "description": "All the latest porn releases in one place",
 "main": "src/app.js",
 "scripts": {

34 binary image files added or updated (contents not shown).

View File

@@ -43,6 +43,7 @@ const parentNetworks = [
 url: 'https://www.21sextury.com',
 description: 'Watch all the latest scenes and porn video updates on 21Sextury.com, the best European porn site with the hottest pornstars from all over the world! Watch porn videos from the large network here.',
 parameters: {
+layout: 'api',
 mobile: 'https://m.dpfanatics.com/en/video',
 },
 parent: 'gamma',
@@ -56,6 +57,7 @@ const networks = [
 url: 'https://www.21sextreme.com',
 description: 'Welcome to 21Sextreme.com, your portal to fisting porn, old and young lesbians, horny grannies & extreme BDSM featuring the best Euro & American Pornstars',
 parameters: {
+layout: 'api',
 mobile: 'https://m.dpfanatics.com/en/video',
 },
 parent: '21sextury',
@@ -66,6 +68,7 @@ const networks = [
 url: 'https://www.21naturals.com',
 description: 'Welcome to 21Naturals.com, the porn network featuring the hottest pornstars from all over the world in all natural porn and erotic sex videos. Watch thousands of girls with natural tits',
 parameters: {
+layout: 'api',
 mobile: 'https://m.dpfanatics.com/en/video',
 },
 parent: '21sextury',
@@ -125,6 +128,7 @@ const networks = [
 description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!',
 parameters: {
 mobile: 'https://m.blowpass.com/en/video/v/%d', // v can be any string, %d will be scene ID
+actorScenes: 'https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0{path}/{page}',
 },
 parent: 'gamma',
 },

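Note on the two new parameters above: layout: 'api' points these 21* networks at the shared Gamma scraper's API variant (the dedicated 21naturals/21sextreme/21sextury wrapper modules are deleted further down in this compare), and actorScenes is a URL template the Gamma profile scraper expands per actor. A rough sketch of the assumed shape, using only names that appear elsewhere in this diff; the exact exports of the gamma module are an assumption:

    // Hypothetical sketch: a scraper module exposing a layout-keyed variant.
    const gamma = {
      fetchLatest,                  // default HTML layout
      fetchProfile,
      api: {                        // selected when parameters.layout === 'api'
        fetchLatest: fetchApiLatest,
        fetchUpcoming: fetchApiUpcoming,
        fetchProfile: fetchApiProfile,
      },
    };
    // With parameters: { layout: 'api' } on 21Sextury, the lookup
    // scraper[entity.parameters.layout] resolves to gamma.api.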
View File

@@ -4710,6 +4710,13 @@ const sites = [
 tags: ['anal'],
 parent: 'mikeadriano',
 },
+{
+slug: 'analonly',
+name: 'Anal Only',
+url: 'https://analonly.com',
+tags: ['anal'],
+parent: 'mikeadriano',
+},
 {
 slug: 'allanal',
 name: 'All Anal',

View File

@@ -685,12 +685,13 @@ const tagPhotos = [
 ['69', 2, 'Abigail Mac and Kissa Sins in "Lesbian Anal Workout" for HardX'],
 ['airtight', 7, 'Lana Rhoades in "Gangbang Me 3" for HardX'],
 ['airtight', 6, 'Remy Lacroix in "Ass Worship 14" for Jules Jordan'],
+['airtight', 11, 'Malena Nazionale in "Rocco\'s Perverted Secretaries 2: Italian Edition" for Rocco Siffredi'],
+['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'],
 ['airtight', 10, 'Asa Akira in "Asa Akira To The Limit" for Jules Jordan'],
 ['airtight', 8, 'Veronica Leal in LegalPorno SZ2520'],
-['airtight', 5, 'Chloe Amour in "DP Masters 4" for Jules Jordan'],
 ['airtight', 3, 'Anita Bellini in "Triple Dick Gangbang" for Hands On Hardcore (DDF Network)'],
+['airtight', 5, 'Chloe Amour in "DP Masters 4" for Jules Jordan'],
 ['airtight', 9, 'Cindy Shine in LegalPorno GP1658'],
-['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'],
 ['atm', 3, 'Natasha Teen in "Work That Ass!" for Her Limit'],
 ['atm', 0, 'Roxy Lips in "Under Her Coat" for 21 Naturals'],
 ['atm', 6, 'Jane Wilde in "Teen Anal" for Evil Angel'],
@@ -873,10 +874,11 @@
 ['orgy', 'poster', 'Zoey Mornoe (DP), Jillian Janson (sex), Frida Sante, Katerina Kay and Natasha Starr in "Orgy Masters 6" for Jules Jordan'],
 ['pussy-eating', 4, 'Anastasia Knight and Jillian Janson in "Teach Me" for Screwbox'],
 ['pussy-eating', 7, 'Jewelz Blu and Katie Kush in "Pick Your Pleasure" for Reality Kings'],
-['pussy-eating', 6, 'Abella Danger and Karma Rx in "Neon Dreaming" for Brazzers'],
+['pussy-eating', 8, 'Sia Lust and Lacey London in "Naughty Gamer Girls" for Girls Gone Pink'],
 ['pussy-eating', 0, 'Kali Roses and Emily Willis\' pussy in "Peeping On My Neighbor" for Girl Girl'],
 ['pussy-eating', 2, 'Anikka Albrite and Mia Malkova in "Big Anal Bombshells" for LesbianX'],
 ['pussy-eating', 3, 'Kylie Page and Kalina Ryu in "Training My Masseuse" for All Girl Massage'],
+['pussy-eating', 6, 'Abella Danger and Karma Rx in "Neon Dreaming" for Brazzers'],
 ['pussy-eating', 1, 'Anikka Albrite and Riley Reid for In The Crack'],
 ['redhead', 0, 'Penny Pax in "The Submission of Emma Marx: Boundaries" for New Sensations'],
 ['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],

View File

@@ -622,7 +622,10 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
 const entity = entitiesBySlug[scraperSlug] || null;
 const scraper = scrapers[scraperSlug];
-const layoutScraper = scraper?.[entity.parameters?.layout] || scraper?.[entity.parent?.parameters?.layout] || scraper?.[entity.parent?.parent?.parameters?.layout] || scraper;
+const layoutScraper = scraper?.[entity.parameters?.layout]
+|| scraper?.[entity.parent?.parameters?.layout]
+|| scraper?.[entity.parent?.parent?.parameters?.layout]
+|| scraper;

 const context = {
@@ -631,6 +634,11 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
 network: entity?.parent,
 entity,
 scraper: scraperSlug,
+parameters: {
+...entity?.parent?.parent?.parameters,
+...entity?.parent?.parameters,
+...entity?.parameters,
+},
 };

 const label = context.entity?.name;

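The new parameters object in the profile context merges ancestor parameters so that the most specific entity wins on key collisions. A minimal illustration with hypothetical values:

    // Spread order: grandparent, then parent, then the channel itself.
    const parameters = {
      ...{ layout: 'api', mobile: 'https://m.example.com' },      // entity.parent.parent (hypothetical)
      ...{ layout: 'html' },                                      // entity.parent (hypothetical)
      ...{ actorScenes: 'https://example.com/0{path}/{page}' },   // entity (hypothetical)
    };
    // => { layout: 'html', mobile: 'https://m.example.com', actorScenes: 'https://example.com/0{path}/{page}' }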
View File

@@ -5,49 +5,11 @@ const merge = require('object-merge-advanced');
 const argv = require('./argv');
 const include = require('./utils/argv-include')(argv);
+const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
 const logger = require('./logger')(__filename);
-const knex = require('./knex');
 const qu = require('./utils/qu');
 const scrapers = require('./scrapers/scrapers');

-function urlToSiteSlug(url) {
-try {
-const slug = new URL(url)
-.hostname
-.match(/([\w-]+)\.\w+$/)?.[1]
-.replace(/[-_]+/g, '');
-
-return slug;
-} catch (error) {
-logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
-return null;
-}
-}
-
-async function findEntities(baseReleases) {
-const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
-
-const entitySlugs = Array.from(new Set(
-baseReleasesWithoutEntity
-.map(baseRelease => urlToSiteSlug(baseRelease.url))
-.filter(Boolean),
-));
-
-const entities = await knex('entities')
-.select(knex.raw('entities.*, row_to_json(parents) as parent, json_agg(children) as children'))
-.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
-.leftJoin('entities as children', 'children.parent_id', 'entities.id')
-.whereIn('entities.slug', entitySlugs)
-.groupBy('entities.id', 'parents.id')
-.orderBy('entities.type', 'asc');
-
-// channel entity will overwrite network entity
-const entitiesBySlug = entities.reduce((accEntities, entity) => ({ ...accEntities, [entity.slug]: accEntities[entity.slug] || entity }), {});
-
-return entitiesBySlug;
-}
-
 function toBaseReleases(baseReleasesOrUrls, entity = null) {
 if (!baseReleasesOrUrls) {
 return [];
@@ -106,8 +68,32 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
 return null;
 }

-async function scrapeRelease(baseRelease, entities, type = 'scene') {
-const entity = baseRelease.entity || entities[urlToSiteSlug(baseRelease.url)];
+function findScraper(entity) {
+if (scrapers.releases[entity.slug]) {
+return scrapers.releases[entity.slug];
+}
+
+if (entity.parent) {
+return findScraper(entity.parent);
+}
+
+return null;
+}
+
+function findLayoutScraper(entity, scraper) {
+if (scraper?.[entity.parameters?.layout]) {
+return scraper[entity.parameters.layout];
+}
+
+if (entity.parent) {
+return findLayoutScraper(entity.parent, scraper);
+}
+
+return scraper;
+}
+
+async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
+const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];

 if (!entity) {
 logger.warn(`No entity available for ${baseRelease.url}`);
@@ -121,8 +107,8 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') {
 };
 }

-const scraper = scrapers.releases[entity.slug] || scrapers.releases[entity.parent?.slug] || scrapers.releases[entity.parent?.parent?.slug];
-const layoutScraper = scraper?.[entity.parameters?.layout] || scraper?.[entity.parent?.parameters?.layout] || scraper?.[entity.parent?.parent?.parameters?.layout] || scraper;
+const scraper = findScraper(entity);
+const layoutScraper = findLayoutScraper(entity, scraper);

 if (!layoutScraper) {
 logger.warn(`Could not find scraper for ${baseRelease.url}`);
@@ -184,19 +170,19 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') {
 }
 }

-async function scrapeReleases(baseReleases, entities, type) {
+async function scrapeReleases(baseReleases, entitiesBySlug, type) {
 return Promise.map(
 baseReleases,
-async baseRelease => scrapeRelease(baseRelease, entities, type),
+async baseRelease => scrapeRelease(baseRelease, entitiesBySlug, type),
 { concurrency: 10 },
 );
 }

 async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
 const baseReleases = toBaseReleases(baseReleasesOrUrls);
-const entities = await findEntities(baseReleases);
-const deepReleases = await scrapeReleases(baseReleases, entities, type);
+const entitiesBySlug = await fetchReleaseEntities(baseReleases);
+const deepReleases = await scrapeReleases(baseReleases, entitiesBySlug, type);

 return deepReleases.filter(Boolean);
 }

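findScraper and findLayoutScraper replace the fixed two-level slug and layout lookups with recursion over entity.parent, so a release URL belonging to a channel nested at any depth still resolves to a scraper. A sketch with a hypothetical entity chain:

    // Hypothetical three-level chain; only the top-level network has a registered scraper.
    const entity = {
      slug: 'examplechannel',
      parent: {
        slug: 'exampleserie',
        parent: { slug: 'examplenetwork', parameters: { layout: 'api' } },
      },
    };
    // findScraper(entity) walks up until scrapers.releases['examplenetwork'] matches;
    // findLayoutScraper(entity, scraper) then walks up looking for a parameters.layout key,
    // falling back to the base scraper when none is set.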
View File

@@ -66,6 +66,21 @@ async function curateEntities(entities, includeParameters) {
 return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
 }

+function urlToSiteSlug(url) {
+try {
+const slug = new URL(url)
+.hostname
+.match(/([\w-]+)\.\w+$/)?.[1]
+.replace(/[-_]+/g, '');
+
+return slug;
+} catch (error) {
+logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
+return null;
+}
+}
+
 async function fetchIncludedEntities() {
 const include = {
 includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
@@ -139,6 +154,46 @@ async function fetchIncludedEntities() {
 return curatedNetworks;
 }

+async function fetchReleaseEntities(baseReleases) {
+const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
+
+const entitySlugs = Array.from(new Set(
+baseReleasesWithoutEntity
+.map(baseRelease => urlToSiteSlug(baseRelease.url))
+.filter(Boolean),
+));
+
+const entities = await knex.raw(`
+WITH RECURSIVE tree as (
+SELECT to_jsonb(entities) as entity,
+parent_id,
+array['parent'] as parent_path,
+0 as depth
+FROM entities
+WHERE slug = ANY(:entitySlugs)
+
+UNION ALL
+
+SELECT jsonb_set(tree.entity, tree.parent_path, to_jsonb(entities)),
+entities.parent_id,
+tree.parent_path || array['parent'],
+depth + 1
+FROM tree
+JOIN entities ON tree.parent_id = entities.id
+)
+SELECT entity FROM tree WHERE parent_id is null
+ORDER BY entity->'type' ASC;
+`, { entitySlugs });
+
+// channel entity will overwrite network entity
+const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({
+...accEntities,
+[entity.slug]: accEntities[entity.slug] || curateEntity(entity, true),
+}), {});
+
+return entitiesBySlug;
+}
+
 async function fetchEntity(entityId, type) {
 const entity = await knex('entities')
 .select(knex.raw(`
@@ -290,8 +345,10 @@ module.exports = {
 curateEntity,
 curateEntities,
 fetchIncludedEntities,
+fetchReleaseEntities,
 fetchEntity,
 fetchEntities,
 searchEntities,
 flushEntities,
+urlToSiteSlug,
 };

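fetchReleaseEntities builds each matched entity's full ancestor chain in a single recursive CTE, folding every ancestor into the JSON under a growing ['parent', 'parent', ...] path, so deep release entities no longer lose grandparents to the old one-level join. Roughly, a returned row looks like this (hypothetical data):

    // One element of entities.rows, as consumed by the reduce() above.
    const row = {
      entity: {
        slug: 'examplechannel',
        type: 'channel',
        parent: {
          slug: 'examplenetwork',
          type: 'network',
          parent: { slug: 'exampleparentnetwork', type: 'network' }, // recursion stops where parent_id is null
        },
      },
    };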
View File

@@ -1,10 +0,0 @@
-'use strict';
-
-const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
-
-module.exports = {
-fetchLatest: fetchApiLatest,
-fetchProfile: fetchApiProfile,
-fetchUpcoming: fetchApiUpcoming,
-fetchScene,
-};

View File

@@ -1,10 +0,0 @@
-'use strict';
-
-const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
-
-module.exports = {
-fetchLatest: fetchApiLatest,
-fetchProfile: fetchApiProfile,
-fetchUpcoming: fetchApiUpcoming,
-fetchScene,
-};

View File

@@ -1,10 +0,0 @@
-'use strict';
-
-const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
-
-module.exports = {
-fetchLatest: fetchApiLatest,
-fetchProfile: fetchApiProfile,
-fetchUpcoming: fetchApiUpcoming,
-fetchScene,
-};

View File

@@ -19,17 +19,9 @@ async function fetchSceneWrapper(url, site, baseRelease, options) {
 return release;
 }

-function getActorReleasesUrl(actorPath, page = 1) {
-return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
-}
-
-async function networkFetchProfile({ name: actorName }, context, include) {
-return fetchProfile({ name: actorName }, context, null, getActorReleasesUrl, include);
-}
-
 module.exports = {
 fetchLatest,
-fetchProfile: networkFetchProfile,
+fetchProfile,
 fetchUpcoming,
 fetchScene: fetchSceneWrapper,
 };

View File

@@ -5,6 +5,7 @@ const util = require('util');
 const { JSDOM } = require('jsdom');
 const cheerio = require('cheerio');
 const moment = require('moment');
+const format = require('template-format');

 const logger = require('../logger')(__filename);
 const qu = require('../utils/qu');
@@ -376,26 +377,34 @@ function scrapeActorSearch(html, url, actorName) {
 return actorLink ? actorLink.href : null;
 }

-async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = []) {
+async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, accReleases = [], context) {
 const { origin, pathname } = new URL(profileUrl);
 const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;

-const url = getActorReleasesUrl(profilePath, page);
+const url = (context.parameters.actorScenes && format(context.parameters.actorScenes, { path: profilePath, page }))
+|| getActorReleasesUrl?.(profilePath, page);
+
+if (!url) {
+return [];
+}

 const res = await qu.get(url);

-if (!res.ok) return [];
+if (!res.ok) {
+return [];
+}

 const releases = scrapeAll(res.item.html, null, origin);
 const nextPage = res.item.query.url('.Gamma_Paginator a.next');

 if (nextPage) {
-return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases));
+return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases), context);
 }

 return accReleases.concat(releases);
 }

-async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
+async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases, context) {
 const { query } = qu.extract(html);

 const avatar = query.el('img.actorPicture');
@@ -429,8 +438,8 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
 if (alias) profile.aliases = alias.split(':')[1].trim().split(', ');
 if (nationality) profile.nationality = nationality.split(':')[1].trim();

-if (getActorReleasesUrl && withReleases) {
-profile.releases = await fetchActorReleases(url, getActorReleasesUrl);
+if ((getActorReleasesUrl || context.parameters.actorScenes) && withReleases) {
+profile.releases = await fetchActorReleases(url, getActorReleasesUrl, 1, [], context);
 }

 return profile;
@@ -661,7 +670,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) {
 return [];
 }

-async function fetchProfile({ name: actorName }, context, altSearchUrl, getActorReleasesUrl, include) {
+async function fetchProfile({ name: actorName }, context, include, altSearchUrl, getActorReleasesUrl) {
 const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug;
 const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');
@@ -684,7 +693,7 @@ async function fetchProfile({ name: actorName }, context, altSearchUrl, getActor
 return null;
 }

-return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes);
+return scrapeProfile(actorRes.body.toString(), url, actorName, siteSlug, getActorReleasesUrl, include.scenes, context);
 }

 return null;

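The actorScenes parameter is expanded with template-format, replacing per-network helpers such as the Blowpass getActorReleasesUrl removed above. A sketch using the Blowpass template from config/networks and a made-up actor path:

    const format = require('template-format');

    // profilePath is derived from the actor's profile URL; '/12345/jane-doe' is hypothetical.
    const url = format(
      'https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0{path}/{page}',
      { path: '/12345/jane-doe', page: 2 },
    );
    // => 'https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0/12345/jane-doe/2'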
View File

@@ -67,6 +67,29 @@ async function fetchLatest(channel, page = 1) {
 return res.status;
 }

+async function fetchUpcoming(channel) {
+const { host } = new URL(channel.url);
+const url = `https://tour.${host}`;
+
+const res = await qu.get(url);
+
+if (res.ok) {
+if (res.item.query.exists('a[href*="stackpath.com"]')) {
+throw new Error('URL blocked by StackPath');
+}
+
+const sceneItem = qu.init(res.item.el, '#upcoming-content');
+
+if (sceneItem) {
+return scrapeAll([sceneItem], channel);
+}
+
+return null;
+}
+
+return res.status;
+}
+
 async function fetchScene(url, channel) {
 const cookieJar = http.cookieJar();
 const session = http.session({ cookieJar });
@@ -122,6 +145,7 @@ async function fetchProfile({ name: actorName }, context , site) {
 module.exports = {
 fetchLatest,
+fetchUpcoming,
 // fetchProfile,
 fetchScene,
 };

View File

@@ -3,14 +3,13 @@
 const blake2 = require('blake2');
 const knex = require('../knex');
-const { ex, ctxa } = require('../utils/q');
-const http = require('../utils/http');
+const qu = require('../utils/qu');

 async function getSiteSlugs() {
-return knex('sites')
-.pluck('sites.slug')
-.join('networks', 'networks.id', 'sites.network_id')
-.where('networks.slug', 'perfectgonzo');
+return knex('entities')
+.pluck('entities.slug')
+.join('entities AS parents', 'parents.id', 'entities.parent_id')
+.where('parents.slug', 'perfectgonzo');
 }

 function getHash(identifier) {
@@ -39,8 +38,10 @@ function extractMaleModelsFromTags(tagContainer) {
 return [];
 }

-async function extractChannelFromPhoto(photo, metaSiteSlugs) {
-const siteSlugs = metaSiteSlugs || await getSiteSlugs();
+async function extractChannelFromPhoto(photo, channel) {
+const siteSlugs = (channel.type === 'network' ? channel.children : channel.parent?.children)?.map(child => child.slug)
+|| await getSiteSlugs();
+
 const channelMatch = photo.match(new RegExp(siteSlugs.join('|')));

 if (channelMatch) {
@@ -50,66 +51,50 @@ async function extractChannelFromPhoto(photo, metaSiteSlugs) {
 return null;
 }

-async function scrapeLatest(html, site) {
-const siteSlugs = await getSiteSlugs();
-const { element } = ex(html);
-
-return ctxa(element, '#content-main .itemm').map(({
-q, qa, qlength, qdate, qimages,
-}) => {
-const release = {
-site,
-meta: {
-siteSlugs,
-},
-};
-
-const sceneLink = q('a');
-release.title = sceneLink.title;
-release.url = `${site.url}${sceneLink.href}`;
-release.date = qdate('.nm-date', 'MM/DD/YYYY');
+async function scrapeLatest(scenes, site) {
+return scenes.map(({ query }) => {
+const release = {};
+
+release.title = query.q('a', 'title');
+release.url = query.url('a', 'href', { origin: site.url });
+release.date = query.date('.nm-date', 'MM/DD/YYYY');

 const slug = new URL(release.url).pathname.split('/')[2];
 release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`);

 release.actors = release.title.split('&').map(actor => actor.trim());

-[release.poster, ...release.photos] = qimages('.bloc-link img');
+[release.poster, ...release.photos] = query.imgs('.bloc-link img');

-release.tags = qa('.dropdown ul a', true).slice(1);
-release.duration = qlength('.dropdown p:first-child');
+release.tags = query.cnts('.dropdown ul a').slice(1);
+release.duration = query.duration('.dropdown p:first-child');

 return release;
 });
 }

-async function scrapeScene(html, site, url, metaSiteSlugs) {
-const {
-q, qa, qlength, qdate, qposter, qtrailer,
-} = ex(html);
+async function scrapeScene({ query }, site, url) {
 const release = { url, site };

-release.title = q('#movie-header h2', true);
-release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
+release.title = query.cnt('#movie-header h2');
+release.date = query.date('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

-release.description = q('.container .mg-md', true);
-release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)');
+release.description = query.cnt('.container .mg-md');
+release.duration = query.duration('#video-ribbon .container > div > span:nth-child(3)');

-release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container')));
-release.tags = qa('.tag-container a', true);
+release.actors = query.cnts('#video-info a').concat(extractMaleModelsFromTags(query.q('.tag-container')));
+release.tags = query.cnts('.tag-container a');

-const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true);
+const uhd = query.cnt('#video-ribbon .container > div > span:nth-child(2)');
 if (/4K/.test(uhd)) release.tags = release.tags.concat('4k');

-release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src);
-release.poster = qposter();
+release.photos = query.all('.bxslider_pics img').map(el => el.dataset.original || el.src);
+release.poster = query.poster();

-const trailer = qtrailer();
+const trailer = query.trailer();
 if (trailer) release.trailer = { src: trailer };

-if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs);
+if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], site);

 if (release.channel) {
 const { pathname } = new URL(url);
@@ -124,23 +109,23 @@ async function scrapeScene(html, site, url, metaSiteSlugs) {
 async function fetchLatest(site, page = 1) {
 const url = `${site.url}/movies/page-${page}`;
-const res = await http.get(url);
+const res = await qu.getAll(url, '#content-main [class^="item"]');

-if (res.statusCode === 200) {
-return scrapeLatest(res.body.toString(), site);
+if (res.ok) {
+return scrapeLatest(res.items, site);
 }

-return [];
+return res.status;
 }

-async function fetchScene(url, site, release) {
-const res = await http.get(url);
+async function fetchScene(url, channel) {
+const res = await qu.get(url);

-if (res.statusCode === 200) {
-return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs);
+if (res.ok) {
+return scrapeScene(res.item, channel, url);
 }

-return [];
+return res.status;
 }

 module.exports = {

View File

@@ -47,7 +47,6 @@ const mikeadriano = require('./mikeadriano');
 const milehighmedia = require('./milehighmedia');
 const mindgeek = require('./mindgeek');
 const mofos = require('./mofos');
-const naturals = require('./21naturals');
 const naughtyamerica = require('./naughtyamerica');
 const newsensations = require('./newsensations');
 const nubiles = require('./nubiles');
@@ -62,8 +61,6 @@ const privateNetwork = require('./private'); // reserved keyword
 const puretaboo = require('./puretaboo');
 const realitykings = require('./realitykings');
 const score = require('./score');
-const sextreme = require('./21sextreme');
-const sextury = require('./21sextury');
 const teamskeet = require('./teamskeet');
 const teencoreclub = require('./teencoreclub');
 const topwebmodels = require('./topwebmodels');
@@ -85,9 +82,6 @@ const freeones = require('./freeones');
 const scrapers = {
 releases: {
-'21naturals': naturals,
-'21sextreme': sextreme,
-'21sextury': sextury,
 adulttime,
 amateurallure,
 americanpornstar,
@@ -176,7 +170,7 @@ const scrapers = {
 zerotolerance,
 },
 actors: {
-'21sextury': sextury,
+'21sextury': gamma,
 allanal: mikeadriano,
 amateureuro: porndoe,
 americanpornstar,

View File

@@ -42,39 +42,6 @@ function getAvatarFallbacks(avatar) {
 .flat();
 }

-/*
-async function getTrailerLegacy(scene, site, url) {
-const qualities = [360, 480, 720, 1080, 2160];
-
-const tokenRes = await http.post(`${site.url}/api/__record_tknreq`, {
-file: scene.previewVideoUrl1080P,
-sizes: qualities.join('+'),
-type: 'trailer',
-}, {
-headers: {
-referer: url,
-origin: site.url,
-},
-});
-
-if (!tokenRes.ok) {
-return null;
-}
-
-const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
-const trailersRes = await http.post(trailerUrl, null, { headers: { referer: url } });
-
-if (trailersRes.ok) {
-return qualities.map(quality => (trailersRes.body[quality] ? {
-src: trailersRes.body[quality].token,
-quality,
-} : null)).filter(Boolean);
-}
-
-return null;
-}
-*/
-
 async function getTrailer(scene, channel, url) {
 const res = await http.post(`${channel.url}/graphql`, {
 operationName: 'getToken',

View File

@@ -205,6 +205,8 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) {
 }

 async function scrapeChannel(channelEntity, accNetworkReleases) {
+console.log(channelEntity);
+
 const scraper = scrapers.releases[channelEntity.slug]
 || scrapers.releases[channelEntity.parent?.slug]
 || scrapers.releases[channelEntity.parent?.parent?.slug];