Removed stray slice from Gamma scraper.
This commit is contained in:
parent
24adf04fe3
commit
91981a6dd7
|
|
@ -833,7 +833,7 @@ async function scrapeActors(argNames) {
|
||||||
const entitySlugs = sources.flat();
|
const entitySlugs = sources.flat();
|
||||||
|
|
||||||
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
||||||
fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'], prefer: argv.prefer }),
|
fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'], prefer: argv.prefer || 'channel' }),
|
||||||
knex('actors')
|
knex('actors')
|
||||||
.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity'))
|
.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity'))
|
||||||
.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug))
|
.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug))
|
||||||
|
|
|
||||||
|
|
@ -218,7 +218,7 @@ function curateTitle(title, channel) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeApiReleases(json, site, options) {
|
async function scrapeApiReleases(json, site, options) {
|
||||||
return json.slice(0, 6).reduce((acc, scene) => {
|
return json.reduce((acc, scene) => {
|
||||||
if (options.parameters?.extract && scene.sitename !== options.parameters.extract) {
|
if (options.parameters?.extract && scene.sitename !== options.parameters.extract) {
|
||||||
return acc;
|
return acc;
|
||||||
}
|
}
|
||||||
|
|
@ -658,7 +658,7 @@ function scrapeApiProfile(data, releases, siteSlug) {
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatestApi(site, page = 1, options, preData, upcoming = false) {
|
async function fetchLatestApi(site, page = 1, options, _preData, upcoming = false) {
|
||||||
const referer = options.parameters?.referer || `${options.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
|
const referer = options.parameters?.referer || `${options.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
|
||||||
const { apiUrl } = await fetchApiCredentials(referer, site);
|
const { apiUrl } = await fetchApiCredentials(referer, site);
|
||||||
const slug = options.parameters.querySlug || site.slug;
|
const slug = options.parameters.querySlug || site.slug;
|
||||||
|
|
|
||||||
|
|
@ -136,7 +136,7 @@ async function fetchScene(url, channel) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }, entity, include) {
|
async function fetchProfile({ name: actorName }, entity, include) {
|
||||||
const url = `${entity.url}/models/${slugify(actorName)}.en.html`;
|
const url = `${new URL(entity.url).origin}/models/${slugify(actorName)}.en.html`;
|
||||||
const res = await unprint.get(url);
|
const res = await unprint.get(url);
|
||||||
|
|
||||||
return res.ok ? scrapeProfile(res.context, url, include) : res.status;
|
return res.ok ? scrapeProfile(res.context, url, include) : res.status;
|
||||||
|
|
|
||||||
|
|
@ -277,19 +277,36 @@ const scrapers = {
|
||||||
interracialpovs: hush,
|
interracialpovs: hush,
|
||||||
povpornstars: hush,
|
povpornstars: hush,
|
||||||
seehimfuck: hush,
|
seehimfuck: hush,
|
||||||
|
// wankzvr
|
||||||
|
wankzvr,
|
||||||
|
tranzvr: wankzvr,
|
||||||
|
milfvr: wankzvr,
|
||||||
|
// nubiles
|
||||||
|
anilos: nubiles,
|
||||||
|
brattysis: nubiles,
|
||||||
|
deeplush: nubiles,
|
||||||
|
hotcrazymess: nubiles,
|
||||||
|
nfbusty: nubiles,
|
||||||
|
nubilefilms: nubiles,
|
||||||
|
nubiles,
|
||||||
|
thatsitcomshow: nubiles,
|
||||||
|
// porndoe
|
||||||
|
amateureuro: porndoe,
|
||||||
|
forbondage: porndoe,
|
||||||
|
mamacitaz: porndoe,
|
||||||
|
transbella: porndoe,
|
||||||
|
vipsexvault: porndoe,
|
||||||
// etc
|
// etc
|
||||||
'18vr': badoink,
|
'18vr': badoink,
|
||||||
theflourishxxx: theflourish,
|
theflourishxxx: theflourish,
|
||||||
adultempire,
|
adultempire,
|
||||||
archangel,
|
archangel,
|
||||||
allherluv: missax,
|
allherluv: missax,
|
||||||
amateureuro: porndoe,
|
|
||||||
americanpornstar,
|
americanpornstar,
|
||||||
analbbc: fullpornnetwork,
|
analbbc: fullpornnetwork,
|
||||||
analized: fullpornnetwork,
|
analized: fullpornnetwork,
|
||||||
analviolation: fullpornnetwork,
|
analviolation: fullpornnetwork,
|
||||||
angelogodshackoriginal,
|
angelogodshackoriginal,
|
||||||
anilos: nubiles,
|
|
||||||
asiam: modelmedia,
|
asiam: modelmedia,
|
||||||
aziani,
|
aziani,
|
||||||
'2poles1hole': aziani,
|
'2poles1hole': aziani,
|
||||||
|
|
@ -305,23 +322,19 @@ const scrapers = {
|
||||||
vurigvlaanderen: bluedonkeymedia,
|
vurigvlaanderen: bluedonkeymedia,
|
||||||
boobpedia,
|
boobpedia,
|
||||||
bradmontana,
|
bradmontana,
|
||||||
brattysis: nubiles,
|
|
||||||
cherrypimps,
|
cherrypimps,
|
||||||
cumlouder,
|
cumlouder,
|
||||||
deeplush: nubiles,
|
|
||||||
dorcelclub: dorcel,
|
dorcelclub: dorcel,
|
||||||
doubleviewcasting: firstanalquest,
|
doubleviewcasting: firstanalquest,
|
||||||
dtfsluts: fullpornnetwork,
|
dtfsluts: fullpornnetwork,
|
||||||
exploitedx, // only from known URL that will specify site
|
exploitedx, // only from known URL that will specify site
|
||||||
firstanalquest,
|
firstanalquest,
|
||||||
forbondage: porndoe,
|
|
||||||
freeones,
|
freeones,
|
||||||
girlfaction: fullpornnetwork,
|
girlfaction: fullpornnetwork,
|
||||||
hergape: fullpornnetwork,
|
hergape: fullpornnetwork,
|
||||||
hitzefrei,
|
hitzefrei,
|
||||||
homemadeanalwhores: fullpornnetwork,
|
homemadeanalwhores: fullpornnetwork,
|
||||||
hookuphotshot,
|
hookuphotshot,
|
||||||
hotcrazymess: nubiles,
|
|
||||||
inthecrack,
|
inthecrack,
|
||||||
jamesdeen: fullpornnetwork,
|
jamesdeen: fullpornnetwork,
|
||||||
jerkaoke: modelmedia,
|
jerkaoke: modelmedia,
|
||||||
|
|
@ -339,18 +352,13 @@ const scrapers = {
|
||||||
// analvids,
|
// analvids,
|
||||||
analvids: pornbox,
|
analvids: pornbox,
|
||||||
littlecapricedreams,
|
littlecapricedreams,
|
||||||
mamacitaz: porndoe,
|
|
||||||
mariskax,
|
mariskax,
|
||||||
milfvr: wankzvr,
|
|
||||||
missax,
|
missax,
|
||||||
mylf: teamskeet,
|
mylf: teamskeet,
|
||||||
mugfucked: fullpornnetwork,
|
mugfucked: fullpornnetwork,
|
||||||
naughtyamerica,
|
naughtyamerica,
|
||||||
tonightsgirlfriend: naughtyamerica,
|
tonightsgirlfriend: naughtyamerica,
|
||||||
nebraskacoeds: elevatedx,
|
nebraskacoeds: elevatedx,
|
||||||
nfbusty: nubiles,
|
|
||||||
nubilefilms: nubiles,
|
|
||||||
nubiles,
|
|
||||||
onlyprince: fullpornnetwork,
|
onlyprince: fullpornnetwork,
|
||||||
pascalssubsluts,
|
pascalssubsluts,
|
||||||
pervcity,
|
pervcity,
|
||||||
|
|
@ -383,11 +391,7 @@ const scrapers = {
|
||||||
teencoreclub,
|
teencoreclub,
|
||||||
teenmegaworld,
|
teenmegaworld,
|
||||||
testedefudelidade,
|
testedefudelidade,
|
||||||
thatsitcomshow: nubiles,
|
|
||||||
tokyohot,
|
tokyohot,
|
||||||
transbella: porndoe,
|
|
||||||
tranzvr: wankzvr,
|
|
||||||
vipsexvault: porndoe,
|
|
||||||
virtualtaboo,
|
virtualtaboo,
|
||||||
darkroomvr: virtualtaboo,
|
darkroomvr: virtualtaboo,
|
||||||
onlytarts: virtualtaboo,
|
onlytarts: virtualtaboo,
|
||||||
|
|
@ -403,7 +407,6 @@ const scrapers = {
|
||||||
slayed: vixen,
|
slayed: vixen,
|
||||||
wifey: vixen,
|
wifey: vixen,
|
||||||
vrcosplayx: badoink,
|
vrcosplayx: badoink,
|
||||||
wankzvr,
|
|
||||||
wildoncam: cherrypimps,
|
wildoncam: cherrypimps,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -105,9 +105,9 @@ function scrapeScene({ query, window }, { url }) {
|
||||||
function scrapeProfile({ query }) {
|
function scrapeProfile({ query }) {
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
const bioKeys = query.contents('.pornstar-detail__params--top strong, .actor-detail__param-name');
|
const bioKeys = query.contents('.pornstar-detail__params--top strong, .actor-detail__param-name, td.pornstar-detail__info--title');
|
||||||
const bioValues = query.exists('.actor-detail__param-value')
|
const bioValues = query.exists('.actor-detail__param-value, .pornstar-detail__info--title')
|
||||||
? query.contents('.actor-detail__param-value')
|
? query.contents('.actor-detail__param-value, .pornstar-detail__info--title + td')
|
||||||
: query.text('.pornstar-detail__params--top', { join: false })?.map((text) => text.split('•')[0].replace(':', '').trim());
|
: query.text('.pornstar-detail__params--top', { join: false })?.map((text) => text.split('•')[0].replace(':', '').trim());
|
||||||
|
|
||||||
const bio = Object.fromEntries(bioKeys.map((key, index) => [slugify(key, '_'), bioValues[index]]));
|
const bio = Object.fromEntries(bioKeys.map((key, index) => [slugify(key, '_'), bioValues[index]]));
|
||||||
|
|
|
||||||
|
|
@ -133,7 +133,7 @@ async function scrapeProfile({ query }, url, entity, options) {
|
||||||
profile.description = query.cnt('.person__content');
|
profile.description = query.cnt('.person__content');
|
||||||
|
|
||||||
profile.gender = entity.slug === 'tranzvr' ? 'transsexual' : 'female';
|
profile.gender = entity.slug === 'tranzvr' ? 'transsexual' : 'female';
|
||||||
profile.age = bio.age;
|
profile.age = Number(bio.age) || null;
|
||||||
|
|
||||||
profile.birthPlace = bio.birthplace;
|
profile.birthPlace = bio.birthplace;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,6 +124,27 @@ const actors = [
|
||||||
{ entity: 'archangel', name: 'Summer Brielle', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'measurements', 'height', 'aliases'] },
|
{ entity: 'archangel', name: 'Summer Brielle', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'measurements', 'height', 'aliases'] },
|
||||||
{ entity: 'theflourishxxx', name: 'XWifeKaren', fields: ['avatar', 'description'] },
|
{ entity: 'theflourishxxx', name: 'XWifeKaren', fields: ['avatar', 'description'] },
|
||||||
{ entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
{ entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
||||||
|
{ entity: 'tokyohot', name: 'Mai Kawana', url: 'https://my.tokyo-hot.com/cast/2099/', fields: ['avatar', 'birthPlace', 'height', 'cup', 'bust', 'waist', 'hip', 'hairStyle', 'shoeSize', 'bloodType'] },
|
||||||
|
{ entity: 'teenmegaworld', name: 'Sheri Vi', fields: ['avatar', 'description', 'hairColor', 'eyes'] },
|
||||||
|
// wankz
|
||||||
|
{ entity: 'wankzvr', name: 'Melody Marks', fields: ['avatar', 'gender', 'description', 'birthPlace', 'height', 'measurements', 'age'] },
|
||||||
|
{ entity: 'milfvr', name: 'Ember Snow', fields: ['avatar', 'gender', 'description', 'measurements', 'birthPlace', 'height', 'age'] },
|
||||||
|
{ entity: 'tranzvr', name: 'Thayssa Fadinha', fields: ['avatar', 'gender', 'description', 'birthPlace', 'height', 'age'] },
|
||||||
|
// virtual taboo
|
||||||
|
{ entity: 'virtualtaboo', name: 'Kama Oxi', fields: ['avatar', 'birthPlace', 'dateOfBirth', 'naturalBoobs', 'hasTattoos'] },
|
||||||
|
{ entity: 'onlytarts', name: 'Kama Oxi', fields: ['avatar', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight'] },
|
||||||
|
{ entity: 'oopsfamily', name: 'Angel Windell', fields: ['avatar', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight'] },
|
||||||
|
{ entity: 'darkroomvr', name: 'Alexa Flexy', fields: ['avatar', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight'] },
|
||||||
|
// nubiles
|
||||||
|
{ entity: 'nubiles', name: 'Lolli Pop', fields: ['avatar', 'age', 'description', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||||
|
{ entity: 'anilos', name: 'Rina Helen', fields: ['avatar', 'age', 'description', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||||
|
{ entity: 'deeplush', name: 'Penelope Woods', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||||
|
{ entity: 'hotcrazymess', name: 'Eliza Ibarra', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||||
|
{ entity: 'nfbusty', name: 'Ella Reese', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||||
|
{ entity: 'nubilefilms', name: 'Jade Kimiko', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||||
|
{ entity: 'thatsitcomshow', name: 'Casey Calvert', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||||
|
// porndoe
|
||||||
|
{ entity: 'vipsexvault', name: 'Amirah Adara', fields: ['avatar', 'nationality', 'placeOfBirth', 'age', 'naturalBoobs', 'hairColor', 'description'] },
|
||||||
];
|
];
|
||||||
|
|
||||||
const actorScrapers = scrapers.actors;
|
const actorScrapers = scrapers.actors;
|
||||||
|
|
@ -158,7 +179,7 @@ const validators = {
|
||||||
birthCountry: (value) => typeof value === 'string' && value.length > 1,
|
birthCountry: (value) => typeof value === 'string' && value.length > 1,
|
||||||
nationality: (value) => typeof value === 'string' && value.length > 3,
|
nationality: (value) => typeof value === 'string' && value.length > 3,
|
||||||
// height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), // ft in needs to be converted
|
// height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), // ft in needs to be converted
|
||||||
height: (value) => !!Number(value) && value > 150,
|
height: (value) => !!Number(value) && value > 130,
|
||||||
weight: (value) => !!Number(value) && value > 40,
|
weight: (value) => !!Number(value) && value > 40,
|
||||||
eyes: (value) => typeof value === 'string' && value.length > 3,
|
eyes: (value) => typeof value === 'string' && value.length > 3,
|
||||||
hairColor: (value) => typeof value === 'string' && value.length > 3,
|
hairColor: (value) => typeof value === 'string' && value.length > 3,
|
||||||
|
|
@ -188,7 +209,7 @@ const validators = {
|
||||||
|
|
||||||
// profiler in this context is shorthand for profile scraper
|
// profiler in this context is shorthand for profile scraper
|
||||||
async function init() {
|
async function init() {
|
||||||
const entitiesBySlug = await fetchEntitiesBySlug(Object.keys(actorScrapers), { types: ['channel', 'network', 'info'], prefer: argv.prefer });
|
const entitiesBySlug = await fetchEntitiesBySlug(Object.keys(actorScrapers), { types: ['channel', 'network', 'info'], prefer: 'channel' });
|
||||||
|
|
||||||
Object.entries(actorScrapers).reduce(async (chain, [entitySlug, scraper]) => {
|
Object.entries(actorScrapers).reduce(async (chain, [entitySlug, scraper]) => {
|
||||||
await chain;
|
await chain;
|
||||||
|
|
@ -218,6 +239,7 @@ async function init() {
|
||||||
const profile = await fetchProfile({
|
const profile = await fetchProfile({
|
||||||
name: actor.name,
|
name: actor.name,
|
||||||
slug: slugify(actor.name),
|
slug: slugify(actor.name),
|
||||||
|
url: actor.url,
|
||||||
}, {
|
}, {
|
||||||
...entity,
|
...entity,
|
||||||
entity,
|
entity,
|
||||||
|
|
@ -226,13 +248,13 @@ async function init() {
|
||||||
parameters: getRecursiveParameters(entity),
|
parameters: getRecursiveParameters(entity),
|
||||||
}, include);
|
}, include);
|
||||||
|
|
||||||
console.log(profile);
|
|
||||||
console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', '));
|
|
||||||
|
|
||||||
if (!profile) {
|
if (!profile) {
|
||||||
assert.fail('profile not found');
|
assert.fail('profile not found');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(profile);
|
||||||
|
console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', '));
|
||||||
|
|
||||||
await Promise.all(actor.fields.map(async (field) => {
|
await Promise.all(actor.fields.map(async (field) => {
|
||||||
assert.ok(
|
assert.ok(
|
||||||
validators[field]
|
validators[field]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue