Changed qu's HTML element detection. Passing base actor instead of actorName to profile scrapers.

DebaucheryLibrarian 2020-07-21 01:16:26 +02:00
parent 0e4c0d8fff
commit 939eba8e61
9 changed files with 84 additions and 16 deletions

Binary image changed (741 KiB before, 356 KiB after).

Binary image changed (7.5 KiB before, 7.9 KiB after).

Binary image changed (30 KiB before, 32 KiB after).

@@ -719,7 +719,7 @@ const tagPhotos = [
   ['latina', 2, 'Alexis Love for Penthouse'],
   ['mff', 0, 'Madison Ivy, Adriana Chechik and Keiran Lee in "Day With A Pornstar" for Brazzers'],
   ['mfm', 6, 'Honey Gold in "Slut Puppies 12" for Jules Jordan'],
-  ['natural-boobs', 0, 'Autumn Falls in "Manuel Ferrara\'s Ripe 7" for Jules Jordan'],
+  ['natural-boobs', 0, 'Valentina Nappi in "Hypnotic Curves" for LesbianX'],
   ['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
   ['oil', 3, 'Vina Sky for Lubed'],
   ['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],

@@ -114,12 +114,15 @@ function getAverage(items) {
 function toBaseActors(actorsOrNames, release) {
   return actorsOrNames.map((actorOrName) => {
-    const name = capitalize(actorOrName.name || actorOrName);
+    const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
+    const name = capitalize(baseName);
     const slug = slugify(name);

     const baseActor = {
       name,
       slug,
+      entryId: entryId || null,
       entity: release?.site?.network || release?.entity?.parent || release?.entity || null,
     };
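
This hunk introduces a `Name:entryId` convention for disambiguating actors that share a name. A minimal self-contained sketch of just the parsing step, with hypothetical inputs (the helper name is made up for illustration):

// Stand-alone re-implementation of the parsing step in toBaseActors above.
function parseBaseActorName(actorOrName) {
  // 'Jane Doe:12345' -> { name: 'Jane Doe', entryId: '12345' }; plain names get entryId null.
  const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
  return { name: baseName, entryId: entryId || null };
}

console.log(parseBaseActorName('Jane Doe'));       // { name: 'Jane Doe', entryId: null }
console.log(parseBaseActorName('Jane Doe:12345')); // { name: 'Jane Doe', entryId: '12345' }
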
@@ -213,6 +216,7 @@ function curateActorEntry(baseActor, batchId) {
     name: baseActor.name,
     slug: baseActor.slug,
     entity_id: null,
+    entry_id: baseActor.entryId,
     batch_id: batchId,
   };
 }
@@ -538,7 +542,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
     logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);

-    const profile = await scraper.fetchProfile(actor.name, context, include);
+    const profile = await scraper.fetchProfile(actor, context, include);

     if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
       logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
@ -587,7 +591,7 @@ async function scrapeActors(actorNames) {
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id') .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
.orderBy('entities.type'), .orderBy('entities.type'),
knex('actors') knex('actors')
.select(['id', 'name', 'slug']) .select(['id', 'name', 'slug', 'entry_id'])
.modify((queryBuilder) => { .modify((queryBuilder) => {
if (actorNames.length > 0) { if (actorNames.length > 0) {
queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug)); queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
@@ -598,12 +602,22 @@ async function scrapeActors(actorNames) {
   const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});

-  const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
-  const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
+  const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
+    ...acc,
+    [actorEntry.slug]: {
+      ...acc[actorEntry.slug],
+      [actorEntry.entry_id || null]: actorEntry,
+    },
+  }), {});
+
+  const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug]?.[baseActor.entryId]);

   const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
   const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
-  const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']);
+
+  const newActorEntries = batchId && await knex('actors')
+    .insert(curatedActorEntries)
+    .returning(['id', 'name', 'slug', 'entry_id']);

   const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
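
To show why the dedupe map is now keyed on slug first and entry ID second, a self-contained sketch with hypothetical rows (the variable names mirror the diff, the data is made up):

// Two existing actors sharing a slug, disambiguated by entry_id.
const existingActorEntries = [
  { id: 1, slug: 'jane-doe', entry_id: null },
  { id: 2, slug: 'jane-doe', entry_id: '12345' },
];

const bySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
  ...acc,
  [actorEntry.slug]: {
    ...acc[actorEntry.slug],
    [actorEntry.entry_id || null]: actorEntry, // a null key coerces to the string 'null'
  },
}), {});

console.log(Boolean(bySlugAndEntryId['jane-doe']?.[null]));    // true: ID-less entry exists
console.log(Boolean(bySlugAndEntryId['jane-doe']?.['12345'])); // true: this variant exists
console.log(Boolean(bySlugAndEntryId['jane-doe']?.['99999'])); // false: would be scraped as new
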

@@ -69,10 +69,12 @@ async function scrapeScene({ query, html }, url, _site) {
     ],
   }), {});

-  release.actors = query.all('.related-model a').map(actorEl => ({
-    name: query.q(actorEl, null, 'title'),
-    avatar: actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]],
-  }));
+  release.actors = query.all('.related-model a').map((actorEl) => {
+    const name = query.q(actorEl, null, 'title');
+    const avatar = actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]];
+
+    return { name, avatar };
+  });

   release.likes = query.number('.label-rating .like');
   release.dislikes = query.number('.label-rating .dislike');

@@ -1,8 +1,8 @@
 'use strict';

 const qu = require('../utils/qu');
-
-// TODO: profile scraping
+const slugify = require('../utils/slugify');
+const { feetInchesToCm } = require('../utils/convert');

 function scrapeLatestBlog(scenes, channel) {
   return scenes.map(({ query }) => {
@@ -106,6 +106,33 @@ function scrapeScene({ query, html }, url, channel) {
   return release;
 }

+function scrapeProfile({ query }, entity) {
+  const profile = {};
+
+  const bio = query.cnts('.info p').reduce((acc, info) => {
+    const [key, value] = info.match(/(\w+):\s*(.*)/).slice(1);
+
+    return { ...acc, [slugify(key, '_')]: value };
+  }, {});
+
+  profile.age = Number(bio.age);
+  profile.height = feetInchesToCm(bio.height);
+  profile.eyes = bio.eyes || bio.eyecolor;
+
+  if (bio.figure || bio.measurements) {
+    const [bust, cup, waist, hip] = (bio.figure || bio.measurements).match(/(\d+)(\w+)-(\d+)-(\d+)/).slice(1);
+
+    profile.bust = Number(bust);
+    profile.cup = cup;
+    profile.waist = Number(waist);
+    profile.hip = Number(hip);
+  }
+
+  profile.avatar = query.img('img.main-preview', 'src', { origin: entity.url });
+
+  return profile;
+}
+
 async function fetchLatestBlog(channel, page) {
   const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
   const res = await qu.getAll(url, '.videos');
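
To clarify the bio parsing in scrapeProfile, a self-contained sketch with hypothetical '.info p' contents (slugify is stubbed; the real keys depend on the site's markup):

// Simplified stand-in for slugify(key, '_').
const toKey = key => key.toLowerCase().replace(/\W+/g, '_');

const infos = ['Age: 25', 'Eyes: Brown', 'Measurements: 34C-26-36']; // hypothetical

const bio = infos.reduce((acc, info) => {
  const [key, value] = info.match(/(\w+):\s*(.*)/).slice(1);
  return { ...acc, [toKey(key)]: value };
}, {});

console.log(bio); // { age: '25', eyes: 'Brown', measurements: '34C-26-36' }

// The measurements regex then splits the value into bust, cup, waist and hip.
const [bust, cup, waist, hip] = bio.measurements.match(/(\d+)(\w+)-(\d+)-(\d+)/).slice(1);
console.log(bust, cup, waist, hip); // 34 C 26 36
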
@@ -148,8 +175,32 @@ async function fetchScene(url, channel) {
   return res.status;
 }

+async function fetchProfile(baseActor, entity) {
+  const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${baseActor.name.slice(0, 1)}`, '.model');
+
+  if (modelsRes.ok) {
+    const models = modelsRes.items.filter(({ query }) => query.cnt('strong') === baseActor.name);
+
+    return Promise.all(models.map(async (model) => {
+      const modelUrl = model.query.url('a', 'href', { origin: entity.url });
+      const modelRes = await qu.get(modelUrl);
+
+      if (modelRes.ok) {
+        return scrapeProfile(modelRes.item, entity);
+      }
+
+      return modelRes.status;
+    }));
+  }
+
+  return modelsRes.status;
+}
+
 module.exports = {
   fetchLatest,
   fetchScene,
   fetchUpcoming,
+  fetchProfile,
 };
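
A hedged usage sketch of the new fetchProfile (the entity URL is hypothetical; only entity.url and baseActor.name are read by the code above). Note that it resolves to an array, one entry per model page whose listed name matches exactly, or to an HTTP status code if the index request fails:

// Hypothetical call site, in the shape scrapeProfiles now passes along
// (assumes an async context for the await).
const profiles = await fetchProfile(
  { name: 'Jane Doe', slug: 'jane-doe', entryId: null }, // base actor
  { url: 'https://www.example.com' },                    // entity (channel)
);
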

@@ -176,6 +176,7 @@ module.exports = {
   evilangel,
+  exploitedcollegegirls: fcuk,
   eyeontheguy: hush,
   fakehub,
   forbondage: porndoe,
   freeones,
   gangbangcreampie: aziani,

@@ -326,9 +326,9 @@ function init(element, window) {
   const quContextFuncs = Object.entries(quFuncs) // dynamically attach methods with context
     .reduce((acc, [key, func]) => ({
       ...acc,
-      [key]: (...args) => (args[0] instanceof globalWindow.HTMLElement // allow for different context
-        ? func(...args)
-        : func(element, ...args)),
+      [key]: (...args) => (args[0].nodeType === undefined // allow for different context
+        ? func(element, ...args)
+        : func(...args)),
     }), {});

   return {
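
For context on the detection change: elements parsed by another JSDOM window are not instanceof this window's HTMLElement, while nodeType is a plain property on every DOM node, so a duck-typed check also works across windows (this rationale is inferred from the diff, not stated in the commit). A minimal sketch of the dispatch:

// Duck-typed node check mirroring the change above.
const hasContextNode = args => args[0] !== null
  && typeof args[0] === 'object'
  && args[0].nodeType !== undefined;

// Hypothetical wrapper in the spirit of quContextFuncs: prepend the bound
// element unless the caller already supplied a context node of their own.
const withContext = (element, func) => (...args) => (
  hasContextNode(args) ? func(...args) : func(element, ...args)
);
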