Changed qu's HTML element detection. Passing base actor instead of actorName to profile scrapers.

This commit is contained in:
DebaucheryLibrarian 2020-07-21 01:16:26 +02:00
parent 0e4c0d8fff
commit 939eba8e61
9 changed files with 84 additions and 16 deletions

Binary file not shown (741 KiB → 356 KiB).

Binary file not shown (7.5 KiB → 7.9 KiB).

Binary file not shown (30 KiB → 32 KiB).

View File

@@ -719,7 +719,7 @@ const tagPhotos = [
   ['latina', 2, 'Alexis Love for Penthouse'],
   ['mff', 0, 'Madison Ivy, Adriana Chechik and Keiran Lee in "Day With A Pornstar" for Brazzers'],
   ['mfm', 6, 'Honey Gold in "Slut Puppies 12" for Jules Jordan'],
-  ['natural-boobs', 0, 'Autumn Falls in "Manuel Ferrara\'s Ripe 7" for Jules Jordan'],
+  ['natural-boobs', 0, 'Valentina Nappi in "Hypnotic Curves" for LesbianX'],
   ['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
   ['oil', 3, 'Vina Sky for Lubed'],
   ['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],

View File

@@ -114,12 +114,15 @@ function getAverage(items) {
 function toBaseActors(actorsOrNames, release) {
   return actorsOrNames.map((actorOrName) => {
-    const name = capitalize(actorOrName.name || actorOrName);
+    const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
+    const name = capitalize(baseName);
     const slug = slugify(name);
     const baseActor = {
       name,
       slug,
+      entryId: entryId || null,
       entity: release?.site?.network || release?.entity?.parent || release?.entity || null,
     };
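A minimal sketch of the new `name:entryId` convention, assuming plain string input; `capitalize` and `slugify` below are simplified stand-ins for the project's own helpers:

```js
// Stand-ins for the project's helpers, for illustration only.
const capitalize = s => s.replace(/\b\w/g, c => c.toUpperCase());
const slugify = s => s.toLowerCase().trim().replace(/\s+/g, '-');

function toBaseActor(actorOrName) {
  // 'jane doe:12345' splits into the display name and a site-specific entry ID.
  const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');

  return {
    name: capitalize(baseName),
    slug: slugify(baseName),
    entryId: entryId || null, // null when no ':' is present
  };
}

console.log(toBaseActor('jane doe:12345'));
// => { name: 'Jane Doe', slug: 'jane-doe', entryId: '12345' }
console.log(toBaseActor('jane doe'));
// => { name: 'Jane Doe', slug: 'jane-doe', entryId: null }
```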
@@ -213,6 +216,7 @@ function curateActorEntry(baseActor, batchId) {
     name: baseActor.name,
     slug: baseActor.slug,
     entity_id: null,
+    entry_id: baseActor.entryId,
     batch_id: batchId,
   };
 }
@@ -538,7 +542,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
   logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);
-  const profile = await scraper.fetchProfile(actor.name, context, include);
+  const profile = await scraper.fetchProfile(actor, context, include);
   if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
     logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
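With the whole actor object passed through, a scraper can use the new `entryId` (or `slug`) rather than just the name. A hedged sketch of what that enables; `fetchActorPage` and the example.com URL scheme are invented for illustration:

```js
// 'fetchActorPage' is a stand-in for a scraper's real page fetcher.
const fetchActorPage = async url => ({ url }); // stub

async function fetchProfile(actor, context, include) {
  if (actor.entryId) {
    // With an entry ID on the actor object, go straight to the actor page.
    return fetchActorPage(`https://example.com/models/${actor.entryId}`);
  }

  // Otherwise fall back to the old name-based lookup.
  return fetchActorPage(`https://example.com/search?q=${encodeURIComponent(actor.name)}`);
}
```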
@@ -587,7 +591,7 @@ async function scrapeActors(actorNames) {
     .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
     .orderBy('entities.type'),
   knex('actors')
-    .select(['id', 'name', 'slug'])
+    .select(['id', 'name', 'slug', 'entry_id'])
     .modify((queryBuilder) => {
       if (actorNames.length > 0) {
         queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
@@ -598,12 +602,22 @@ async function scrapeActors(actorNames) {
   const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
-  const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
-  const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
+  const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
+    ...acc,
+    [actorEntry.slug]: {
+      ...acc[actorEntry.slug],
+      [actorEntry.entryId || null]: actorEntry,
+    },
+  }), {});
+  const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug][baseActor.entryId]);
   const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
   const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
-  const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']);
+  const newActorEntries = batchId && await knex('actors')
+    .insert(curatedActorEntries)
+    .returning(['id', 'name', 'slug', 'entry_id']);
   const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
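Deduplication now keys on slug and entry ID together, so the same name with different entry IDs yields separate records. A minimal sketch with plain data; note that the filter as committed indexes `[baseActor.slug]` directly and would throw for a slug with no existing entries at all, which optional chaining (used here) would guard against:

```js
const existing = [
  { slug: 'jane-doe', entryId: null, id: 1 },
  { slug: 'jane-doe', entryId: '12345', id: 2 },
];

// Two-level map: slug first, then entry ID (null for entries without one).
const bySlugAndEntryId = existing.reduce((acc, entry) => ({
  ...acc,
  [entry.slug]: { ...acc[entry.slug], [entry.entryId || null]: entry },
}), {});

const candidates = [
  { slug: 'jane-doe', entryId: '12345' }, // already known
  { slug: 'jane-doe', entryId: '99999' }, // same slug, new entry ID
  { slug: 'john-roe', entryId: null },    // slug not seen before
];

const fresh = candidates.filter(actor => !bySlugAndEntryId[actor.slug]?.[actor.entryId]);
console.log(fresh.map(actor => actor.slug));
// => ['jane-doe', 'john-roe'] (the '99999' entry and the unseen slug)
```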

View File

@@ -69,10 +69,12 @@ async function scrapeScene({ query, html }, url, _site) {
     ],
   }), {});
-  release.actors = query.all('.related-model a').map(actorEl => ({
-    name: query.q(actorEl, null, 'title'),
-    avatar: actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]],
-  }));
+  release.actors = query.all('.related-model a').map((actorEl) => {
+    const name = query.q(actorEl, null, 'title');
+    const avatar = actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]];
+
+    return { name, avatar };
+  });
   release.likes = query.number('.label-rating .like');
   release.dislikes = query.number('.label-rating .dislike');
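The avatar lookup extracts a numeric ID from each actor link's URL. A sketch of that pattern with plain objects standing in for qu's query helpers; the `/view/id/<n>` shape comes from the diff, while the names and URLs are invented:

```js
const actorImagesByActorId = { 123: 'https://example.com/actors/123.jpg' };

const links = [
  { title: 'Jane Doe', href: 'https://example.com/model/view/id/123/jane-doe' },
  { title: 'John Roe', href: 'https://example.com/model/john-roe' }, // no ID in URL
];

const actors = links.map((actorEl) => {
  const name = actorEl.title;
  // Optional chaining leaves avatar undefined when the URL has no /view/id/<n> part.
  const avatar = actorImagesByActorId[actorEl.href.match(/\/view\/id\/(\d+)/)?.[1]];

  return { name, avatar };
});

console.log(actors);
// => [{ name: 'Jane Doe', avatar: 'https://example.com/actors/123.jpg' },
//     { name: 'John Roe', avatar: undefined }]
```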

View File

@@ -1,8 +1,8 @@
 'use strict';
 const qu = require('../utils/qu');
-// TODO: profile scraping
 const slugify = require('../utils/slugify');
+const { feetInchesToCm } = require('../utils/convert');
 function scrapeLatestBlog(scenes, channel) {
   return scenes.map(({ query }) => {
@@ -106,6 +106,33 @@ function scrapeScene({ query, html }, url, channel) {
   return release;
 }
+function scrapeProfile({ query }, entity) {
+  const profile = {};
+
+  const bio = query.cnts('.info p').reduce((acc, info) => {
+    const [key, value] = info.match(/(\w+):\s*(.*)/).slice(1);
+    return { ...acc, [slugify(key, '_')]: value };
+  }, {});
+
+  profile.age = Number(bio.age);
+  profile.height = feetInchesToCm(bio.height);
+  profile.eyes = bio.eyes || bio.eyecolor;
+
+  if (bio.figure || bio.measurements) {
+    const [bust, cup, waist, hip] = (bio.figure || bio.measurements)?.match(/(\d+)(\w+)-(\d+)-(\d+)/).slice(1);
+
+    profile.bust = Number(bust);
+    profile.cup = cup;
+    profile.waist = Number(waist);
+    profile.hip = Number(hip);
+  }
+
+  profile.avatar = query.img('img.main-preview', 'src', { origin: entity.url });
+
+  return profile;
+}
 async function fetchLatestBlog(channel, page) {
   const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
   const res = await qu.getAll(url, '.videos');
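The bio parsing in `scrapeProfile` turns "Key: value" paragraphs into a slug-keyed object, then splits the measurements string. A sketch with plain strings in place of `query.cnts()`; the sample values are invented, and note that a paragraph without the "Key: value" shape would make `.match()` return null and `.slice(1)` throw:

```js
// Simplified stand-in for the project's slugify helper.
const slugify = (s, delimiter = '-') => s.toLowerCase().trim().replace(/\s+/g, delimiter);

const paragraphs = ['Age: 25', 'Height: 5\'7"', 'Figure: 34C-26-36'];

const bio = paragraphs.reduce((acc, info) => {
  const [key, value] = info.match(/(\w+):\s*(.*)/).slice(1);
  return { ...acc, [slugify(key, '_')]: value };
}, {});

console.log(bio); // => { age: '25', height: '5\'7"', figure: '34C-26-36' }

// '34C-26-36' splits into bust, cup, waist, and hip.
const [bust, cup, waist, hip] = bio.figure.match(/(\d+)(\w+)-(\d+)-(\d+)/).slice(1);
console.log({ bust: Number(bust), cup, waist: Number(waist), hip: Number(hip) });
// => { bust: 34, cup: 'C', waist: 26, hip: 36 }
```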
@@ -148,8 +175,32 @@ async function fetchScene(url, channel) {
   return res.status;
 }
+async function fetchProfile(baseActor, entity) {
+  const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${baseActor.name.slice(0, 1)}`, '.model');
+
+  console.log(baseActor);
+
+  if (modelsRes.ok) {
+    const models = modelsRes.items.filter(({ query }) => query.cnt('strong') === baseActor.name);
+
+    return Promise.all(models.map(async (model) => {
+      const modelUrl = model.query.url('a', 'href', { origin: entity.url });
+      const modelRes = await qu.get(modelUrl);
+
+      if (modelRes.ok) {
+        return scrapeProfile(modelRes.item, entity);
+      }
+
+      return modelRes.status;
+    }));
+  }
+
+  return modelsRes.status;
+}
 module.exports = {
   fetchLatest,
   fetchScene,
   fetchUpcoming,
+  fetchProfile,
 };
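This `fetchProfile` resolves either to an array of profiles (one per exact name match on the index page) or to an HTTP status code. A generic sketch of the list-then-detail pattern with stubbed fetch helpers; the names, URLs, and stub data are invented:

```js
// Stubs standing in for qu.getAll and qu.get.
const getAll = async url => ({
  ok: true,
  items: [
    { name: 'Jane Doe', url: 'https://example.com/models/jane-doe' },
    { name: 'Jane Doette', url: 'https://example.com/models/jane-doette' },
  ],
});
const get = async url => ({ ok: true, item: { url } });

async function fetchProfiles(name, baseUrl) {
  const listRes = await getAll(`${baseUrl}/girls?alpha=${name.slice(0, 1)}`);
  if (!listRes.ok) return listRes.status; // mirror the scraper's status passthrough

  // Exact matching avoids pulling in 'Jane Doette' when searching for 'Jane Doe'.
  const matches = listRes.items.filter(item => item.name === name);

  // Fetch each matching model's detail page in parallel.
  return Promise.all(matches.map(async (match) => {
    const detailRes = await get(match.url);
    return detailRes.ok ? detailRes.item : detailRes.status;
  }));
}

fetchProfiles('Jane Doe', 'https://example.com/free').then(console.log);
// => [{ url: 'https://example.com/models/jane-doe' }]
```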

View File

@@ -176,6 +176,7 @@ module.exports = {
   evilangel,
   eyeontheguy: hush,
   fakehub,
+  exploitedcollegegirls: fcuk,
   forbondage: porndoe,
   freeones,
   gangbangcreampie: aziani,

View File

@@ -326,9 +326,9 @@ function init(element, window) {
   const quContextFuncs = Object.entries(quFuncs) // dynamically attach methods with context
     .reduce((acc, [key, func]) => ({
       ...acc,
-      [key]: (...args) => (args[0] instanceof globalWindow.HTMLElement // allow for different context
-        ? func(...args)
-        : func(element, ...args)),
+      [key]: (...args) => (args[0].nodeType === undefined // allow for different context
+        ? func(element, ...args)
+        : func(...args)),
     }), {});

   return {
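The motivation for the detection change: `instanceof` checks fail across realms, because every window has its own `HTMLElement` constructor, whereas duck-typing on `nodeType` works regardless of which window created the element. A sketch assuming jsdom, which the `globalWindow` plumbing in this file suggests:

```js
const { JSDOM } = require('jsdom');

const windowA = new JSDOM('<p>a</p>').window;
const windowB = new JSDOM('<p>b</p>').window;
const elementFromB = windowB.document.querySelector('p');

console.log(elementFromB instanceof windowA.HTMLElement); // false: wrong realm
console.log(elementFromB instanceof windowB.HTMLElement); // true: same realm
console.log(elementFromB.nodeType !== undefined);         // true either way

// The same dispatch as in the diff: anything without a nodeType (e.g. a
// selector string) gets the default element prepended as context.
const isNode = arg => arg.nodeType !== undefined;
console.log(isNode(elementFromB), isNode('.selector')); // true false
```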