Changed qu's HTML element detection. Passing base actor instead of actorName to profile scrapers.
This commit is contained in:
parent
0e4c0d8fff
commit
939eba8e61
Binary file not shown.
Before Width: | Height: | Size: 741 KiB After Width: | Height: | Size: 356 KiB |
Binary file not shown.
Before Width: | Height: | Size: 7.5 KiB After Width: | Height: | Size: 7.9 KiB |
Binary file not shown.
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 32 KiB |
|
@ -719,7 +719,7 @@ const tagPhotos = [
|
|||
['latina', 2, 'Alexis Love for Penthouse'],
|
||||
['mff', 0, 'Madison Ivy, Adriana Chechik and Keiran Lee in "Day With A Pornstar" for Brazzers'],
|
||||
['mfm', 6, 'Honey Gold in "Slut Puppies 12" for Jules Jordan'],
|
||||
['natural-boobs', 0, 'Autumn Falls in "Manuel Ferrara\'s Ripe 7" for Jules Jordan'],
|
||||
['natural-boobs', 0, 'Valentina Nappi in "Hypnotic Curves" for LesbianX'],
|
||||
['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
|
||||
['oil', 3, 'Vina Sky for Lubed'],
|
||||
['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],
|
||||
|
|
|
@ -114,12 +114,15 @@ function getAverage(items) {
|
|||
|
||||
function toBaseActors(actorsOrNames, release) {
|
||||
return actorsOrNames.map((actorOrName) => {
|
||||
const name = capitalize(actorOrName.name || actorOrName);
|
||||
const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
|
||||
|
||||
const name = capitalize(baseName);
|
||||
const slug = slugify(name);
|
||||
|
||||
const baseActor = {
|
||||
name,
|
||||
slug,
|
||||
entryId: entryId || null,
|
||||
entity: release?.site?.network || release?.entity?.parent || release?.entity || null,
|
||||
};
|
||||
|
||||
|
@ -213,6 +216,7 @@ function curateActorEntry(baseActor, batchId) {
|
|||
name: baseActor.name,
|
||||
slug: baseActor.slug,
|
||||
entity_id: null,
|
||||
entry_id: baseActor.entry_id,
|
||||
batch_id: batchId,
|
||||
};
|
||||
}
|
||||
|
@ -538,7 +542,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
|||
|
||||
logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);
|
||||
|
||||
const profile = await scraper.fetchProfile(actor.name, context, include);
|
||||
const profile = await scraper.fetchProfile(actor, context, include);
|
||||
|
||||
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
||||
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
|
||||
|
@ -587,7 +591,7 @@ async function scrapeActors(actorNames) {
|
|||
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
||||
.orderBy('entities.type'),
|
||||
knex('actors')
|
||||
.select(['id', 'name', 'slug'])
|
||||
.select(['id', 'name', 'slug', 'entry_id'])
|
||||
.modify((queryBuilder) => {
|
||||
if (actorNames.length > 0) {
|
||||
queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
|
||||
|
@ -598,12 +602,22 @@ async function scrapeActors(actorNames) {
|
|||
|
||||
const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
|
||||
|
||||
const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
|
||||
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
|
||||
const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
|
||||
...acc,
|
||||
[actorEntry.slug]: {
|
||||
...acc[actorEntry.slug],
|
||||
[actorEntry.entryId || null]: actorEntry,
|
||||
},
|
||||
}), {});
|
||||
|
||||
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug][baseActor.entryId]);
|
||||
|
||||
const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
|
||||
const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
|
||||
const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']);
|
||||
|
||||
const newActorEntries = batchId && await knex('actors')
|
||||
.insert(curatedActorEntries)
|
||||
.returning(['id', 'name', 'slug', 'entry_id']);
|
||||
|
||||
const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
|
||||
|
||||
|
|
|
@ -69,10 +69,12 @@ async function scrapeScene({ query, html }, url, _site) {
|
|||
],
|
||||
}), {});
|
||||
|
||||
release.actors = query.all('.related-model a').map(actorEl => ({
|
||||
name: query.q(actorEl, null, 'title'),
|
||||
avatar: actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]],
|
||||
}));
|
||||
release.actors = query.all('.related-model a').map((actorEl) => {
|
||||
const name = query.q(actorEl, null, 'title');
|
||||
const avatar = actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]];
|
||||
|
||||
return { name, avatar };
|
||||
});
|
||||
|
||||
release.likes = query.number('.label-rating .like');
|
||||
release.dislikes = query.number('.label-rating .dislike');
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
|
||||
// TODO: profile scraping
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
function scrapeLatestBlog(scenes, channel) {
|
||||
return scenes.map(({ query }) => {
|
||||
|
@ -106,6 +106,33 @@ function scrapeScene({ query, html }, url, channel) {
|
|||
return release;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * Reads every '.info p' paragraph as a 'Key: value' line, collects them into a
 * bio keyed by the slugified key, and maps the known fields onto the profile.
 * Paragraphs that are not in 'Key: value' form and measurements that do not
 * look like '34C-24-36' are ignored instead of throwing.
 *
 * @param {{ query: object }} item - qu item whose `query` exposes `cnts` and `img`.
 * @param {{ url: string }} entity - Entity the page belongs to; its `url` is passed as the avatar origin.
 * @returns {object} Profile with `age`, `height`, `eyes`, `avatar`, and, when
 *   measurements could be parsed, `bust`, `cup`, `waist` and `hip`.
 */
function scrapeProfile({ query }, entity) {
	const profile = {};

	const bio = query.cnts('.info p').reduce((acc, info) => {
		// String#match returns null when the paragraph is not a 'Key: value' line
		const match = info?.match(/(\w+):\s*(.*)/);

		if (!match) {
			return acc;
		}

		const [key, value] = match.slice(1);

		return { ...acc, [slugify(key, '_')]: value };
	}, {});

	const age = Number.parseInt(bio.age, 10);

	// a missing or non-numeric age becomes null rather than NaN
	profile.age = Number.isNaN(age) ? null : age;
	// only convert when a height was actually listed
	profile.height = bio.height ? feetInchesToCm(bio.height) : null;
	profile.eyes = bio.eyes || bio.eyecolor;

	// cup is letters only and optional, so '34-24-36' is not misread as bust 3, cup '4'
	const measurements = (bio.figure || bio.measurements)?.match(/(\d+)([a-z]+)?-(\d+)-(\d+)/i);

	if (measurements) {
		const [bust, cup, waist, hip] = measurements.slice(1);

		profile.bust = Number(bust);
		profile.waist = Number(waist);
		profile.hip = Number(hip);

		if (cup) {
			profile.cup = cup;
		}
	}

	profile.avatar = query.img('img.main-preview', 'src', { origin: entity.url });

	return profile;
}
|
||||
|
||||
async function fetchLatestBlog(channel, page) {
|
||||
const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
|
||||
const res = await qu.getAll(url, '.videos');
|
||||
|
@ -148,8 +175,32 @@ async function fetchScene(url, channel) {
|
|||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Look up an actor in the site's alphabetical model index and scrape their profile page.
 *
 * The index page for the first letter of the actor's name is fetched, the
 * first '.model' entry whose <strong> text equals the actor's name is
 * followed, and that page is handed to scrapeProfile.
 *
 * @param {{ name: string }} baseActor - Base actor to look up; only `name` is read here.
 * @param {{ url: string }} entity - Entity whose `url` is the site origin.
 * @returns {Promise<object|number|null>} The scraped profile, the HTTP status
 *   of whichever request failed, or null when no index entry matches the name.
 */
async function fetchProfile(baseActor, entity) {
	const initial = encodeURIComponent(baseActor.name.slice(0, 1));
	const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${initial}`, '.model');

	if (!modelsRes.ok) {
		return modelsRes.status;
	}

	// return a single profile rather than an array of them; an array (even an
	// empty one) is truthy and not a number, so it would pass for a profile
	const model = modelsRes.items.find(({ query }) => query.cnt('strong') === baseActor.name);

	if (!model) {
		return null;
	}

	const modelUrl = model.query.url('a', 'href', { origin: entity.url });
	const modelRes = await qu.get(modelUrl);

	if (modelRes.ok) {
		return scrapeProfile(modelRes.item, entity);
	}

	return modelRes.status;
}
|
||||
|
||||
// Scraper entry points exposed by this module.
module.exports = { fetchLatest, fetchScene, fetchUpcoming, fetchProfile };
|
||||
|
|
|
@ -176,6 +176,7 @@ module.exports = {
|
|||
evilangel,
|
||||
eyeontheguy: hush,
|
||||
fakehub,
|
||||
exploitedcollegegirls: fcuk,
|
||||
forbondage: porndoe,
|
||||
freeones,
|
||||
gangbangcreampie: aziani,
|
||||
|
|
|
@ -326,9 +326,9 @@ function init(element, window) {
|
|||
const quContextFuncs = Object.entries(quFuncs) // dynamically attach methods with context
|
||||
.reduce((acc, [key, func]) => ({
|
||||
...acc,
|
||||
[key]: (...args) => (args[0] instanceof globalWindow.HTMLElement // allow for different context
|
||||
? func(...args)
|
||||
: func(element, ...args)),
|
||||
[key]: (...args) => (args[0].nodeType === undefined // allow for different context
|
||||
? func(element, ...args)
|
||||
: func(...args)),
|
||||
}), {});
|
||||
|
||||
return {
|
||||
|
|
Loading…
Reference in New Issue