Changed qu's HTML element detection. Passing base actor instead of actorName to profile scrapers.
This commit is contained in:
parent
0e4c0d8fff
commit
939eba8e61
Binary file not shown.
Before Width: | Height: | Size: 741 KiB After Width: | Height: | Size: 356 KiB |
Binary file not shown.
Before Width: | Height: | Size: 7.5 KiB After Width: | Height: | Size: 7.9 KiB |
Binary file not shown.
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 32 KiB |
|
@ -719,7 +719,7 @@ const tagPhotos = [
|
||||||
['latina', 2, 'Alexis Love for Penthouse'],
|
['latina', 2, 'Alexis Love for Penthouse'],
|
||||||
['mff', 0, 'Madison Ivy, Adriana Chechik and Keiran Lee in "Day With A Pornstar" for Brazzers'],
|
['mff', 0, 'Madison Ivy, Adriana Chechik and Keiran Lee in "Day With A Pornstar" for Brazzers'],
|
||||||
['mfm', 6, 'Honey Gold in "Slut Puppies 12" for Jules Jordan'],
|
['mfm', 6, 'Honey Gold in "Slut Puppies 12" for Jules Jordan'],
|
||||||
['natural-boobs', 0, 'Autumn Falls in "Manuel Ferrara\'s Ripe 7" for Jules Jordan'],
|
['natural-boobs', 0, 'Valentina Nappi in "Hypnotic Curves" for LesbianX'],
|
||||||
['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
|
['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
|
||||||
['oil', 3, 'Vina Sky for Lubed'],
|
['oil', 3, 'Vina Sky for Lubed'],
|
||||||
['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],
|
['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],
|
||||||
|
|
|
@ -114,12 +114,15 @@ function getAverage(items) {
|
||||||
|
|
||||||
function toBaseActors(actorsOrNames, release) {
|
function toBaseActors(actorsOrNames, release) {
|
||||||
return actorsOrNames.map((actorOrName) => {
|
return actorsOrNames.map((actorOrName) => {
|
||||||
const name = capitalize(actorOrName.name || actorOrName);
|
const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
|
||||||
|
|
||||||
|
const name = capitalize(baseName);
|
||||||
const slug = slugify(name);
|
const slug = slugify(name);
|
||||||
|
|
||||||
const baseActor = {
|
const baseActor = {
|
||||||
name,
|
name,
|
||||||
slug,
|
slug,
|
||||||
|
entryId: entryId || null,
|
||||||
entity: release?.site?.network || release?.entity?.parent || release?.entity || null,
|
entity: release?.site?.network || release?.entity?.parent || release?.entity || null,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -213,6 +216,7 @@ function curateActorEntry(baseActor, batchId) {
|
||||||
name: baseActor.name,
|
name: baseActor.name,
|
||||||
slug: baseActor.slug,
|
slug: baseActor.slug,
|
||||||
entity_id: null,
|
entity_id: null,
|
||||||
|
entry_id: baseActor.entry_id,
|
||||||
batch_id: batchId,
|
batch_id: batchId,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -538,7 +542,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||||
|
|
||||||
logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);
|
logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);
|
||||||
|
|
||||||
const profile = await scraper.fetchProfile(actor.name, context, include);
|
const profile = await scraper.fetchProfile(actor, context, include);
|
||||||
|
|
||||||
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
||||||
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
|
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
|
||||||
|
@ -587,7 +591,7 @@ async function scrapeActors(actorNames) {
|
||||||
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
||||||
.orderBy('entities.type'),
|
.orderBy('entities.type'),
|
||||||
knex('actors')
|
knex('actors')
|
||||||
.select(['id', 'name', 'slug'])
|
.select(['id', 'name', 'slug', 'entry_id'])
|
||||||
.modify((queryBuilder) => {
|
.modify((queryBuilder) => {
|
||||||
if (actorNames.length > 0) {
|
if (actorNames.length > 0) {
|
||||||
queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
|
queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
|
||||||
|
@ -598,12 +602,22 @@ async function scrapeActors(actorNames) {
|
||||||
|
|
||||||
const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
|
const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
|
||||||
|
|
||||||
const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
|
const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
|
||||||
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
|
...acc,
|
||||||
|
[actorEntry.slug]: {
|
||||||
|
...acc[actorEntry.slug],
|
||||||
|
[actorEntry.entryId || null]: actorEntry,
|
||||||
|
},
|
||||||
|
}), {});
|
||||||
|
|
||||||
|
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug][baseActor.entryId]);
|
||||||
|
|
||||||
const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
|
const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
|
||||||
const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
|
const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
|
||||||
const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']);
|
|
||||||
|
const newActorEntries = batchId && await knex('actors')
|
||||||
|
.insert(curatedActorEntries)
|
||||||
|
.returning(['id', 'name', 'slug', 'entry_id']);
|
||||||
|
|
||||||
const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
|
const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
|
||||||
|
|
||||||
|
|
|
@ -69,10 +69,12 @@ async function scrapeScene({ query, html }, url, _site) {
|
||||||
],
|
],
|
||||||
}), {});
|
}), {});
|
||||||
|
|
||||||
release.actors = query.all('.related-model a').map(actorEl => ({
|
release.actors = query.all('.related-model a').map((actorEl) => {
|
||||||
name: query.q(actorEl, null, 'title'),
|
const name = query.q(actorEl, null, 'title');
|
||||||
avatar: actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]],
|
const avatar = actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]];
|
||||||
}));
|
|
||||||
|
return { name, avatar };
|
||||||
|
});
|
||||||
|
|
||||||
release.likes = query.number('.label-rating .like');
|
release.likes = query.number('.label-rating .like');
|
||||||
release.dislikes = query.number('.label-rating .dislike');
|
release.dislikes = query.number('.label-rating .dislike');
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const qu = require('../utils/qu');
|
||||||
|
const slugify = require('../utils/slugify');
|
||||||
// TODO: profile scraping
|
const { feetInchesToCm } = require('../utils/convert');
|
||||||
|
|
||||||
function scrapeLatestBlog(scenes, channel) {
|
function scrapeLatestBlog(scenes, channel) {
|
||||||
return scenes.map(({ query }) => {
|
return scenes.map(({ query }) => {
|
||||||
|
@ -106,6 +106,33 @@ function scrapeScene({ query, html }, url, channel) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build an actor profile from a scraped model page.
 *
 * @param {Object} page - scraped page context; only its `query` helper is used
 * @param {Object} entity - entity being scraped; `entity.url` is passed as the origin for the avatar URL
 * @returns {Object} profile holding whichever of `age`, `height`, `eyes`, `bust`, `cup`,
 *   `waist`, `hip` and `avatar` could be derived from the page
 */
function scrapeProfile({ query }, entity) {
  const profile = {};

  // presumably query.cnts() yields the text of every '.info p' element — confirm against utils/qu
  const bio = query.cnts('.info p').reduce((acc, info) => {
    const match = info?.match(/(\w+):\s*(.*)/);

    if (!match) {
      // not a "Key: value" line; skip it instead of calling .slice() on null
      return acc;
    }

    const [key, value] = match.slice(1);

    return { ...acc, [slugify(key, '_')]: value };
  }, {});

  if (bio.age) {
    const age = Number(bio.age);

    // only keep an age that actually parsed, so a missing or free-text value does not become NaN
    if (Number.isFinite(age)) {
      profile.age = age;
    }
  }

  // NOTE(review): feetInchesToCm receives undefined when the page lists no height — confirm the helper tolerates that
  profile.height = feetInchesToCm(bio.height);
  profile.eyes = bio.eyes || bio.eyecolor;

  const figure = bio.figure || bio.measurements;
  const measurements = figure ? figure.match(/(\d+)(\w+)-(\d+)-(\d+)/) : null;

  // measurements that do not follow the bust+cup-waist-hip pattern are ignored rather than crashing the scrape
  if (measurements) {
    const [bust, cup, waist, hip] = measurements.slice(1);

    profile.bust = Number(bust);
    profile.cup = cup;
    profile.waist = Number(waist);
    profile.hip = Number(hip);
  }

  profile.avatar = query.img('img.main-preview', 'src', { origin: entity.url });

  return profile;
}
|
||||||
|
|
||||||
async function fetchLatestBlog(channel, page) {
|
async function fetchLatestBlog(channel, page) {
|
||||||
const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
|
const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
|
||||||
const res = await qu.getAll(url, '.videos');
|
const res = await qu.getAll(url, '.videos');
|
||||||
|
@ -148,8 +175,32 @@ async function fetchScene(url, channel) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Look up an actor in the site's alphabetical model index and scrape every matching model page.
 *
 * @param {Object} baseActor - actor to search for; only `baseActor.name` is read
 * @param {Object} entity - entity being scraped; `entity.url` is the base of all requested URLs
 * @returns {Promise<Array|number>} one entry per matching model (the scraped profile, or that
 *   page's status when its request failed), or the index page's status when the index request failed
 */
async function fetchProfile(baseActor, entity) {
  // the index is requested by the first character of the actor's name
  const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${baseActor.name.slice(0, 1)}`, '.model');

  if (!modelsRes.ok) {
    return modelsRes.status;
  }

  // exact, case-sensitive comparison against the text of each model's <strong> element
  const models = modelsRes.items.filter(({ query }) => query.cnt('strong') === baseActor.name);

  // NOTE(review): this resolves to an array, while the visible caller checks the result with
  // `!profile || typeof profile === 'number'` — confirm whether a single profile is expected
  return Promise.all(models.map(async (model) => {
    const modelUrl = model.query.url('a', 'href', { origin: entity.url });
    const modelRes = await qu.get(modelUrl);

    if (modelRes.ok) {
      return scrapeProfile(modelRes.item, entity);
    }

    return modelRes.status;
  }));
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
fetchUpcoming,
|
fetchUpcoming,
|
||||||
|
fetchProfile,
|
||||||
};
|
};
|
||||||
|
|
|
@ -176,6 +176,7 @@ module.exports = {
|
||||||
evilangel,
|
evilangel,
|
||||||
eyeontheguy: hush,
|
eyeontheguy: hush,
|
||||||
fakehub,
|
fakehub,
|
||||||
|
exploitedcollegegirls: fcuk,
|
||||||
forbondage: porndoe,
|
forbondage: porndoe,
|
||||||
freeones,
|
freeones,
|
||||||
gangbangcreampie: aziani,
|
gangbangcreampie: aziani,
|
||||||
|
|
|
@ -326,9 +326,9 @@ function init(element, window) {
|
||||||
const quContextFuncs = Object.entries(quFuncs) // dynamically attach methods with context
|
const quContextFuncs = Object.entries(quFuncs) // dynamically attach methods with context
|
||||||
.reduce((acc, [key, func]) => ({
|
.reduce((acc, [key, func]) => ({
|
||||||
...acc,
|
...acc,
|
||||||
[key]: (...args) => (args[0] instanceof globalWindow.HTMLElement // allow for different context
|
[key]: (...args) => (args[0].nodeType === undefined // allow for different context
|
||||||
? func(...args)
|
? func(element, ...args)
|
||||||
: func(element, ...args)),
|
: func(...args)),
|
||||||
}), {});
|
}), {});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
Loading…
Reference in New Issue