Refactored First Anal Quest.

This commit is contained in:
DebaucheryLibrarian 2026-02-02 06:02:50 +01:00
parent 4b0e1b6711
commit ecf7abd2ae
5 changed files with 166 additions and 137 deletions

9
package-lock.json generated
View File

@ -94,7 +94,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.37",
"undici": "^5.28.1",
"unprint": "^0.18.16",
"unprint": "^0.18.18",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3",
"video.js": "^8.6.1",
@ -20380,9 +20380,10 @@
}
},
"node_modules/unprint": {
"version": "0.18.16",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.16.tgz",
"integrity": "sha512-BiqHfGmQIHjTgAta3d2zAnw+jDzlrlJ3IYEkRQe9f3kNMZRbhOTOmWlkRYIzKpJBAEn2ECRwfoiYUaW8gtI5rQ==",
"version": "0.18.18",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.18.tgz",
"integrity": "sha512-M4sjzpPPAemZ1ND+FNlyGnCO4MP1qSupLiary1qZWfFQwSohePG962BWvja4r4AfwqzviV4mAC2RJILVKIPhYg==",
"license": "ISC",
"dependencies": {
"bottleneck": "^2.19.5",
"cookie": "^1.1.1",

View File

@ -153,7 +153,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.37",
"undici": "^5.28.1",
"unprint": "^0.18.16",
"unprint": "^0.18.18",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3",
"video.js": "^8.6.1",

View File

@ -190,6 +190,9 @@ module.exports = {
julesjordan,
amateurallure: julesjordan, // different company, same scraper
swallowsalon: julesjordan, // different company, same scraper
// first anal quest
doubleviewcasting: firstanalquest,
firstanalquest,
// etc
'18vr': badoink,
theflourishxxx: theflourish,
@ -211,8 +214,6 @@ module.exports = {
cherrypimps,
cumlouder,
dorcelclub: dorcel,
doubleviewcasting: firstanalquest,
firstanalquest,
freeones,
hitzefrei,
hookuphotshot,

View File

@ -1,200 +1,224 @@
'use strict';
const qu = require('../utils/qu');
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { stripQuery } = require('../utils/url');
/**
 * Scrape a list of scene thumbnails (layout A) into release objects.
 *
 * @param {Array<{ query: object }>} scenes - one unprint context per scene tile
 * @param {{ url: string, origin: string }} channel - channel the tiles belong to
 * @returns {object[]} one release per tile
 */
function scrapeAllA(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('a.thumb-img, a.thumb', { origin: channel.url, protocol: 'http' });
		// the entry ID is the trailing numeric path segment of the scene URL
		release.entryId = new URL(release.url).pathname.match(/(\d+)\/?$/)?.[1];

		release.title = query.text('.thumb-title, .title');
		// NOTE(review): the third argument is a bare RegExp carried over from the previous helper; confirm unprint's query.date accepts it in this position
		release.date = query.date('.thumb-added, .date', ['MMM D, YYYY', 'MMMM DD, YYYY'], /\w+ \d{1,2}, \d{4}/);
		release.duration = query.duration('.thumb-duration');

		release.actors = query.all('.thumb-models a, .models a').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url, protocol: 'http' }),
		}));

		// the onmouseover handler carries the thumbnail base path and a thumbnail count
		const [, photoUrl, photoCount] = query.attribute('.thumb-img img', 'onmouseover')?.match(/'(.*)', (\d+)\)/) || [];

		if (photoUrl && photoCount) {
			// first numbered thumbnail becomes the poster, the next four the photos
			[release.poster, ...release.photos] = Array.from({ length: 5 }, (_value, index) => unprint.prefixUrl(`${photoUrl}${index + 1}.jpg`, channel.origin, { protocol: 'http' }));
		} else {
			release.poster = query.img('.thumb-img img, .thumb img', { origin: channel.url, protocol: 'http' });
		}

		release.tags = query.contents('.tags a');
		release.rating = query.number('.thumb-rating');

		return release;
	});
}
/**
 * Scrape scene containers (layout B, legacy `qu` helper) into release objects.
 * Poster and trailer come from the Flash player's `flashvars` parameter, which
 * may be embedded in a `document.write("...")` script or present in the markup.
 *
 * @param {Array<{ query: object }>} scenes - one qu context per scene container
 * @param {{ url: string }} channel - channel used to prefix relative URLs
 * @returns {object[]} one release per container
 */
function scrapeAllB(scenes, channel) {
	const flashvarsSelector = 'param[name="flashvars"]';

	const scrapeScene = ({ query }) => {
		const title = query.cnt('.title, h2');
		const description = query.cnt('.description, p textarea');
		const duration = query.dur('.time');

		const release = { title, description, duration };

		// the player markup may be written into the page by an inline script
		const scriptHtml = query.html('script');
		const embeddedHtml = scriptHtml?.match(/document.write\("(.*)"\);/)?.[1];
		const embedded = qu.extract(embeddedHtml);

		const flashvars = embedded?.query.q(flashvarsSelector, 'value') || query.q(flashvarsSelector, 'value');

		if (flashvars) {
			const params = new URLSearchParams(flashvars);

			release.poster = qu.prefixUrl(params.get('image') || params.get('poster'), channel.url);
			release.trailer = params.get('file');
		}

		release.photos = query.imgs('img[src*="sets/"], img[src*="thumbnails/"]', 'src', { origin: channel.url });

		// prefer the set directory from the poster path, fall back to the title slug
		const setId = release.poster?.match(/\/sets\/(.*)\//)?.[1];

		release.entryId = setId || slugify(release.title);

		return release;
	};

	return scenes.map(scrapeScene);
}
/**
 * Scrape a scene page (layout A, legacy `qu` helper) into a release object.
 *
 * @param {{ query: object }} context - qu context for the scene page
 * @param {string} url - scene URL; its trailing numeric segment is the entry ID
 * @param {{ url: string }} channel - channel used to prefix relative URLs
 * @returns {object} the scraped release
 */
function scrapeSceneA({ query }, url, channel) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1];
	release.title = query.cnt('.title, .scene-title h3').replace(/:$/, '');
	release.description = query.cnt('.text-desc p, .info-description p');
	release.duration = query.dur('.media-body li span, .duration');

	release.actors = query.all('.media-body a[href*="models/"], .models a').map((actorEl) => ({
		name: query.cnt(actorEl),
		url: query.url(actorEl, null, 'href', { origin: channel.url }),
	}));

	release.tags = query.cnts('.media-body a[href*="tags/"], .tags a');

	// poster candidates: the player preview first, then paths built from the entry ID
	release.poster = [
		query.img('.player-preview'),
		qu.prefixUrl(`/contents/videos_screenshots/0/${release.entryId}/preview_trailer.mp4.jpg`, channel.url),
		qu.prefixUrl(query.q('param[name="flashvars"]', 'value')?.match(/poster=(.*\.jpg)/)?.[1], channel.url),
		// was `release.entyId`, which interpolated "undefined" into the path
		qu.prefixUrl(`/contents/scenes/${release.entryId}/thumbnails/920x518.jpg`, channel.url),
	];

	// linked full-size photos first, then thumbnails whose links are "#" or lead to signup
	release.photos = query.urls('.thumb-album a:not([href="#"]), .thumbs-photo a:not([href*="signup"])', 'href', { origin: channel.url })
		.concat(query.imgs('.thumb-album a[href="#"] img, .thumbs-photo a[href*="signup"] img', 'src', { origin: channel.url }));

	release.trailer = query.url('a[href*="get_file/"], .download a');

	return release;
}
/**
 * Scrape a model page (layout A, legacy `qu` helper) into a profile object.
 *
 * @param {{ query: object, el: object }} context - qu context for the model page
 * @param {{ url: string }} entity - site the profile belongs to
 * @returns {object} the scraped profile, including the scenes listed on the page
 */
function scrapeProfileA({ query, el }, entity) {
	const profile = {};

	// first run of digits in a bio value, or null instead of NaN when there is none
	const extractNumber = (value) => {
		const digits = value?.match(/\d+/)?.[0];

		return digits ? Number(digits) : null;
	};

	// map each bio row to { slugified_label: value }
	const bio = query.all('.list-model-info li, .profile-info li').reduce((acc, bioEl) => ({
		...acc,
		[slugify(query.cnt(bioEl, '.title, span'), '_')]: query.cnt(bioEl, ':nth-child(2)') || query.q(bioEl, ':nth-child(2)', 'title') || query.text(bioEl),
	}), {});

	profile.dateOfBirth = qu.parseDate(bio.birth_date || bio.date_of_birth, 'DD MMMM, YYYY');
	profile.birthPlace = bio.nationality || bio.place_of_birth || null;

	profile.weight = extractNumber(bio.weight);
	profile.height = extractNumber(bio.height);

	profile.eyes = bio.eye_color;
	profile.hairColor = bio.hair || bio.hair_color;

	profile.aliases = query.text('.sub-title')?.replace(/:\s*/, '').split(/,\s*/);

	// only destructure when the value fits the pattern; a failed match is null and destructuring it would throw
	const measurements = (bio.measurements || bio.body_shape_dimensions)?.match(/(\d+)(\w+)-(\d+)-(\d+)/);

	if (measurements) {
		const [, bust, cup, waist, hip] = measurements;

		profile.bust = Number(bust);
		profile.cup = cup;
		profile.waist = Number(waist);
		profile.hip = Number(hip);
	}

	const description = query.cnt('.model-biography p');
	const avatar = query.img('.model-box img, .profile-model-photo', 'src', { origin: entity.url });

	// skip the site's placeholder text
	if (!/there is no description/.test(description)) {
		profile.description = description;
	}

	if (avatar) {
		profile.avatar = [
			avatar,
			avatar.replace('s2_', 's1_'),
		];
	}

	profile.scenes = scrapeAllA(qu.initAll(el, '.list-thumbs .thumb, .main-thumbs > li'), entity);

	return profile;
}
/**
 * Fetch a page of latest scenes for a layout A channel.
 *
 * @param {{ url: string, parameters?: { latest?: string } }} channel - channel to fetch; `parameters.latest` overrides the default listing path
 * @param {number} page - page number appended to the listing URL
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request was not OK
 */
async function fetchLatestA(channel, page) {
	const url = channel.parameters?.latest
		? `${channel.parameters.latest}/${page}`
		: `${channel.url}/latest-updates/${page}/`;

	const res = await unprint.get(url, { selectAll: '.list-thumbs ul > li, .main-thumbs > li' });

	if (res.ok) {
		return scrapeAllA(res.context, channel);
	}

	return res.status;
}
/**
 * Scrape scene containers (layout B) into release objects, reading poster and
 * trailer from the video player element.
 *
 * @param {Array<{ query: object }>} scenes - one unprint context per scene container
 * @param {{ origin: string }} channel - channel used to resolve relative URLs
 * @returns {object[]} one release per container
 */
function scrapeAllB(scenes, channel) {
	// fresh options object per call, as each query call originally received its own literal
	const mediaOptions = () => ({ origin: channel.origin, protocol: 'http' });
	const placeholderPattern = /there is no description/i;

	const releases = [];

	for (const { query } of scenes) {
		const release = {
			title: query.content('.title, h2'),
			duration: query.duration('.time'),
		};

		const description = query.content('.description, p textarea');
		const isPlaceholder = placeholderPattern.test(description);

		if (!isPlaceholder) {
			release.description = description;
		}

		release.poster = query.poster('#player, #example_video_1', mediaOptions());
		release.trailer = query.video('#player source, #example_video_1 source', mediaOptions());
		release.photos = query.imgs('img[src*="sets/"], img[src*="thumbnails/"]', mediaOptions());

		// prefer the set directory from the poster path, fall back to the title slug
		const setId = release.poster?.match(/\/sets\/(.*)\//)?.[1];

		release.entryId = setId || slugify(release.title);

		releases.push(release);
	}

	return releases;
}
/**
 * Fetch latest scenes for a layout B channel.
 *
 * @param {{ url: string, parameters?: { paginated?: boolean } }} channel - channel to fetch; only paginated channels get a `/page/N` suffix
 * @param {number} page - page number, used only when the channel is paginated
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request was not OK
 */
async function fetchLatestB(channel, page) {
	const url = channel.parameters?.paginated
		? `${channel.url}/page/${page}`
		: channel.url;

	const res = await unprint.get(url, {
		selectAll: '#container, article:not(.sortby)',
		parser: {
			// NOTE(review): presumably forwarded to the HTML parser so the page's inline scripts run and write the player markup; this executes remote code, so confirm it is acceptable for these sites
			runScripts: 'dangerously',
		},
	});

	if (res.ok) {
		return scrapeAllB(res.context, channel);
	}

	return res.status;
}
/**
 * Scrape a scene page (layout A) into a release object.
 *
 * @param {{ query: object }} context - unprint context for the scene page
 * @param {string} url - scene URL; its trailing numeric segment is the entry ID
 * @param {{ url: string }} channel - channel used to resolve relative URLs
 * @returns {object} the scraped release
 */
function scrapeSceneA({ query }, url, channel) {
	// fresh options object per call, as each query call originally received its own literal
	const channelOptions = () => ({ origin: channel.url, protocol: 'http' });
	const toChannelUrl = (path) => unprint.prefixUrl(path, channel.url, { protocol: 'http' });

	const toActor = (actorEl) => {
		const name = unprint.query.content(actorEl);
		const actorUrl = unprint.query.url(actorEl, null, channelOptions());

		return { name, url: actorUrl };
	};

	const entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1];
	const release = { entryId };

	const rawTitle = query.content('.title, .scene-title h3');

	release.title = rawTitle.replace(/:$/, '');

	const description = query.content('.text-desc p, .info-description p');
	const isPlaceholder = /there is no description/i.test(description);

	if (!isPlaceholder) {
		release.description = description;
	}

	release.duration = query.duration('.media-body li span, .duration');
	release.actors = query.all('.media-body a[href*="models/"], .models a').map((actorEl) => toActor(actorEl));
	release.tags = query.contents('.media-body a[href*="tags/"], .tags a');

	// poster candidates in order of preference; empty and duplicate entries are dropped
	const posterCandidates = [
		query.img('.player-preview', { protocol: 'http' }),
		toChannelUrl(`/contents/videos_screenshots/0/${entryId}/preview_trailer.mp4.jpg`),
		toChannelUrl(query.attribute('param[name="flashvars"]', 'value')?.match(/poster=(.*\.jpg)/)?.[1]),
		toChannelUrl(`/contents/scenes/${entryId}/thumbnails/920x518.jpg`),
	];

	release.poster = [...new Set(posterCandidates.filter(Boolean))];

	// linked full-size photos first, then thumbnails whose links are "#" or lead to signup
	const linkedPhotos = query.urls('.thumb-album a:not([href="#"]), .thumbs-photo a:not([href*="signup"])', channelOptions());
	const thumbPhotos = query.imgs('.thumb-album a[href="#"] img, .thumbs-photo a[href*="signup"] img', channelOptions());

	release.photos = linkedPhotos.concat(thumbPhotos);

	const trailerUrl = query.url('a[href*="get_file/"], .download a');

	release.trailer = stripQuery(trailerUrl);

	return release;
}
/**
 * Fetch and scrape a single scene page (layout A).
 *
 * @param {string} url - scene URL
 * @param {object} channel - channel the scene belongs to, passed on to the scraper
 * @returns {Promise<object|number>} scraped release, or the response status when the request was not OK
 */
async function fetchSceneA(url, channel) {
	const res = await unprint.get(url, { select: '.main, .main-content' });

	if (res.ok) {
		return scrapeSceneA(res.context, url, channel);
	}

	return res.status;
}
/**
 * Scrape a model page (layout A) into a profile object.
 *
 * @param {{ query: object }} context - unprint context for the model page
 * @param {{ url: string }} entity - site the profile belongs to
 * @returns {object} the scraped profile, including the scenes listed on the page
 */
function scrapeProfileA({ query }, entity) {
	const profile = {};

	// map each bio row to { slugified_label: value }
	const bio = query.all('.list-model-info li, .profile-info li').reduce((acc, bioEl) => ({
		...acc,
		[slugify(unprint.query.content(bioEl, '.title, span'), '_')]: unprint.query.content(bioEl, ':nth-child(2)')
			|| unprint.query.attribute(bioEl, ':nth-child(2)', 'title')
			|| unprint.query.text(bioEl),
	}), {});

	profile.dateOfBirth = unprint.extractDate(bio.birth_date || bio.date_of_birth, 'DD MMMM, YYYY', { match: null });
	profile.birthPlace = bio.nationality || bio.place_of_birth || null;

	profile.weight = unprint.extractNumber(bio.weight);
	profile.height = unprint.extractNumber(bio.height);

	profile.eyes = bio.eye_color;
	profile.hairColor = bio.hair || bio.hair_color;

	profile.aliases = query.text('.sub-title')?.replace(/:\s*/, '').split(/,\s*/);

	// the raw measurements string is passed through unparsed
	profile.measurements = bio.measurements || bio.body_shape_dimensions;

	const description = query.content('.model-biography p');
	const avatar = query.img('.model-box img, .profile-model-photo', { origin: entity.url, protocol: 'http' });

	// skip the site's placeholder text
	if (!/there is no description/i.test(description)) {
		profile.description = description;
	}

	if (avatar) {
		// offer both the s1_ and s2_ variants of the avatar path, without duplicates
		profile.avatar = Array.from(new Set([
			avatar,
			avatar.replace('s2_', 's1_'),
			avatar.replace('s1_', 's2_'),
		]));
	}

	profile.scenes = scrapeAllA(unprint.initAll(query.all('.list-thumbs .thumb, .main-thumbs > li')), entity);

	return profile;
}
/**
 * Resolve the profile URL for an actor, searching the site by name when the
 * actor has no known URL.
 *
 * @param {{ url?: string, name: string, slug: string }} actor - actor to look up
 * @param {{ origin: string, url: string }} entity - site to search
 * @returns {Promise<string|null>} the profile URL, or null when the search fails or has no matching result
 */
async function getActorUrl(actor, entity) {
	if (actor.url) {
		return actor.url;
	}

	// Double View Casting seems to be case sensitive...
	const res = await unprint.get(`${entity.origin}/models/search/?q=${actor.name}`, { selectAll: '.thumb-modal, .big-thumb' });

	if (!res.ok) {
		// returning the numeric status here would be truthy and get fetched as if it were a URL
		return null;
	}

	// pick the search result whose slugified title equals the actor slug
	const actorItem = res.context.find(({ query }) => slugify(query.content('.thumb-title a, .title')) === actor.slug);

	if (!actorItem) {
		return null;
	}

	const actorUrl = actorItem.query.url('a', { origin: entity.url, protocol: 'http' });

	return actorUrl || null;
}
async function fetchProfileA(actor, { entity }) {
const actorUrl = await getActorUrl(actor, entity);
if (actorUrl) {
const actorRes = await unprint.get(actorUrl);
if (actorRes.ok) {
return scrapeProfileA(actorRes.item, entity);
return scrapeProfileA(actorRes.context, entity);
}
}
return null;

View File

@ -232,6 +232,9 @@ const actors = [
{ entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] },
{ entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic
{ entity: 'pascalssubsluts', name: 'Zlata Shine', fields: ['avatar', 'gender', 'nationality', 'hairColor', 'height', 'description'] }, // sic
{ entity: 'nebraskacoeds', name: 'Mary Beth Haglin', fields: ['avatar'] }, // sic
{ entity: 'firstanalquest', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, // sic
{ entity: 'doubleviewcasting', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, // sic
];
const actorScrapers = scrapers.actors;