traxxx/src/scrapers/assylum.js

132 lines
4.2 KiB
JavaScript

'use strict';
const { get, geta, ctxa } = require('../utils/q');
/**
 * Derive performer names from a free-form description string.
 *
 * Labels ending in a colon, parenthesised text, age phrases and a few filler
 * words are stripped first; the remainder is split on common separators and
 * any piece that still looks like a scene title rather than a name is dropped.
 *
 * @param {?string} actorString - raw description text; may be null or undefined.
 * @returns {string[]} trimmed name candidates, or an empty array.
 */
function extractActors(actorString) {
  if (actorString === null || actorString === undefined) {
    return [];
  }

  // remove Patient:, (date) and other nonsense
  const cleaned = actorString.replace(/.*:|\(.*\)|\d+(-|\s)year(-|\s)old|nurses?|tangled/ig, '');
  const pieces = cleaned.split(/\band\b|\bvs\b|\/|,|&/ig);

  const names = [];

  for (const piece of pieces) {
    const name = piece.trim();

    if (name === '') {
      continue;
    }

    // The literal is evaluated anew on every iteration, so its global flag
    // never carries lastIndex state from one candidate to the next.
    if (/\banal\b|\bschool\b|\bgamer\b|\breturn\b|\bfor\b|\bare\b|\bpart\b|realdoll|bimbo|p\d+/ig.test(name)) {
      continue;
    }

    names.push(name);
  }

  return names;
}
/**
 * Select the known models whose name occurs in a description string.
 *
 * The name is matched literally and case-insensitively. Regex metacharacters
 * in a name are escaped, so "Dr. Gray" does not match "Drx Gray" and a name
 * containing "(" or "+" cannot produce an invalid pattern.
 *
 * @param {?string} actorString - text to search; nothing matches when empty or missing.
 * @param {Array<{name: string}>} models - candidate models.
 * @returns {Array<{name: string}>} the models (same object references) found in the text.
 */
function matchActors(actorString, models) {
  if (!actorString) {
    return [];
  }

  return models.filter((model) => {
    // An empty pattern matches every string, so a nameless model would be
    // attached to every scene.
    if (!model.name) {
      return false;
    }

    const literalName = model.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    return new RegExp(literalName, 'i').test(actorString);
  });
}
/**
 * Build one release object per scene tile on a listing page.
 *
 * @param {Array<{q: Function, qd: Function, qu: Function, qi: Function}>} scenes
 *   per-tile query helpers (presumably produced by `geta` from ../utils/q — confirm).
 * @param {{url: string}} site - site record; `url` is used as the base for links.
 * @param {Array<{name: string}>} models - known models, tried before free-text extraction.
 * @returns {Object[]} releases with entryId, url, title, description, date, actors and poster.
 */
function scrapeLatest(scenes, site, models) {
  const toRelease = ({ q, qd, qu, qi }) => {
    // Drop the first character of the link before appending it to the site URL.
    const path = qu('a.itemimg').slice(1);
    const entryId = path.split('/').pop();
    const url = `${site.url}${path}`;

    // Prefer the thumbnail's alt text, fall back to the heading link.
    const title = q('.itemimg img', 'alt') || q('h4 a', true);
    const description = q('.mas_longdescription', true);
    const date = qd('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);

    const actorText = q('.mas_description', true);
    const knownActors = matchActors(actorText, models);
    const actors = knownActors.length > 0 ? knownActors : extractActors(actorText);

    const imagePath = qi('.itemimg img');
    const poster = `${site.url}/${imagePath}`;

    return {
      entryId,
      url,
      title,
      description,
      date,
      actors,
      poster,
    };
  };

  return scenes.map(toRelease);
}
/**
 * Cut a media path out of raw page markup.
 *
 * The search for `prefix` starts at `marker` when the marker is present and
 * at the beginning of the markup otherwise; the path ends at the first
 * `extension` found after the prefix.
 *
 * @param {string} html - raw page markup.
 * @param {string} marker - text expected to precede the wanted path.
 * @param {string} prefix - first characters of the path.
 * @param {string} extension - last characters of the path.
 * @returns {?string} the path, or null when prefix or extension is not found.
 */
function sliceMediaPath(html, marker, prefix, extension) {
  const markerIndex = html.indexOf(marker);
  const startIndex = html.indexOf(prefix, markerIndex === -1 ? 0 : markerIndex);

  if (startIndex === -1) {
    return null;
  }

  const endIndex = html.indexOf(extension, startIndex);

  if (endIndex === -1) {
    return null;
  }

  return html.slice(startIndex, endIndex + extension.length);
}

/**
 * Build a release object from a single scene page.
 *
 * @param {{html: string, q: Function, qa: Function, qd: Function, qis: Function}} scene
 *   page query helpers plus raw markup (presumably produced by `get` from ../utils/q — confirm).
 * @param {string} url - scene URL; its last path segment becomes the entry ID.
 * @param {{url: string}} site - site record; `url` is used as the base for media links.
 * @param {Array<{name: string}>} models - known models, tried before free-text extraction.
 * @returns {Object} release with url, entryId, title, description, date, actors, tags,
 *   photos, and poster/trailer when they can be located in the markup.
 */
function scrapeScene({ html, q, qa, qd, qis }, url, site, models) {
  const datePattern = /\w+ \d{1,2}, \d{4}/;
  const release = { url };

  [release.entryId] = url.split('/').slice(-1);

  release.title = q('.mas_title', true);
  release.description = q('.mas_longdescription', true);
  release.date = qd('.mas_description', 'MMMM DD, YYYY', datePattern);

  // The description holds both the date and the performers; strip the date
  // and treat a missing description as empty instead of throwing.
  const actorString = (q('.mas_description', true) || '').replace(datePattern, '');
  const actors = matchActors(actorString, models);

  if (actors.length > 0) release.actors = actors;
  else release.actors = extractActors(actorString);

  release.tags = qa('.tags a', true);
  release.photos = qis('.stills img').map(photoPath => `${site.url}/${photoPath}`);

  // The poster is the first faceimages path after the 'splash:' marker. The
  // marker text was previously passed to indexOf as a position, which coerces
  // to 0 and picked up the first faceimages path anywhere on the page.
  const poster = sliceMediaPath(html, 'splash:', 'faceimages/', '.jpg');

  if (poster) release.poster = `${site.url}/${poster}`;

  const trailer = sliceMediaPath(html, 'video/mp4', '/content', '.mp4');

  if (trailer) release.trailer = { src: `${site.url}${trailer}` };

  return release;
}
/**
 * Turn the `.item` tiles of a models page into actor records.
 *
 * @param {{el: Object}} page - page context; `el` is handed to `ctxa` to find the tiles.
 * @param {{url: string}} site - site record; `url` is used as the base for links.
 * @returns {Array<{gender: string, avatar: string, name: string, url: string}>}
 */
function extractModels({ el }, site) {
  const toActor = ({ q, qu }) => {
    const image = q('.itemimg img');
    const avatar = `${site.url}/${image.src}`;

    // Keep only what follows the last colon of the alt text, then strip the
    // site's role labels.
    const label = image.alt.split(':').pop();
    const name = label.replace(/xtreme girl|nurse/ig, '').trim();

    const href = qu('.itemimg');
    const url = `${site.url}${href.slice(1)}`;

    return {
      gender: 'female',
      avatar,
      name,
      url,
    };
  };

  return ctxa(el, '.item').map(toActor);
}
/**
 * Collect the models listed on the site by walking the paginated models index.
 *
 * Pages are requested one after another for as long as the page just fetched
 * lists the next page number. A fixed `{ name: 'Dr. Gray' }` entry is appended
 * to the result.
 *
 * If a page cannot be loaded, the models gathered from earlier pages are
 * returned instead of being thrown away. An empty array is returned only when
 * nothing was gathered at all.
 *
 * @param {{url: string}} site - site record; `url` is the base of the index URL.
 * @param {number} [page=1] - page number to fetch.
 * @param {Object[]} [accModels=[]] - models gathered from previous pages.
 * @returns {Promise<Object[]>} all models gathered.
 */
async function fetchModels(site, page = 1, accModels = []) {
  const url = `${site.url}/?models/${page}`;
  const qModels = await get(url);

  if (!qModels) {
    return accModels.length > 0
      ? accModels.concat({ name: 'Dr. Gray' })
      : [];
  }

  const models = accModels.concat(extractModels(qModels, site));

  const hasNextPage = qModels.qa('.pagenumbers', true)
    .map(pageX => Number(pageX))
    .filter(Boolean) // remove << and >>
    .includes(page + 1);

  if (hasNextPage) {
    return fetchModels(site, page + 1, models);
  }

  return models.concat({ name: 'Dr. Gray' });
}
/**
 * Fetch one listing page and scrape its `.item` tiles into releases.
 *
 * @param {{url: string, parameters: {a: string}}} site - site record; `parameters.a`
 *   and the page number form the `a` query value.
 * @param {number} [page=1] - listing page number.
 * @param {Object[]} models - known models, passed through to scrapeLatest.
 * @returns {Promise<*>} the scraped releases, or the falsy value `geta` produced.
 */
async function fetchLatest(site, page = 1, models) {
  const listingUrl = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
  const qLatest = await geta(listingUrl, '.item');

  if (!qLatest) {
    return qLatest;
  }

  return scrapeLatest(qLatest, site, models);
}
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - scene page URL.
 * @param {{url: string}} site - site record.
 * @param {Object} release - not used by this scraper.
 * @param {Object[]} [preflight] - model list; fetched with fetchModels when not supplied.
 * @returns {Promise<*>} the scraped release, or the falsy value `get` produced.
 */
async function fetchScene(url, site, release, preflight) {
  let models = preflight;

  if (!models) {
    models = await fetchModels(site);
  }

  const qScene = await get(url);

  if (!qScene) {
    return qScene;
  }

  return scrapeScene(qScene, url, site, models);
}
// Public interface of this scraper. `preflight` exposes fetchModels under
// another name; fetchScene accepts a model list as its fourth argument and
// only calls fetchModels itself when none is given.
module.exports = {
fetchLatest,
fetchScene,
preflight: fetchModels,
};