Updated all dependencies. Updated MindGeek to Aylo.
This commit is contained in:
433
src/scrapers/aylo.js
Executable file
433
src/scrapers/aylo.js
Executable file
@@ -0,0 +1,433 @@
|
||||
'use strict';
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
const Promise = require('bluebird');
|
||||
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
|
||||
const cookie = require('cookie');
|
||||
const moment = require('moment');
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const http = require('../utils/http');
|
||||
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
/**
 * Resolve the base URL under which a channel's releases are addressed.
 *
 * Precedence: an explicit `parameters.scene` override, then the channel's own
 * URL (when the channel is flagged native or is a network), then the URL of
 * the channel's parent.
 *
 * @param {object} channel - Entity with `url`, `type`, and optional `parameters` / `parent`.
 * @param {string} [path='/scene'] - Path appended to the selected URL.
 * @returns {string} The resolved base path.
 */
function getBasePath(channel, path = '/scene') {
	const sceneOverride = channel.parameters?.scene;

	if (sceneOverride) {
		return sceneOverride;
	}

	const usesOwnUrl = channel.parameters?.native || channel.type === 'network';
	const ownPath = usesOwnUrl && `${channel.url}${path}`;

	return ownPath || `${channel.parent.url}${path}`;
}
|
||||
|
||||
/**
 * Collect thumbnail URLs for a scene, preferring the `poster` image set and
 * falling back to the `card_main_rect` / `card_secondary_rect` sets.
 *
 * @param {object} scene - Release payload with an `images` object.
 * @returns {string[]} XL image URLs; empty when neither image set is usable.
 */
function getThumbs(scene) {
	const {
		poster,
		card_main_rect: mainCards,
		card_secondary_rect: secondaryCards,
	} = scene.images;

	if (poster) {
		// poster may be an index-keyed object rather than an array, and carries
		// a non-object alternateText property that has to be skipped
		return Object.values(poster).flatMap((entry) => (typeof entry === 'object' ? [entry.xl.url] : []));
	}

	if (!Array.isArray(mainCards)) {
		return [];
	}

	const cards = mainCards.concat(secondaryCards || []);

	return cards.map((card) => card.xl.url.replace('.thumb', ''));
}
|
||||
|
||||
/**
 * List the URLs of the first image in `images[target]`, ordered by size
 * preference: md, sm, xs first, then the bigger but usually upscaled xx, xl, lg.
 * Sizes that are absent yield `undefined` entries.
 *
 * @param {object} images - Image sets keyed by kind.
 * @param {string} [target='cover'] - Which image set to read.
 * @returns {Array} A flat list of URLs for 'poster'; for any other target the
 *   list wrapped in an outer array; `[]` when the set is missing.
 */
function getCovers(images, target = 'cover') {
	const imageSet = images[target];

	if (!imageSet) {
		return [];
	}

	const primary = imageSet[0];
	const sizePreference = ['md', 'sm', 'xs', 'xx', 'xl', 'lg'];
	const urls = sizePreference.map((size) => primary[size]?.url);

	return target === 'poster' ? urls : [urls];
}
|
||||
|
||||
/**
 * Extract teaser and trailer sources from a release payload.
 *
 * The teaser is read from `data.videos.mediabook.files`; the trailer from the
 * `videos.full.files` of the first child whose type is 'trailer'.
 *
 * @param {object} data - Release payload; `videos` is required, `children` is optional.
 * @returns {{ teaser: (Array|undefined), trailer: (Array|undefined) }} Each entry is a
 *   list of `{ src, quality }` objects, or falsy when the payload has no such files.
 */
function getVideos(data) {
	// teaser and trailer files share one shape, so a single mapper serves both
	const toSources = (files) => files && Object.values(files).map((file) => ({
		src: file.urls.view,
		quality: parseInt(file.format, 10),
	}));

	const teaserFiles = data.videos.mediabook?.files;

	// children is treated as optional elsewhere in this file (data.children?.map),
	// so a payload without it must not throw here either
	const trailerFiles = data.children?.find((child) => child.type === 'trailer')?.videos.full?.files;

	return {
		teaser: toSources(teaserFiles),
		trailer: toSources(trailerFiles),
	};
}
|
||||
|
||||
/**
 * Convert one item from a release listing into a release object.
 *
 * A release is returned with `exclude: true` (and without `channel`) when:
 *  - `site.parameters.extract === true` and the item belongs to any collection,
 *  - `site.parameters.extract` is a string and no collection has that shortName, or
 *  - `filterChannel` is set and no collection id equals `site.parameters.siteId`.
 *
 * @param {object} data - Listing item.
 * @param {object} site - Entity the listing was requested for.
 * @param {boolean} [filterChannel] - Restrict results to the site's own collection.
 * @returns {object} The release, possibly flagged with `exclude`.
 */
function scrapeLatestX(data, site, filterChannel) {
	const { id: entryId, title, description } = data;

	const url = `${getBasePath(site)}/${entryId}/${slugify(title)}`;
	const date = new Date(data.dateReleased);

	const mediabookLength = data.videos.mediabook?.length;
	const duration = mediabookLength > 1 ? mediabookLength : null;

	const actors = data.actors.map(({ name, gender }) => ({ name, gender }));
	const tags = data.tags.map(({ name }) => name);

	const [poster, ...photos] = getThumbs(data);
	const { teaser, trailer } = getVideos(data);

	const chapters = data.timeTags?.map(({ startTime, endTime, name }) => ({
		time: startTime,
		duration: endTime - startTime,
		tags: [name],
	}));

	const release = {
		entryId,
		title,
		description,
		url,
		date,
		duration,
		actors,
		tags,
		poster,
		photos,
		...(teaser && { teaser }),
		...(trailer && { trailer }),
		chapters,
	};

	const extract = site.parameters?.extract;

	const excluded = (extract === true && data.collections.length > 0) // release should not belong to any channel
		|| (typeof extract === 'string' && !data.collections.some(({ shortName }) => shortName === extract)) // release should belong to specific channel
		|| (filterChannel && !data.collections?.some(({ id }) => id === site.parameters?.siteId)); // used to separate upcoming Brazzers scenes

	if (excluded) {
		return { ...release, exclude: true };
	}

	release.channel = slugify(data.collections[0]?.name || data.brand, '');

	return release;
}
|
||||
|
||||
/**
 * Scrape every listing item and split the results by their `exclude` flag.
 *
 * @param {object[]} items - Listing items.
 * @param {object} site - Entity the listing was requested for.
 * @param {boolean} [filterChannel] - Forwarded to scrapeLatestX.
 * @returns {Promise<{ scenes: object[], unextracted: object[] }>} Releases without
 *   and with the `exclude` flag, respectively.
 */
async function scrapeLatest(items, site, filterChannel) {
	const scenes = [];
	const unextracted = [];

	items
		.map((item) => scrapeLatestX(item, site, filterChannel))
		.forEach((release) => {
			const bucket = release.exclude ? unextracted : scenes;

			bucket.push(release);
		});

	return { scenes, unextracted };
}
|
||||
|
||||
/**
 * Convert a full release payload into a release object, including a shallow
 * parent movie/serie and, for movies, covers and shallow child scenes.
 *
 * @param {object} data - Release payload.
 * @param {?string} url - Known release URL; when falsy one is built from the network name or brand.
 * @param {?object} channel - Entity used to build related URLs; scrapeProfile passes null.
 * @param {string} [networkName] - Network slug used for fallback URLs.
 * @returns {object} The scraped release.
 */
function scrapeRelease(data, url, channel, networkName) {
	const { id: entryId, title, description } = data;

	// Without a channel (the scrapeProfile call path) getBasePath would throw on
	// null, so related URLs fall back to the same origin used for release.url.
	const fallbackOrigin = `https://www.${networkName || data.brand}.com`;
	const resolveBasePath = (path = '/scene') => (channel
		? getBasePath(channel, path)
		: `${fallbackOrigin}${path}`);

	const release = {};

	release.entryId = entryId;
	release.title = title;
	release.description = description;

	release.date = new Date(data.dateReleased);
	release.duration = data.videos.mediabook?.length > 1 ? data.videos.mediabook.length : null;

	release.actors = data.actors.map((actor) => ({ name: actor.name, gender: actor.gender }));
	release.tags = data.tags.map((tag) => tag.name);

	[release.poster, ...release.photos] = getThumbs(data);

	const { teaser, trailer } = getVideos(data);

	if (teaser) release.teaser = teaser;
	if (trailer) release.trailer = trailer;

	release.chapters = data.timeTags?.map((chapter) => ({
		time: chapter.startTime,
		duration: chapter.endTime - chapter.startTime,
		tags: [chapter.name],
	}));

	const siteName = data.collections[0]?.name || data.brand;
	release.channel = slugify(siteName, '');

	release.url = url || `${fallbackOrigin}/scene/${entryId}/`;

	const parentType = data.parent?.type;

	if (parentType === 'movie' || parentType === 'serie') {
		const parentPath = parentType === 'movie' ? '/movie' : '/series';

		release[parentType] = {
			entryId: data.parent.id,
			url: `${resolveBasePath(parentPath)}/${data.parent.id}/${slugify(data.parent.title, '-', { removePunctuation: true })}`,
			title: data.parent.title,
			description: data.parent.description,
			date: new Date(data.parent.dateReleased),
			// NOTE(review): scene-level collections are indexed as an array above; if the
			// parent's collections is an array too, `.name` is always undefined and the
			// brand is always used — confirm the parent payload shape
			channel: slugify(data.parent.collections?.name || data.parent.brand),
			poster: getCovers(data.parent.images, 'poster'),
			shallow: true,
		};
	}

	if (data.type === 'movie') {
		release.covers = getCovers(data.images);
		release.scenes = data.children?.map((scene) => ({
			entryId: scene.id,
			url: `${resolveBasePath()}/${scene.id}/${slugify(scene.title)}`,
			title: scene.title,
			shallow: true,
		}));
	}

	return release;
}
|
||||
|
||||
/**
 * Determine the listing URL for a site.
 *
 * - A URL that already carries a `site` query parameter or a `/site/<digits>`
 *   path segment is returned unchanged.
 * - Sites flagged `native` or `extract` use their own `/scenes` page.
 * - Sites with a `siteId` use the parent's `/scenes?site=<siteId>` page.
 *
 * @param {object} site - Entity with `url`, optional `parameters` and `parent`.
 * @returns {string} The listing URL.
 * @throws {Error} When none of the above applies.
 */
function getUrl(site) {
	const { searchParams, pathname } = new URL(site.url);
	const urlIdentifiesSite = searchParams.has('site') || /\/site\/\d+/.test(pathname);

	if (urlIdentifiesSite) {
		return site.url;
	}

	const { parameters } = site;

	if (parameters?.native || parameters?.extract) {
		return `${site.url}/scenes`;
	}

	if (parameters?.siteId) {
		return `${site.parent.url}/scenes?site=${parameters.siteId}`;
	}

	throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`);
}
|
||||
|
||||
/**
 * Acquire an HTTP session and the `instance_token` cookie by requesting a site page.
 *
 * The parent's URL is requested when the site has a `siteId` and neither the
 * site (`native`, `childSession`) nor its parent (`childSession`) asks for a
 * session of its own; otherwise `url`, or else the site's own URL, is requested.
 *
 * @param {object} site - Entity to acquire a session for.
 * @param {object} [parameters] - Optional `interval` / `concurrency` for the request.
 * @param {string} [url] - Preferred URL to request.
 * @returns {Promise<?{ session: object, instanceToken: string }>} `null` for the
 *   'mindgeek' entity or when `parameters.parentSession` is `false`.
 * @throws {Error} When the response is not 200 or carries no instance token.
 */
async function getSession(site, parameters, url) {
	const sessionDisabled = site.slug === 'mindgeek' || site.parameters?.parentSession === false;

	if (sessionDisabled) {
		// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
		return null;
	}

	const cookieJar = new CookieJar();
	const session = http.session({ cookieJar });

	const wantsOwnSession = site.parameters?.native
		|| site.parameters?.childSession
		|| site.parent?.parameters?.childSession;

	const sessionUrl = site.parameters?.siteId && !wantsOwnSession
		? site.parent.url
		: (url || site.url);

	const res = await http.get(sessionUrl, {
		session,
		headers: {
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
			Connection: 'keep-alive',
			'User-Agent': 'HTTPie/3.2.1',
		},
		interval: parameters?.interval,
		concurrency: parameters?.concurrency,
		parse: false,
	});

	const instanceToken = res.status === 200
		? cookie.parse(await cookieJar.getCookieStringAsync(sessionUrl)).instance_token
		: null;

	if (!instanceToken) {
		throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
	}

	return { session, instanceToken };
}
|
||||
|
||||
/**
 * Convert an actor payload into a profile object.
 *
 * Hair color, ethnicity, boob type and body art are derived from the actor's
 * tags by matching tag category and, where relevant, tag name.
 *
 * @param {object} data - Actor payload.
 * @param {string} networkName - Network slug forwarded to scrapeRelease.
 * @param {object[]} [releases=[]] - Release payloads to scrape into the profile.
 * @returns {object} The scraped profile.
 */
function scrapeProfile(data, networkName, releases = []) {
	const { tags, images } = data;

	const findTagName = (categoryPattern) => tags.find(({ category }) => categoryPattern.test(category))?.name;
	const hasTag = (categoryPattern, namePattern) => tags.some(({ category, name }) => categoryPattern.test(category) && namePattern.test(name));

	const profile = {
		description: data.bio,
		aliases: data.aliases.filter(Boolean),
		gender: data.gender === 'other' ? 'transsexual' : data.gender,
		measurements: data.measurements,
		dateOfBirth: qu.parseDate(data.birthday),
		birthPlace: data.birthPlace,
		height: inchesToCm(data.height),
		weight: lbsToKg(data.weight),
		hairColor: findTagName(/hair color/i),
		ethnicity: findTagName(/ethnicity/i),
	};

	const mainCard = images.card_main_rect?.[0];

	if (mainCard) {
		// largest available size wins
		profile.avatar = ['xl', 'lg', 'md', 'sm', 'xs'].reduce((found, size) => found || mainCard[size]?.url, undefined);
	}

	if (hasTag(/boob type/i, /natural tits/i)) {
		profile.naturalBoobs = true;
	}

	// checked after the natural tag, so an enhanced tag takes precedence
	if (hasTag(/boob type/i, /enhanced/i)) {
		profile.naturalBoobs = false;
	}

	if (hasTag(/body art/i, /tattoo/i)) {
		profile.hasTattoos = true;
	}

	if (hasTag(/body art/i, /piercing/i)) {
		profile.hasPiercings = true;
	}

	profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes for a site from the releases API.
 *
 * @param {object} site - Entity to fetch releases for.
 * @param {number} [page=1] - 1-based page number; 24 releases per page.
 * @param {object} options - Scraper options; reads `beforeNetwork` and `parameters`.
 * @returns {Promise<?(object|number)>} The scrapeLatest result on success, the
 *   response status code on failure, or `null` when no site id can be derived
 *   and the site is neither `native` nor `extract`.
 */
async function fetchLatest(site, page = 1, options) {
	const url = getUrl(site);
	const { searchParams, pathname } = new URL(url);

	// The trailing slash is deliberately not required, matching getUrl's own
	// /site/<digits> check; otherwise a URL ending in the id yields no site id.
	const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)/)?.[1]);

	const isUnfiltered = site.parameters?.native || site.parameters?.extract;

	if (!siteId && !isUnfiltered) {
		return null;
	}

	// NOTE(review): getSession in this file resolves to { session, instanceToken } with no
	// `headers` key, so this reuse branch looks unreachable when beforeNetwork is its result — confirm
	const sessionData = options.beforeNetwork?.headers?.Instance
		? options.beforeNetwork
		: await getSession(site, options.parameters, url);

	// getSession returns null for entities without a session; don't crash on destructuring
	const instanceToken = sessionData?.instanceToken;

	const beforeDate = moment().add(1, 'day').format('YYYY-MM-DD');
	const limit = 24;
	const collectionFilter = isUnfiltered ? '' : `collectionId=${siteId}&`;
	const apiUrl = `https://site-api.project1service.com/v2/releases?${collectionFilter}dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

	const res = await http.get(apiUrl, {
		interval: options.parameters?.interval,
		concurrency: options.parameters?.concurrency,
		headers: {
			...(instanceToken && { Instance: instanceToken }),
			Origin: site.url,
			Referer: url,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.status === 200 && res.body.result) {
		return scrapeLatest(res.body.result, site);
	}

	return res.statusCode;
}
|
||||
|
||||
/**
 * Fetch upcoming releases and keep those belonging to the site's own collection.
 *
 * @param {object} site - Entity to fetch upcoming releases for.
 * @param {number} page - Unused; the upcoming-releases endpoint is requested without paging.
 * @param {object} options - Scraper options; reads `parameters`.
 * @returns {Promise<(object|number)>} The scrapeLatest result on success,
 *   otherwise the response status code.
 */
async function fetchUpcoming(site, page, options) {
	const url = getUrl(site);

	// getSession returns null for entities without a session; don't crash on destructuring
	const { session, instanceToken } = (await getSession(site, options.parameters)) || {};

	const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';

	const res = await http.get(apiUrl, {
		session,
		interval: options.parameters?.interval,
		concurrency: options.parameters?.concurrency,
		headers: {
			...(instanceToken && { Instance: instanceToken }),
			Origin: site.url,
			Referer: url,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		// filterChannel = true: only releases from the site's own collection are kept as scenes
		return scrapeLatest(res.body.result, site, true);
	}

	return res.statusCode;
}
|
||||
|
||||
/**
 * Fetch and scrape a single release by the numeric id found in its URL.
 *
 * @param {string} url - Release URL; the first `/<digits>` path segment is the entry id.
 * @param {object} site - Entity the release belongs to.
 * @param {object} [baseScene] - Previously scraped scene; returned as-is when it has
 *   an entry id, is not shallow, and `forceDeep` is not set.
 * @param {object} options - Scraper options; reads `parameters` and `beforeFetchScenes`.
 * @returns {Promise<?object>} `baseScene`, `{ scene }` on success, or `null`.
 */
async function fetchRelease(url, site, baseScene, options) {
	if (baseScene?.entryId && !baseScene.shallow && !options.parameters.forceDeep) {
		// overview and deep data is the same, don't hit server unnecessarily
		return baseScene;
	}

	const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1];

	if (!entryId) {
		// no id in the URL; skip the session and the request for /releases/undefined
		return null;
	}

	// getSession returns null for entities without a session; don't crash on destructuring
	const sessionData = options.beforeFetchScenes || await getSession(site, options.parameters);
	const { session, instanceToken } = sessionData || {};

	const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
		session,
		interval: options.parameters.interval,
		concurrency: options.parameters.concurrency,
		headers: {
			...(instanceToken && { Instance: instanceToken }),
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.status === 200 && res.body.result) {
		return {
			scene: scrapeRelease(res.body.result, url, site),
		};
	}

	return null;
}
|
||||
|
||||
/**
 * Look up an actor by exact, case-insensitive name and scrape their profile,
 * optionally including up to 100 of their scenes.
 *
 * @param {{ name: string }} actor - Actor to look up.
 * @param {{ entity: object, parameters: object }} context - Entity and request parameters.
 * @param {object} [include] - When `includeActorScenes` is truthy the actor's scenes are fetched.
 * @returns {Promise<?object>} The scraped profile, or `null` when the search fails
 *   or no actor matches the name.
 */
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
	// getSession returns null for entities without a session; don't crash on destructuring
	const { session, instanceToken } = (await getSession(entity, parameters)) || {};
	const instanceHeader = instanceToken && { Instance: instanceToken };

	// encodeURIComponent, not encodeURI: the name is a query value, and encodeURI
	// leaves characters such as & # + = unescaped
	const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURIComponent(actorName)}`, {
		session,
		interval: parameters.interval,
		concurrency: parameters.concurrency,
		headers: {
			...instanceHeader,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.statusCode !== 200) {
		return null;
	}

	const wantedName = actorName.toLowerCase();
	const actorData = res.body.result?.find((actor) => actor.name.toLowerCase() === wantedName);

	if (!actorData) {
		return null;
	}

	const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;

	const actorReleasesRes = include?.includeActorScenes && await http.get(actorReleasesUrl, {
		session,
		interval: parameters.interval,
		concurrency: parameters.concurrency,
		headers: {
			...instanceHeader,
		},
	});

	// actorReleasesRes is undefined/falsy when scenes were not requested, hence the ?.
	const releases = (actorReleasesRes?.statusCode === 200 && actorReleasesRes.body.result) || [];

	return scrapeProfile(actorData, entity.slug, releases);
}
|
||||
|
||||
// Public scraper interface.
// getSession is exposed under both the beforeNetwork and beforeFetchScenes keys,
// and fetchRelease serves both fetchScene and fetchMovie.
module.exports = {
	beforeNetwork: getSession,
	beforeFetchScenes: getSession,
	requireBeforeNetwork: false,
	scrapeLatestX,
	fetchLatest,
	fetchUpcoming,
	fetchScene: fetchRelease,
	fetchMovie: fetchRelease,
	fetchProfile,
};
|
||||
Reference in New Issue
Block a user