Fixed Aylo session acquisition, migrated to unprint. Fixed Jules Jordan profile test.
This commit is contained in:
@@ -1,15 +1,10 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/* eslint-disable newline-per-chained-call */
|
/* eslint-disable newline-per-chained-call */
|
||||||
const Promise = require('bluebird');
|
|
||||||
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
|
|
||||||
const cookie = require('cookie');
|
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
// const unprint = require('unprint');
|
const unprint = require('unprint');
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const http = require('../utils/http');
|
|
||||||
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||||
|
|
||||||
function getBasePath(parameters, channel, path = '/scene') {
|
function getBasePath(parameters, channel, path = '/scene') {
|
||||||
@@ -126,6 +121,119 @@ async function scrapeLatest(items, site, filterChannel, options) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves the scene-listing URL for an Aylo site.
 *
 * @param {object} site - Entity with `url`, `name`, optional `parameters`
 *   (`native`, `extract`, `siteId`) and, for child channels, `parent.url`.
 * @returns {string} URL of the site's scene listing.
 * @throws {Error} When the site matches none of the supported configurations.
 */
function getUrl(site) {
	const { searchParams, pathname } = new URL(site.url);

	// The URL already pins a site ID, either as a ?site= query parameter
	// or as a /site/<id> path segment.
	if (searchParams.has('site') || /\/site\/\d+/.test(pathname)) {
		return site.url;
	}

	// Native and extract sites both serve their own /scenes listing,
	// so the two former duplicate branches are merged here.
	if (site.parameters?.native || site.parameters?.extract) {
		return `${site.url}/scenes`;
	}

	// Child channels list their scenes through the parent network.
	if (site.parameters?.siteId) {
		return `${site.parent.url}/scenes?site=${site.parameters.siteId}`;
	}

	throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`);
}
|
||||||
|
|
||||||
|
/**
 * Acquires an Aylo API session token for a site by requesting the site (or
 * its parent network) and reading the `instance_token` cookie the server
 * sets; later API calls send it back as the `Instance` header.
 *
 * @param {object} site - Entity to acquire a session for.
 * @param {object} _parameters - Unused; kept for call-site compatibility.
 * @param {string} [url] - Optional pre-resolved URL to request instead of `site.url`.
 * @returns {Promise<{instanceToken: string}|null>} Token wrapper, or null for
 *   independent channels that have no session endpoint.
 * @throws {Error} When no instance token could be obtained.
 */
async function getSession(site, _parameters, url) {
	// if (site.slug === 'aylo' || site.parameters?.parentSession === false) {
	if (site.slug === 'aylo') {
		// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
		return null;
	}

	// Child channels (siteId set) borrow the parent network's session unless
	// the channel (or its parent) is configured to establish its own.
	const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false)
		? site.parent.url
		: (url || site.url);

	const res = await unprint.get(sessionUrl, {
		headers: {
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
			Connection: 'keep-alive',
		},
	});

	if (res.status === 200) {
		const instanceToken = res.cookies.instance_token;

		if (instanceToken) {
			return { instanceToken };
		}
	}

	// was `res.statusCode`, which this response object does not carry (the
	// success check above uses `res.status`) and rendered as "undefined"
	throw new Error(`Failed to acquire Aylo session (${res.status})`);
}
|
||||||
|
|
||||||
|
/**
 * Fetches the latest scene releases for a site from the Aylo v2 releases API.
 *
 * @param {object} site - Entity to fetch releases for.
 * @param {number} [page=1] - 1-based page number; 24 releases per page.
 * @param {object} options - Scraper options (`parameters`, `beforeNetwork`).
 * @returns {Promise<object[]|number|null>} Scraped releases, null when the site
 *   cannot be resolved to a collection, or the HTTP status on failure.
 */
async function fetchLatest(site, page = 1, options) {
	const listUrl = getUrl(site);
	const { searchParams, pathname } = new URL(listUrl);
	const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]);
	const isStandalone = Boolean(site.parameters?.native || site.parameters?.extract);

	if (!siteId && !isStandalone) {
		return null;
	}

	// Reuse the session acquired up front unless this channel is configured
	// to establish its own.
	const canReuseSession = options.beforeNetwork?.instanceToken
		&& !options.parameters?.native
		&& !options.parameters?.childSession
		&& options.parameters?.parentSession !== false;

	const { instanceToken } = canReuseSession
		? options.beforeNetwork
		: await getSession(site, options.parameters, listUrl);

	// Query scenes released up to tomorrow, newest first.
	const limit = 24;
	const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
	const baseQuery = `dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

	const apiUrl = isStandalone
		? `https://site-api.project1service.com/v2/releases?${baseQuery}`
		: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&${baseQuery}`;

	const res = await unprint.get(apiUrl, {
		interval: options.parameters.interval,
		concurrency: options.parameters.concurrency,
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: listUrl,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.status !== 200 || !res.data.result) {
		return res.status;
	}

	return scrapeLatest(res.data.result, site, false, options);
}
|
||||||
|
|
||||||
|
/**
 * Fetches upcoming (unreleased) scenes from the Aylo v2 API.
 *
 * @param {object} site - Entity to fetch upcoming releases for.
 * @param {*} _page - Unused; the upcoming endpoint is not paginated here.
 * @param {object} options - Scraper options (`parameters`).
 * @returns {Promise<object[]|number>} Scraped releases, or the HTTP status on failure.
 */
async function fetchUpcoming(site, _page, options) {
	const refererUrl = getUrl(site);
	const { instanceToken } = await getSession(site, options.parameters);

	const requestOptions = {
		interval: options.parameters.interval,
		concurrency: options.parameters.concurrency,
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: refererUrl,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	};

	const res = await unprint.get('https://site-api.project1service.com/v2/upcoming-releases', requestOptions);

	if (res.status !== 200 || !res.data.result) {
		return res.status;
	}

	return scrapeLatest(res.data.result, site, true, options);
}
|
||||||
|
|
||||||
function scrapeRelease(data, url, channel, networkName, options) {
|
function scrapeRelease(data, url, channel, networkName, options) {
|
||||||
if (Array.isArray(data)) {
|
if (Array.isArray(data)) {
|
||||||
return null;
|
return null;
|
||||||
@@ -192,139 +300,6 @@ function scrapeRelease(data, url, channel, networkName, options) {
|
|||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Resolves the scene-listing URL for an Aylo site.
 *
 * @param {object} site - Entity with `url`, `name`, optional `parameters` and `parent`.
 * @returns {string} URL of the site's scene listing.
 * @throws {Error} When the site configuration is not supported.
 */
function getUrl(site) {
	const { searchParams, pathname } = new URL(site.url);
	const hasEmbeddedId = searchParams.has('site') || /\/site\/\d+/.test(pathname);

	// A site ID embedded in the URL itself takes precedence.
	if (hasEmbeddedId) {
		return site.url;
	}

	// Native and extract sites serve their own scene listing.
	if (site.parameters?.native) {
		return `${site.url}/scenes`;
	}

	if (site.parameters?.extract) {
		return `${site.url}/scenes`;
	}

	// Child channels are listed through the parent network by collection ID.
	if (site.parameters?.siteId) {
		return `${site.parent.url}/scenes?site=${site.parameters.siteId}`;
	}

	throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`);
}
|
|
||||||
|
|
||||||
// Acquires an Aylo/MindGeek API session for a site: performs a plain GET
// against the site (or its parent network) with a fresh cookie jar, then
// reads the `instance_token` cookie the server set. Returns
// { session, instanceToken } for reuse by API calls, null for independent
// channels, and throws when no token was obtained.
async function getSession(site, parameters, url) {
	// if (site.slug === 'aylo' || site.parameters?.parentSession === false) {
	if (site.slug === 'aylo') {
		// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
		return null;
	}

	// Fresh jar per call; the jar captures Set-Cookie headers from the request below.
	const cookieJar = new CookieJar();
	const session = http.session({ cookieJar });

	// Child channels (siteId set) borrow the parent network's session unless
	// the channel (or its parent) is configured to establish its own.
	const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false)
		? site.parent.url
		: (url || site.url);

	/*
	await unprint.browserRequest(sessionUrl, {
		browser: {
			headless: false,
		},
		async control() {
			await new Promise((resolve) => { setTimeout(() => resolve(), 10000); });
		},
	});
	*/

	// parse: false — we only need the cookies, not the page body.
	const res = await http.get(sessionUrl, {
		session,
		headers: {
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
			Connection: 'keep-alive',
		},
		interval: parameters?.interval,
		concurrency: parameters?.concurrency,
		parse: false,
	});

	if (res.status === 200) {
		// getCookieStringAsync comes from bluebird's promisifyAll on tough-cookie.
		const cookieString = await cookieJar.getCookieStringAsync(sessionUrl);
		const { instance_token: instanceToken } = cookie.parse(cookieString);

		if (instanceToken) {
			return { session, instanceToken };
		}
	}

	// NOTE(review): success is checked via `res.status` but the error reports
	// `res.statusCode` — presumably the http util exposes both; verify.
	throw new Error(`Failed to acquire Aylo session (${res.statusCode})`);
}
|
|
||||||
|
|
||||||
// Fetches the most recent scene releases for a site from the Aylo v2
// releases API. Returns scraped releases, null when the site cannot be
// resolved to a collection, or the HTTP status code on failure.
async function fetchLatest(site, page = 1, options) {
	const url = getUrl(site);
	const { searchParams, pathname } = new URL(url);
	// Collection ID comes from either a ?site= query or a /site/<id>/ path segment.
	const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]);

	if (!siteId && !site.parameters?.native && !site.parameters?.extract) {
		return null;
	}

	// Reuse a session acquired up front (beforeNetwork) unless this channel
	// is configured to establish its own session.
	const { instanceToken } = options.beforeNetwork?.instanceToken && !(options.parameters?.native || options.parameters?.childSession || options.parameters?.parentSession === false)
		? options.beforeNetwork
		: await getSession(site, options.parameters, url);

	// Query scenes released up to tomorrow, newest first, 24 per page.
	const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
	const limit = 24;
	const apiUrl = site.parameters?.native || site.parameters?.extract
		? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
		: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

	const res = await http.get(apiUrl, {
		interval: options.parameters.interval,
		concurrency: options.parameters.concurrency,
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: url,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.status === 200 && res.body.result) {
		return scrapeLatest(res.body.result, site, false, options);
	}

	return res.statusCode;
}
|
|
||||||
|
|
||||||
// Fetches upcoming (unreleased) scenes from the Aylo v2 API using a freshly
// acquired session. Returns scraped releases or the HTTP status on failure.
// `page` is accepted but unused — the upcoming endpoint is not paginated here.
async function fetchUpcoming(site, page, options) {
	const url = getUrl(site);
	const { session, instanceToken } = await getSession(site, options.parameters);

	const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';

	const res = await http.get(apiUrl, {
		session,
		interval: options.parameters.interval,
		concurrency: options.parameters.concurrency,
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: url,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		// `true` marks these releases as upcoming in scrapeLatest.
		return scrapeLatest(res.body.result, site, true, options);
	}

	return res.statusCode;
}
|
|
||||||
|
|
||||||
async function fetchRelease(url, site, baseScene, options) {
|
async function fetchRelease(url, site, baseScene, options) {
|
||||||
if (baseScene?.entryId && !baseScene.shallow && !options.parameters.forceDeep) {
|
if (baseScene?.entryId && !baseScene.shallow && !options.parameters.forceDeep) {
|
||||||
// overview and deep data is the same, don't hit server unnecessarily
|
// overview and deep data is the same, don't hit server unnecessarily
|
||||||
@@ -332,10 +307,9 @@ async function fetchRelease(url, site, baseScene, options) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1];
|
const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1];
|
||||||
const { session, instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters);
|
const { instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters);
|
||||||
|
|
||||||
const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
const res = await unprint.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
||||||
session,
|
|
||||||
interval: options.parameters.interval,
|
interval: options.parameters.interval,
|
||||||
concurrency: options.parameters.concurrency,
|
concurrency: options.parameters.concurrency,
|
||||||
headers: {
|
headers: {
|
||||||
@@ -344,16 +318,16 @@ async function fetchRelease(url, site, baseScene, options) {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (res.status === 200 && res.body.result) {
|
if (res.status === 200 && res.data.result) {
|
||||||
return {
|
return {
|
||||||
scene: scrapeRelease(res.body.result, url, site, null, options),
|
scene: scrapeRelease(res.data.result, url, site, null, options),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile(data, networkName, _releases = []) {
|
function scrapeProfile(data, _networkName, _releases = []) {
|
||||||
const profile = {
|
const profile = {
|
||||||
description: data.bio,
|
description: data.bio,
|
||||||
aliases: data.aliases.filter(Boolean),
|
aliases: data.aliases.filter(Boolean),
|
||||||
@@ -367,7 +341,7 @@ function scrapeProfile(data, networkName, _releases = []) {
|
|||||||
profile.measurements = data.measurements;
|
profile.measurements = data.measurements;
|
||||||
}
|
}
|
||||||
|
|
||||||
profile.dateOfBirth = qu.parseDate(data.birthday);
|
profile.dateOfBirth = unprint.extractDate(data.birthday);
|
||||||
profile.birthPlace = data.birthPlace;
|
profile.birthPlace = data.birthPlace;
|
||||||
profile.height = inchesToCm(data.height);
|
profile.height = inchesToCm(data.height);
|
||||||
profile.weight = lbsToKg(data.weight);
|
profile.weight = lbsToKg(data.weight);
|
||||||
@@ -406,10 +380,9 @@ function scrapeProfile(data, networkName, _releases = []) {
|
|||||||
|
|
||||||
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
|
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
|
||||||
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
||||||
const { session, instanceToken } = await getSession(entity, parameters);
|
const { instanceToken } = await getSession(entity, parameters);
|
||||||
|
|
||||||
const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
|
const res = await unprint.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
|
||||||
session,
|
|
||||||
interval: parameters.interval,
|
interval: parameters.interval,
|
||||||
concurrency: parameters.concurrency,
|
concurrency: parameters.concurrency,
|
||||||
headers: {
|
headers: {
|
||||||
@@ -418,14 +391,13 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.status === 200) {
|
||||||
const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
|
const actorData = res.data.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
|
||||||
|
|
||||||
if (actorData) {
|
if (actorData) {
|
||||||
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
||||||
|
|
||||||
const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, {
|
const actorReleasesRes = include.includeActorScenes && await unprint.get(actorReleasesUrl, {
|
||||||
session,
|
|
||||||
interval: parameters.interval,
|
interval: parameters.interval,
|
||||||
concurrency: parameters.concurrency,
|
concurrency: parameters.concurrency,
|
||||||
headers: {
|
headers: {
|
||||||
@@ -433,8 +405,8 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
|
if (actorReleasesRes.status === 200 && actorReleasesRes.data.result) {
|
||||||
return scrapeProfile(actorData, entity.slug, actorReleasesRes.body.result);
|
return scrapeProfile(actorData, entity.slug, actorReleasesRes.data.result);
|
||||||
}
|
}
|
||||||
|
|
||||||
return scrapeProfile(actorData, entity.slug, []);
|
return scrapeProfile(actorData, entity.slug, []);
|
||||||
|
|||||||
@@ -213,7 +213,7 @@ const actors = [
|
|||||||
{ entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] },
|
{ entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] },
|
||||||
{ entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] },
|
{ entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] },
|
||||||
// jules jordan scraper
|
// jules jordan scraper
|
||||||
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] },
|
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'avatar'] },
|
||||||
{ entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] },
|
{ entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] },
|
||||||
{ entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] },
|
{ entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] },
|
||||||
// exploitedx
|
// exploitedx
|
||||||
|
|||||||
Reference in New Issue
Block a user