forked from DebaucheryLibrarian/traxxx
Pass recursive parameters to all scraper methods. Use throttle parameters in the MindGeek scraper, and fix the missing slug that was breaking scene and actor URLs.
This commit is contained in:
@@ -68,7 +68,7 @@ function scrapeLatestX(data, site, filterChannel) {
|
||||
|| (site.parameters?.native && `${site.url}/scene`)
|
||||
|| `${site.parent.url}/scene`;
|
||||
|
||||
release.url = `${basepath}/${release.entryId}/`;
|
||||
release.url = `${basepath}/${release.entryId}/${slugify(release.title)}`;
|
||||
release.date = new Date(data.dateReleased);
|
||||
release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender }));
|
||||
|
||||
@@ -143,7 +143,7 @@ function getUrl(site) {
|
||||
throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`);
|
||||
}
|
||||
|
||||
async function getSession(site) {
|
||||
async function getSession(site, parameters) {
|
||||
const cookieJar = new CookieJar();
|
||||
const session = http.session({ cookieJar });
|
||||
|
||||
@@ -152,7 +152,11 @@ async function getSession(site) {
|
||||
? site.parent.url
|
||||
: site.url;
|
||||
|
||||
const res = await http.get(sessionUrl, { session });
|
||||
const res = await http.get(sessionUrl, {
|
||||
session,
|
||||
interval: parameters?.interval,
|
||||
concurrency: parameters?.concurrency,
|
||||
});
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const cookieString = await cookieJar.getCookieStringAsync(sessionUrl);
|
||||
@@ -212,12 +216,12 @@ function scrapeProfile(data, html, releases = [], networkName) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
async function fetchLatest(site, page = 1, options) {
|
||||
const url = getUrl(site);
|
||||
const { searchParams } = new URL(url);
|
||||
const siteId = searchParams.get('site');
|
||||
|
||||
const { session, instanceToken } = await getSession(site);
|
||||
const { session, instanceToken } = await getSession(site, options.parameters);
|
||||
|
||||
const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
|
||||
const limit = 10;
|
||||
@@ -227,6 +231,8 @@ async function fetchLatest(site, page = 1) {
|
||||
|
||||
const res = await http.get(apiUrl, {
|
||||
session,
|
||||
interval: options.parameters.interval,
|
||||
concurrency: options.parameters.concurrency,
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
Origin: site.url,
|
||||
@@ -241,14 +247,16 @@ async function fetchLatest(site, page = 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
async function fetchUpcoming(site, page, options) {
|
||||
const url = getUrl(site);
|
||||
const { session, instanceToken } = await getSession(site);
|
||||
const { session, instanceToken } = await getSession(site, options.parameters);
|
||||
|
||||
const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';
|
||||
|
||||
const res = await http.get(apiUrl, {
|
||||
session,
|
||||
interval: options.parameters.interval,
|
||||
concurrency: options.parameters.concurrency,
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
Origin: site.url,
|
||||
@@ -263,17 +271,19 @@ async function fetchUpcoming(site) {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, baseScene) {
|
||||
async function fetchScene(url, site, baseScene, options) {
|
||||
if (baseScene?.entryId) {
|
||||
// overview and deep data is the same, don't hit server unnecessarily
|
||||
return baseScene;
|
||||
}
|
||||
|
||||
const entryId = url.match(/\d+/)[0];
|
||||
const { session, instanceToken } = await getSession(site);
|
||||
const { session, instanceToken } = await getSession(site, options.parameters);
|
||||
|
||||
const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
||||
session,
|
||||
interval: options.parameters.interval,
|
||||
concurrency: options.parameters.concurrency,
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
},
|
||||
@@ -286,12 +296,14 @@ async function fetchScene(url, site, baseScene) {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, { entity }) {
|
||||
async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, parameters }) {
|
||||
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
||||
const { session, instanceToken } = await getSession(entity);
|
||||
const { session, instanceToken } = await getSession(entity, parameters);
|
||||
|
||||
const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
|
||||
session,
|
||||
interval: parameters.interval,
|
||||
concurrency: parameters.concurrency,
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
},
|
||||
@@ -301,13 +313,18 @@ async function fetchProfile({ name: actorName }, { entity }) {
|
||||
const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());
|
||||
|
||||
if (actorData) {
|
||||
const actorUrl = `https://www.${entity.slug}.com/${entity.parameters?.actorPath || 'model'}/${actorData.id}/`;
|
||||
const actorUrl = `https://www.${entity.slug}.com/${entity.parameters?.actorPath || 'model'}/${actorData.id}/${actorSlug}`;
|
||||
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
||||
|
||||
const [actorRes, actorReleasesRes] = await Promise.all([
|
||||
http.get(actorUrl),
|
||||
http.get(actorUrl, {
|
||||
interval: parameters.interval,
|
||||
concurrency: parameters.concurrency,
|
||||
}),
|
||||
http.get(actorReleasesUrl, {
|
||||
session,
|
||||
interval: parameters.interval,
|
||||
concurrency: parameters.concurrency,
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user