Switched to tabs. Added missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -8,246 +8,246 @@ const { get, post } = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
// Maps the single-letter `sex` code read from a model record to the gender label stored on a profile.
const genderMap = {
	F: 'female',
	M: 'male',
	T: 'transsexual', // not yet observed
};
|
||||
|
||||
/**
 * Build an ordered list of poster URLs to try, tallest image first.
 *
 * Only images whose name contains "landscape" (case-insensitive) are used.
 * For each image the plain source and its 2x/3x high-DPI variants are listed;
 * variants that do not exist are dropped instead of being emitted as `undefined`.
 *
 * @param {Array<{name: string, height: number, src: string, highdpi?: Object}>} [poster]
 * @returns {string[]} candidate URLs, empty when no input or no landscape image is given
 */
function getPosterFallbacks(poster) {
	return (poster || [])
		.filter(image => /landscape/i.test(image.name))
		.sort((imageA, imageB) => imageB.height - imageA.height) // filter() returned a copy, so the caller's array is untouched
		.flatMap((image) => {
			const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];

			// high DPI images for full HD source are huge, only prefer for smaller fallback sources
			return image.height === 1080 ? sources : sources.reverse();
		})
		.filter(Boolean);
}
|
||||
|
||||
/**
 * Convert the landscape teaser videos into `{ src, type, quality }` entries.
 *
 * Only videos whose name contains "landscape" (case-insensitive) are kept,
 * in their original order.
 *
 * @param {Array<{name: string, src: string, type: string, height: number}>} [teaser]
 * @returns {Array<{src: string, type: string, quality: number}>} empty when no input is given
 */
function getTeaserFallbacks(teaser) {
	return (teaser || [])
		.filter(video => /landscape/i.test(video.name))
		.map(video => ({
			src: video.src,
			type: video.type,
			// a reported height of 353 is recorded as the 360 quality label
			quality: Number(String(video.height).replace('353', '360')),
		}));
}
|
||||
|
||||
/**
 * Build an ordered list of avatar URLs to try, tallest image first and,
 * per image, highest pixel density first (3x, 2x, then the plain source).
 *
 * The input array is copied before sorting so the caller's data keeps its
 * order, and variants that do not exist are dropped rather than emitted as
 * `undefined`.
 *
 * @param {Array<{height: number, src: string, highdpi?: Object}>} [avatar]
 * @returns {string[]} candidate URLs, empty when no input is given
 */
function getAvatarFallbacks(avatar) {
	return [...(avatar || [])]
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
		.filter(Boolean);
}
|
||||
|
||||
/**
 * Resolve the trailer sources of a scene through the site's two-step token API.
 *
 * First posts the scene's 1080p preview file to `/api/__record_tknreq`, then
 * posts to the URL returned by that request to obtain one token per quality.
 *
 * @param {Object} scene - scene record; `previewVideoUrl1080P` is sent as the file
 * @param {Object} site - site record; `url` is used as the API base
 * @param {string} url - scene page URL, sent as referer on both requests
 * @returns {Promise<Array<{src: string, quality: number}>|null>} available
 *   trailer sources in ascending quality, or null when either request fails
 *   or the token response carries no URL
 */
async function getTrailer(scene, site, url) {
	const qualities = [360, 480, 720, 1080, 2160];

	const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
		file: scene.previewVideoUrl1080P,
		sizes: qualities.join('+'),
		type: 'trailer',
	}, { referer: url });

	if (!tokenRes.ok) {
		return null;
	}

	// an OK response without the expected payload would otherwise throw on property access
	const tokenPath = tokenRes.body?.data?.url;

	if (!tokenPath) {
		return null;
	}

	const trailerUrl = `${site.url}/api${tokenPath}`;
	const trailersRes = await post(trailerUrl, null, { referer: url });

	if (!trailersRes.ok) {
		return null;
	}

	const trailersByQuality = trailersRes.body || {};

	// the response is keyed by quality; skip qualities the site did not return
	return qualities
		.filter(quality => trailersByQuality[quality])
		.map(quality => ({
			src: trailersByQuality[quality].token,
			quality,
		}));
}
|
||||
|
||||
/**
 * Convert a list of scene records from a listing response into release objects.
 *
 * @param {Array<Object>} scenes - scene records from the API
 * @param {Object|null} site - site record; when it has no `url`, `origin` is used instead
 * @param {string} [origin] - base URL used for release links when `site` is not available
 * @returns {Array<Object>} one release per scene, empty when no scenes are given
 */
function scrapeAll(scenes, site, origin) {
	return (scenes || []).map((scene) => {
		const release = {};

		release.title = scene.title;

		release.entryId = String(scene.newId);
		release.url = `${site?.url || origin}${scene.targetUrl}`;

		release.date = moment.utc(scene.releaseDate).toDate();
		release.shootDate = moment.utc(scene.shootDate).toDate();

		release.actors = scene.models;
		// textRating is halved to obtain the star value
		release.stars = Number(scene.textRating) / 2;

		release.poster = getPosterFallbacks(scene.images.poster);
		release.teaser = getTeaserFallbacks(scene.previews.poster);

		return release;
	});
}
|
||||
|
||||
/**
 * Convert the "next scene" record into a single-element release list.
 *
 * The title is derived from the target URL slug (leading slash removed,
 * dash-separated words capitalised), and the entry ID is taken from the
 * first numeric path segment of a poster or teaser URL.
 *
 * @param {Object|null} scene - upcoming scene record
 * @param {Object} site - site record; `url` is used as the link base
 * @returns {Array<Object>|null} `[release]`, or null when there is no scene
 *   or it is flagged `isPreReleasePeriod`
 */
function scrapeUpcoming(scene, site) {
	if (!scene || scene.isPreReleasePeriod) return null;

	const release = {};

	release.title = scene.targetUrl
		.slice(1)
		.split('-')
		.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
		.join(' ');

	release.url = `${site.url}${scene.targetUrl}`;

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.actors = scene.models;

	release.poster = getPosterFallbacks(scene.images.poster);
	release.teaser = getTeaserFallbacks(scene.previews.poster);

	// Poster fallbacks are URL strings, but teaser fallbacks are { src, type, quality }
	// objects: calling .match() on the object itself throws, so read its src.
	// The first poster slot can be empty when a high-DPI variant is missing, hence find().
	const idSource = release.poster.find(Boolean) || release.teaser[0]?.src;

	release.entryId = idSource?.match(/\/(\d+)/)?.[1];

	return [release];
}
|
||||
|
||||
/**
 * Convert a scene detail response into a release object, including trailer
 * sources when they can be resolved.
 *
 * @param {Object} data - API payload; `video` holds the scene and `pictureset` the photos
 * @param {string} url - scene page URL, stored on the release and used as referer
 * @param {Object} site - site record, passed on to the trailer lookup
 * @param {Object} [baseRelease] - previously scraped release; its `actors` take precedence
 * @returns {Promise<Object>} the release
 */
async function scrapeScene(data, url, site, baseRelease) {
	const scene = data.video;

	const release = {
		url,
		title: scene.title,
		description: scene.description,
		// prefer the actors of an already scraped base release
		actors: baseRelease?.actors || scene.models,
		director: scene.directorNames,
		duration: scene.runLength,
		stars: scene.totalRateVal,
		tags: scene.tags,
	};

	// stringified so the ID has the same type as the one produced by scrapeAll
	release.entryId = String(scene.newId);

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.poster = getPosterFallbacks(scene.images.poster);
	// a payload without a picture set yields no photos instead of throwing
	release.photos = (data.pictureset || []).map(photo => photo.main[0].src);

	release.teaser = getTeaserFallbacks(scene.previews.poster);

	const trailer = await getTrailer(scene, site, url);
	if (trailer) release.trailer = trailer;

	return release;
}
|
||||
|
||||
/**
 * Fetch and scrape the given listing pages of a model's releases.
 *
 * Pages are requested with at most three requests in flight; a page that does
 * not answer with code 200 contributes no releases.
 *
 * @param {number[]} pages - page numbers to fetch
 * @param {Object} model - model record; `targetUrl` is the API path of the model
 * @param {string} origin - base URL of the site
 * @returns {Promise<Array<Object>>} releases of all pages, flattened in page order
 */
async function fetchActorReleases(pages, model, origin) {
	// Promise.map with a concurrency option is not native; it relies on the Promise implementation in scope
	const releasesPerPage = await Promise.map(pages, async (page) => {
		const url = `${origin}/api${model.targetUrl}?page=${page}`;
		const res = await get(url);

		if (res.code === 200) {
			return scrapeAll(res.body?.data?.videos?.videos, null, origin);
		}

		return [];
	}, { concurrency: 3 });

	return releasesPerPage.flat();
}
|
||||
|
||||
/**
 * Convert a model detail response into an actor profile.
 *
 * @param {Object} data - API payload; `model` holds the actor and `videos` the first page of releases
 * @param {string} origin - base URL of the site, used for release links and further pages
 * @param {boolean} [withReleases] - when truthy, the remaining release pages are fetched as well
 * @returns {Promise<Object>} the profile, with `releases` always set
 */
async function scrapeProfile(data, origin, withReleases) {
	// page size used to derive the page count from the total number of videos
	const releasesPerPage = 6;

	const model = data.model;
	const profile = {};

	// only set a birthdate when one is present, rather than storing an invalid or epoch date
	if (model.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
	profile.gender = genderMap[model.sex];

	profile.hair = model.hairColour;
	profile.nationality = model.nationality;

	// biography may be absent; treat that like an empty one
	if (model.biography?.trim().length > 0) profile.description = model.biography;

	if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
	if (model.waistMeasurment) profile.waist = model.waistMeasurment;
	if (model.hipMeasurment) profile.hip = model.hipMeasurment;

	profile.avatar = getAvatarFallbacks(model.images.listing);
	profile.poster = getAvatarFallbacks(model.images.profile);
	profile.banner = getAvatarFallbacks(model.images.poster);

	const releases = scrapeAll(data.videos.videos, null, origin);

	if (withReleases) {
		const pageCount = Math.ceil(data.videos.count / releasesPerPage);
		// the first page came with the profile response, so continue from page 2
		const otherPages = Array.from({ length: pageCount - 1 }, (value, index) => index + 2);
		const otherReleases = await fetchActorReleases(otherPages, model, origin);

		profile.releases = [...releases, ...otherReleases];
	} else {
		profile.releases = releases;
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the site's video listing and scrape it into releases.
 *
 * @param {Object} site - site record; `url` is used as the API base
 * @param {number} [page=1] - listing page to request
 * @returns {Promise<Array<Object>|number>} the scraped releases, or the
 *   response code when the request did not answer with 200
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/api/videos?page=${page}`;
	const res = await get(url);

	if (res.code === 200) {
		return scrapeAll(res.body.data.videos, site);
	}

	return res.code;
}
|
||||
|
||||
/**
 * Fetch the site's API root and scrape its `nextScene` entry.
 *
 * @param {Object} site - site record; `url` is used as the API base
 * @returns {Promise<Array<Object>|null|number>} the result of scrapeUpcoming,
 *   or the response code when the request did not answer with 200
 */
async function fetchUpcoming(site) {
	const apiUrl = `${site.url}/api`;
	const res = await get(apiUrl);

	if (res.code === 200) {
		// scrapeUpcoming already returns null for a missing scene, so a payload without data is safe to pass on
		return scrapeUpcoming(res.body?.data?.nextScene, site);
	}

	return res.code;
}
|
||||
|
||||
/**
 * Fetch the API counterpart of a scene page and scrape it into a release.
 *
 * The API URL is the page URL with `/api` inserted between origin and path.
 *
 * @param {string} url - scene page URL
 * @param {Object} site - site record, passed on to the scraper
 * @param {Object} [baseRelease] - previously scraped release, passed on to the scraper
 * @returns {Promise<Object|number>} the release, or the response code when
 *   the request did not answer with 200
 */
async function fetchScene(url, site, baseRelease) {
	const { origin, pathname } = new URL(url);
	const apiUrl = `${origin}/api${pathname}`;

	const res = await get(apiUrl);

	if (res.code === 200) {
		return scrapeScene(res.body.data, url, site, baseRelease);
	}

	return res.code;
}
|
||||
|
||||
/**
 * Fetch and scrape the profile of an actor from `https://www.<scraperSlug>.com`.
 *
 * @param {string} actorName - actor name; slugified to build the API path
 * @param {string} scraperSlug - used as the domain name of the site
 * @param {Object} site - not used by this function
 * @param {Object} [include] - when `include.scenes` is truthy, all release pages are fetched
 * @returns {Promise<Object|null>} the profile, or null when the request did
 *   not answer with 200
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
	const origin = `https://www.${scraperSlug}.com`;
	const actorSlug = slugify(actorName);
	const url = `${origin}/api/${actorSlug}`;
	const res = await get(url);

	if (res.code === 200) {
		// include is optional; without it only the first page of releases is scraped
		return scrapeProfile(res.body.data, origin, include?.scenes);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user