forked from DebaucheryLibrarian/traxxx
888 lines
26 KiB
JavaScript
Executable File
888 lines
26 KiB
JavaScript
Executable File
'use strict';
|
|
|
|
const unprint = require('unprint');
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
// Maps the lower-cased scene type label shown on the site to the tags that type implies.
// The table is shared by every scrape in this module, so it is frozen to guard against
// accidental reassignment of its entries.
const tagsMap = Object.freeze({
	'body bukkake': ['bukkake'],
	'creampie gangbang': ['gangbang', 'creampie'],
	'cum handjob': ['handjob'],
	'facial bukkake': ['facial', 'bukkake'],
	'massive creampie': ['creampie'],
	'massive cum handjob': ['handjob'],
	'panty cum': ['cum-in-panty'],
	'pussy bukkake': ['cum-on-pussy'],
});
|
|
|
|
/**
 * Derive a release's entry ID from the first media URL it carries.
 *
 * Poster, trailer and photos are considered in that order. The ID is whatever the pattern
 * captures between the `preview/`, `samples/` or `tour/` segment and the last slash of
 * that first URL.
 *
 * @param {object} release - Release with optional `poster`, `trailer` and `photos`. Each may be
 *   a URL string, a `{ src }` source object, or an array of those (fallback lists).
 * @param {boolean} [toLowercase=true] - Lower-case the ID before returning it.
 * @returns {string|null} The entry ID, or null when the first media URL does not contain one.
 */
function entryIdFromMedia(release, toLowercase = true) {
	const mediaUrls = [release.poster, release.trailer, ...(release.photos || [])]
		.flat()
		// curated photo sources are `{ src }` objects rather than strings; unwrap them so
		// that .match() is never called on an object
		.map((source) => (typeof source === 'string' ? source : source?.src))
		.filter((source) => typeof source === 'string' && source.length > 0);

	const originalEntryId = mediaUrls[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1];

	if (!originalEntryId) {
		return null;
	}

	return toLowercase
		? originalEntryId.toLowerCase()
		: originalEntryId;
}
|
|
|
|
/**
 * Scrape the releases from a sample feed page and enrich them with tour tile data.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per composed sample block.
 * @param {object} tilesByEntryId - Tour tiles keyed by entry ID. Anything that is not an
 *   object (e.g. the status code of a failed tile request) is treated as an empty lookup.
 * @param {object} channel - Channel entity; `channel.url` is the origin for actor URLs.
 * @returns {object[]} One release per scene.
 */
function scrapeAll(scenes, tilesByEntryId, channel) {
	const tileLookup = tilesByEntryId !== null && typeof tilesByEntryId === 'object'
		? tilesByEntryId
		: {};

	return scenes.map(({ query }) => {
		const release = {};

		// URL and date scraping are disabled for this feed (formerly '.title a' and '.date')
		release.title = query.content('.sample-title');
		release.duration = query.duration('//div[contains(text(), "Runtime")]');

		release.actors = query.all('a[href*="actress/"]').map((actorEl) => ({ // actors can be only in title or dedicated field
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		const sceneType = query.content('a[href*="type/"]')?.toLowerCase();

		// copy the mapped tags so later changes to release.tags cannot leak into the shared
		// table, and ignore inherited keys such as 'constructor'
		release.tags = Object.hasOwn(tagsMap, sceneType)
			? [...tagsMap[sceneType]]
			: undefined;

		const posterBackground = query.style('.player');

		if (posterBackground?.background) {
			release.poster = posterBackground.background.match(/url\((.*)\)/)?.[1]?.trim();
		}

		release.photos = query.all('.sample-thumbs .thumb a')
			.map((linkEl) => [
				unprint.query.url(linkEl, null),
				unprint.query.img(linkEl, 'img'),
			].filter((src) => src && !src.includes('join'))) // skip missing sources and join links
			.filter((sources) => sources.length > 0);

		release.trailer = query.video('.player source');

		release.photoCount = query.number('//div[contains(text(), "Photos")]');
		release.cumshots = query.number('//div[contains(text(), "Cumshots")]');

		release.entryId = entryIdFromMedia(release);

		const tile = tileLookup[release.entryId];

		if (tile) {
			// the tile only supplies keys that were never assigned above
			Object.entries(tile).forEach(([key, value]) => {
				if (!Object.hasOwn(release, key)) {
					release[key] = value;
				}
			});
		} else {
			// most tiles are on the front page, but not all, deep scrape actor's page
			release.path = release.actors[0]?.url;
		}

		return release;
	});
}
|
|
|
|
// page has no container divs, select all following siblings until the 'join' link indicating the end of the block
|
|
/**
 * Concatenate the outer HTML of an element and its following siblings into one block.
 *
 * Collection stops before the first sibling that is, or contains, a 'join' link. If the
 * parent ends without such a sibling, the block simply runs to the last sibling.
 *
 * @param {Element} element - First element of the block.
 * @param {boolean} [init=true] - Pass the composed HTML to unprint.init() and return its
 *   result; when false the raw HTML string is returned.
 * @param {string} [acc=''] - HTML to prepend to the block.
 * @returns {object|string} The unprint.init() result, or the HTML string when `init` is false.
 */
function composeBlock(element, init = true, acc = '') {
	let html = acc;
	let current = element;

	while (current) {
		html += current.outerHTML;

		const next = current.nextElementSibling;

		// image albums also contain a join link, make sure not to select that one
		const reachedEnd = !next
			|| (typeof next.className === 'string' && next.className.includes('join'))
			|| !!next.querySelector('.item-join, .join-link')
			|| !!next.querySelector('h2 a[href*="join"]');

		if (reachedEnd) {
			break;
		}

		current = next;
	}

	return init
		? unprint.init(html)
		: html;
}
|
|
|
|
// used for both SpermMania and Fellatio Japan, but different layouts
|
|
/**
 * Scrape scene tiles, such as those on a tour page or an actor page.
 *
 * @param {Array<{ query: object }>} tiles - One unprint context per tile.
 * @param {object} channel - Channel entity; `url` and `slug` are used to build actor and teaser URLs.
 * @returns {object[]} One release per tile.
 */
function scrapeAllTiles(tiles, channel) {
	return tiles.map(({ query }) => {
		const release = {};

		// a tile without any text would otherwise crash the .match() calls below
		const sceneString = query.content() || '';

		release.title = query.content('.scene-title');

		release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD');
		release.duration = query.duration('.data.orange') || unprint.extractDuration(sceneString.match(/([\d:]+)\s*min/)?.[1]);

		release.actors = query.all('a[href*="actress/"], .sGirl a').map((actorEl) => ({ // actors can be only in title or dedicated field
			name: unprint.query.content(actorEl),
			url: channel.slug === 'fellatiojapan'
				? `${channel.url}/en/girl/${unprint.query.url(actorEl, null)}`
				: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		const sceneType = query.content('.scene-type')?.toLowerCase();

		// own-key check so that a label like 'constructor' cannot resolve to an inherited property
		const typeTags = Object.hasOwn(tagsMap, sceneType) ? tagsMap[sceneType] : [];

		release.tags = [...query.contents('.data a[href*="/tag"]'), ...typeTags].filter(Boolean);

		const posterBackground = query.style('.scene-img');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			// largest guess first, the URL found on the page last
			release.poster = [
				posterUrl
					.replace('-sm', '-lg')
					.replace('-med', '-lg'),
				posterUrl.replace('-sm', '-med'),
				posterUrl,
			];
		}

		const originalEntryId = query.attribute('.scene-hover', 'data-path') || entryIdFromMedia(release, false);

		release.entryId = originalEntryId?.toLowerCase();

		// the teaser URL is built from the entry ID in its original case
		release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`;

		release.photoCount = Number(sceneString.match(/(\d+) photos/)?.[1]) || null;
		release.cumshots = Number(sceneString.match(/(\d+) cumshots/)?.[1]) || null;

		return release;
	});
}
|
|
|
|
// Sperm Mania
|
|
/**
 * Fetch the tour page and index its scene tiles by entry ID.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @returns {Promise<object|number>} Tiles keyed by entry ID, or the response status when the request fails.
 */
async function fetchLatestTiles(channel) {
	const tourUrl = `${channel.url}/tour`;
	const res = await unprint.get(tourUrl, { selectAll: '.scene' });

	if (!res.ok) {
		return res.status;
	}

	const tileEntries = scrapeAllTiles(res.context, channel).map((tile) => [tile.entryId, tile]);

	return Object.fromEntries(tileEntries);
}
|
|
|
|
// SpermMania, sample feed with limited info
|
|
/**
 * Fetch a page of the sample feed together with the tour tiles used to enrich it.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} [page=1] - Page of the sample feed.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the feed request fails.
 */
async function fetchLatest(channel, page = 1) {
	const samplesUrl = `${channel.url}/samples?page=${page}`;

	// the feed and the tiles are independent requests, so both are started before awaiting either
	const samplesRequest = unprint.get(samplesUrl, { selectAll: '.sample-title, .item-title' });
	const tilesRequest = fetchLatestTiles(channel);

	const [res, tilesByEntryId] = await Promise.all([samplesRequest, tilesRequest]);

	if (!res.ok) {
		return res.status;
	}

	// each selected title only marks the start of a sample, gather its siblings into one block
	const sampleBlocks = res.context.map(({ element }) => composeBlock(element));

	return scrapeAll(sampleBlocks, tilesByEntryId, channel);
}
|
|
|
|
/**
 * Scrape the scene tiles on a paginated samples page.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per `.scene` tile.
 * @param {object} channel - Channel entity; `channel.url` is the origin for actor URLs.
 * @returns {object[]} One release per tile. `entryId` is null when the tile has no scene link
 *   or the link carries neither an `id` parameter nor a `/sample/<id>/` path.
 */
function scrapeAllCospuri(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.scene-thumb a');

		// a tile with a missing or unexpected link must not abort the whole page
		const sceneUrl = release.url ? new URL(release.url) : null;

		release.entryId = sceneUrl?.searchParams.get('id')
			|| sceneUrl?.pathname.match(/\/sample\/(.*)\//)?.[1]
			|| null;

		release.title = query.content('.title');

		release.date = query.date('.date', 'YYYY・MM・DD', { match: /\d{4}・\d{2}・\d{2}/ });
		release.duration = query.duration('.length');
		release.photoCount = query.number('.photos');

		release.actors = query.all('.model a[href*="/model"]').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		release.tags = [...query.contents('.tags .tag, .tag-box .tag'), query.content('.model .channel')].filter(Boolean);

		const posterBackground = query.style('.scene-thumb');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			// largest guess first, the URL found on the page last
			release.poster = [
				posterUrl
					.replace('-med', '-lg')
					.replace('-sm', '-lg'),
				posterUrl.replace('-sm', '-med'),
				posterUrl,
			];
		}

		release.teaser = query.video('.scene-hover', { attribute: 'data-path' });

		return release;
	});
}
|
|
|
|
// Cospuri, Cute Butts, paginated sample tiles with full info
|
|
/**
 * Fetch one page of sample tiles.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} page - Page of the samples listing.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatestCospuri(channel, page) {
	const samplesUrl = `${channel.url}/samples?page=${page}`;
	const res = await unprint.get(samplesUrl, { selectAll: '.scene' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAllCospuri(res.context, channel);
}
|
|
|
|
/**
 * Turn thumbnail URLs into fallback lists that try the full-size photo first.
 *
 * @param {Array<string|null|undefined>|null|undefined} sources - Thumbnail URLs; empty entries are skipped.
 * @returns {Array<Array<{ src: string, followRedirects: boolean }>>|null} One fallback list per
 *   photo, or null when no sources were passed at all.
 */
function curatePhotos(sources) {
	if (!sources) {
		return null;
	}

	return sources.filter(Boolean).map((thumbnailUrl) => {
		// thumbnails are named <index>s.jpg, the full-size photo drops the 's'; the dot is
		// escaped so that only a real '.jpg' extension is rewritten
		const fullSizeUrl = thumbnailUrl.replace(/(\d+)s\.jpg/, '$1.jpg');

		// when the source is not a thumbnail both URLs are identical, list it only once
		const candidates = fullSizeUrl === thumbnailUrl
			? [thumbnailUrl]
			: [fullSizeUrl, thumbnailUrl];

		return candidates.map((url) => ({
			src: url,
			followRedirects: false,
		}));
	});
}
|
|
|
|
/**
 * Scrape the composed sample blocks of the Fellatio Japan samples page.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per composed sample block.
 * @param {object} channel - Channel entity; `channel.url` prefixes the actor links.
 * @returns {object[]} One release per block; `path` points at the first actor's page.
 */
function scrapeAllFellatio(scenes, channel) {
	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl),
		url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
	});

	return scenes.map((scene) => {
		const { query } = scene;

		const release = {
			duration: query.duration('.tour-data'),
			photoCount: query.number('.tour-data', { match: /(\d+) photos/, matchIndex: 1 }),
			actors: query.all('.tour-data a[href*="girl/"]').map((actorEl) => toActor(actorEl)),
			tags: query.contents('.tour-data a[href*="tag/"]'),
		};

		// the poster is only available as the player's CSS background
		const background = query.style('.player')?.background;
		const posterUrl = background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			release.poster = posterUrl;
		}

		release.photos = curatePhotos(query.imgs('.tour-thumb img'));
		release.trailer = query.video();

		// the entry ID is taken from the media URLs gathered above
		release.entryId = entryIdFromMedia(release);
		release.path = release.actors[0]?.url;

		return release;
	});
}
|
|
|
|
// Fellatio Japan
|
|
/**
 * Fetch one page of the English samples listing.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} page - Page of the samples listing.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatestFellatio(channel, page) {
	const samplesUrl = `${channel.url}/en/samples/?page=${page}`;
	const res = await unprint.get(samplesUrl, { selectAll: '.tour-data' });

	if (!res.ok) {
		return res.status;
	}

	// every selected element only marks the start of a sample, gather its siblings into one block
	const sampleBlocks = res.context.map(({ element }) => composeBlock(element));

	return scrapeAllFellatio(sampleBlocks, channel);
}
|
|
|
|
/**
 * Scrape the composed sample blocks of the Handjob Japan samples page.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per composed sample block.
 * @param {object} _channel - Unused; kept so the signature matches the other layout scrapers.
 * @returns {object[]} One release per block; actors are plain name strings here.
 */
function scrapeAllHandjob(scenes, _channel) {
	return scenes.map((scene) => {
		const { query } = scene;

		// actor names are listed comma-separated in a single heading
		const actorNames = query.text('.item-ltitle h1')
			?.split(/,\s*/)
			.map((actor) => actor.trim());

		const release = {
			title: query.content('.blurb'),
			duration: query.duration('.item-rtitle'),
			photoCount: query.number('//h3[contains(text(), "Scene Photos")]/strong'),
			actors: actorNames,
		};

		// the poster is only available as the player's CSS background
		const background = query.style('.player')?.background;
		const posterUrl = background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			release.poster = posterUrl;
		}

		release.photos = curatePhotos(query.imgs('img.thumb, img.rthumb'));
		release.trailer = query.video();

		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
|
|
|
|
// Handjob Japan
|
|
/**
 * Fetch one page of the English samples listing.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} page - Page of the samples listing.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatestHandjob(channel, page) {
	const samplesUrl = `${channel.url}/en/samples/?page=${page}`;
	const res = await unprint.get(samplesUrl, { selectAll: '.item-title' });

	if (!res.ok) {
		return res.status;
	}

	// every selected title only marks the start of a sample, gather its siblings into one block
	const sampleBlocks = res.context.map(({ element }) => composeBlock(element));

	return scrapeAllHandjob(sampleBlocks, channel);
}
|
|
|
|
/**
 * Scrape the composed sample blocks of the Legs Japan samples page.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per composed sample block.
 * @param {object} channel - Channel entity; `channel.url` prefixes the actor links.
 * @returns {object[]} One release per block.
 */
function scrapeAllLegs(scenes, channel) {
	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl),
		url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
	});

	return scenes.map((scene) => {
		const { query } = scene;

		const release = {
			title: query.content('.tContent h3 strong'),
			duration: query.duration('//h3[contains(text(), "length")]/strong'),
			photoCount: query.number('//h3[contains(text(), "photos")]/strong'),
			actors: query.all('.tContent a[href*="girl/"]').map((actorEl) => toActor(actorEl)),
			tags: query.contents('a[href*="tag/"]'),
		};

		// the poster is only available as the player's CSS background
		const background = query.style('.player')?.background;
		const posterUrl = background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			release.poster = posterUrl;
		}

		release.photos = curatePhotos(query.imgs('.tThumbs img'));
		release.trailer = query.video();

		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
|
|
|
|
// Legs Japan
|
|
/**
 * Fetch one page of the English samples listing.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} page - Page of the samples listing.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatestLegs(channel, page) {
	const samplesUrl = `${channel.url}/en/samples/?page=${page}`;
	const res = await unprint.get(samplesUrl, { selectAll: '.player' });

	if (!res.ok) {
		return res.status;
	}

	// every player only marks the start of a sample, gather its siblings into one block
	const sampleBlocks = res.context.map(({ element }) => composeBlock(element));

	return scrapeAllLegs(sampleBlocks, channel);
}
|
|
|
|
/**
 * Scrape the `.girl.box` entries of the Tokyo Facefuck front page.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per entry.
 * @returns {object[]} One release per entry; actors are plain name strings, and an empty
 *   list when the entry has no name heading.
 */
function scrapeAllFacefuck(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.description = query.content('.infotxt');

		// names are comma-separated in one heading; an entry without that heading must not
		// abort the whole page
		release.actors = query.content('.info h1')?.split(',').map((actor) => actor.trim()) ?? [];

		const posterBackground = query.style('.player');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			release.poster = posterUrl;
		}

		release.photos = curatePhotos(query.imgs('.thumb img'));
		release.trailer = query.video();

		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
|
|
|
|
// Tokyo Facefuck
|
|
/**
 * Fetch one page of the English front page listing.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} page - Page of the listing.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatestFacefuck(channel, page) {
	const listingUrl = `${channel.url}/en/?page=${page}`;
	const res = await unprint.get(listingUrl, { selectAll: '.girl.box' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAllFacefuck(res.context, channel);
}
|
|
|
|
/**
 * Scrape the video blocks of the Trans Sex Japan samples page, each paired with its photo album block.
 *
 * @param {Array<[{ query: object }, ({ query: object }|undefined)]>} scenes - Pairs of
 *   [video context, album context]; the album context may be missing.
 * @returns {object[]} One release per pair; actors are plain name strings, and an empty
 *   list when the block names no actor.
 */
function scrapeAllTrans(scenes) {
	return scenes.map(([{ query }, albumContext]) => {
		const release = {};

		release.title = query.content('.sample-info h1');

		// a block without an actor link must not abort the whole page
		release.actors = query.content('.sample-info a strong')?.split(',').map((actor) => actor.trim()) ?? [];

		release.description = query.content('.sample-desc')?.replace('""', '') || null; // usually empty, but let's try it just in case

		release.duration = query.duration('.sample-info');
		release.photoCount = albumContext?.query.number('.sample-info', { match: /(\d+) photos/i, matchIndex: 1 });

		const posterBackground = query.style('.player');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			// the URL found on the page first, then its -2 and -1 numbered variants as fallbacks;
			// the dot is escaped so only a real '.jpg' extension is rewritten
			release.poster = [
				posterUrl,
				posterUrl.replace(/-\d\.jpg/, '-2.jpg'),
				posterUrl.replace(/-\d\.jpg/, '-1.jpg'),
			];
		}

		release.photos = curatePhotos(albumContext?.query.styles('.sample-lg, .sample-thumb').map((style) => style['background-image']?.match(/url\((.*)\)/)?.[1]));
		release.trailer = query.video();

		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
|
|
|
|
// Trans Sex Japan
|
|
/**
 * Fetch one page of the samples listing and pair each video block with a photo album block.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} page - Page of the samples listing.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatestTrans(channel, page) {
	const url = `${channel.url}/samples?page=${page}`;
	const res = await unprint.get(url, { select: '.stage' });

	// check the response before touching res.context, which a failed request may not provide
	if (!res.ok) {
		return res.status;
	}

	const videoHeads = unprint.initAll(res.context.element, '//div[contains(@class, "col-1") and .//div[contains(@class, "player")]]');
	const albumHeads = unprint.initAll(res.context.element, '//div[div[contains(@class, "sample-thumbs")]]');

	const videoBlocks = videoHeads.map(({ element }) => composeBlock(element));
	const albumBlocks = albumHeads.map(({ element }) => composeBlock(element));

	// videos and albums are paired purely by position; a video without a counterpart gets undefined
	const mergedContext = videoBlocks.map((context, index) => [context, albumBlocks[index]]);

	return scrapeAllTrans(mergedContext, channel);
}
|
|
|
|
/**
 * Scrape the video tiles returned by the Uralesbian data endpoint.
 *
 * Tiles without an entry ID and tiles whose overlay mentions 'photo' are skipped.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per `.content-obj` tile.
 * @param {object} channel - Channel entity; `channel.url` is used for actor and teaser URLs.
 * @returns {object[]} One release per video tile.
 */
function scrapeAllLesbianTiles(scenes, channel) {
	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null, { origin: channel.url }),
	});

	return scenes.flatMap(({ query }) => {
		const entryId = query.attribute('.scene-hover', 'data-path');

		// supplementary data, filter items without entry ID
		if (!entryId || query.content('.content-overlay')?.includes('photo')) {
			return [];
		}

		const release = {
			entryId,
			title: query.content('.content-title'),
			duration: query.duration('.content-size-model'),
			actors: query.all('.content-size-model a').map((actorEl) => toActor(actorEl)),
			tags: query.contents('.content-tags a'),
		};

		const background = query.style('.vidthumb')?.background;
		const posterUrl = background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			// largest guess first, the URL found on the page last
			const largePoster = posterUrl.replace('-sm', '-lg').replace('-med', '-lg');
			const mediumPoster = posterUrl.replace('-sm', '-med');

			release.poster = [largePoster, mediumPoster, posterUrl];
		}

		release.teaser = `${channel.url}/content/${release.entryId}/hover.mp4`;

		return [release];
	});
}
|
|
|
|
/**
 * Scrape the sample sets of the Uralesbian samples page and, where possible, split them into
 * chapters using the individual video tiles.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per `.tour-obj` set.
 * @param {object} channel - Channel entity; `channel.url` prefixes the actor links.
 * @param {object[]} tiles - Video tiles; `actors`, `entryId`, `title`, `duration`, `tags` and `poster` are read.
 * @returns {object[]} One release per set; sets containing a link to 'samples' are skipped.
 */
function scrapeAllLesbian(scenes, channel, tiles) {
	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl),
		url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
	});

	// a tile belongs to a cast when it lists the same number of actors and each of them, by name, is part of that cast
	const sharesCast = (tile, cast) => tile.actors.length === cast.length
		&& tile.actors.every((tileActor) => cast.some((releaseActor) => tileActor.name === releaseActor.name));

	const releases = [];

	for (const { query } of scenes) {
		if (query.exists('a[href*="samples"]')) {
			continue;
		}

		const release = {};

		release.actors = query.all('a[href*="model/"]').map((actorEl) => toActor(actorEl));

		// the last word of the first datum is passed on with an 'M' suffix as the timestamp
		const lastDatumWord = query.content('.tour-datum')?.split(' ').at(-1);

		release.duration = unprint.extractTimestamp(`${lastDatumWord}M`);
		release.videoCount = query.number('.tour-datum', { match: /(\d+) hd scenes/i, matchIndex: 1 });
		release.photoCount = query.number('//div[text()[contains(., "Photos")]]', { match: /(\d+) photos/i, matchIndex: 1 });

		const background = query.style('.player')?.background;
		const posterUrl = background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			release.poster = posterUrl;
		}

		release.trailer = query.video();
		release.photos = curatePhotos(query.imgs('.tour-thumb img'));

		const actorNames = release.actors.map((actor) => actor.name);

		release.entryId = slugify([entryIdFromMedia(release), ...actorNames]);

		const relatedTiles = tiles.filter((tile) => sharesCast(tile, release.actors));

		// if we found the same number of tiles as videos in this set, we can be pretty sure they relate to this set
		// if there are more, we have no way of determining which of the videos belong to this set
		if (relatedTiles.length === release.videoCount) {
			const sortedTiles = relatedTiles.toSorted((tileA, tileB) => tileA.entryId.localeCompare(tileB.entryId)); // entry IDs appear chronological

			release.tags = relatedTiles.flatMap((tile) => tile.tags);
			release.chapters = [];

			// each chapter starts where the durations of the preceding tiles add up to
			let elapsed = 0;

			for (const tile of sortedTiles) {
				release.chapters.push({
					title: tile.title,
					time: elapsed,
					duration: tile.duration,
					tags: tile.tags,
					poster: tile.poster,
				});

				elapsed += tile.duration;
			}
		}

		releases.push(release);
	}

	return releases;
}
|
|
|
|
// Uralesbian
|
|
/**
 * Fetch the individual video tiles from the data endpoint.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} [_page] - Unused, the endpoint has no known pagination.
 * @returns {Promise<object[]|number>} Scraped tiles, or the response status when the request fails.
 */
async function fetchLatestLesbianTiles(channel, _page) {
	// each sample on the samples page represents multiple videos, so for this site we start with the update tiles instead
	// query parameters of getdata.php:
	//   l=0     language, 0 = English, 1 = Japanese
	//   s=1     unclear, seems to be some sort of set, s=1 is everything, s=4 is front page
	//   c=5000  limit, only seems to apply to 'everything' set, seemingly unlimited by default but apply for good measure
	// no known pagination parameter at this moment, so we try to get everything
	const dataUrl = `${channel.url}/getdata.php?l=0&c=5000`;
	const res = await unprint.get(dataUrl, { selectAll: '.content-obj' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAllLesbianTiles(res.context, channel);
}
|
|
|
|
// Uralesbian
|
|
/**
 * Fetch one page of sample sets together with the video tiles used to split them into chapters.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} page - Page of the samples listing.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the samples request fails.
 */
async function fetchLatestLesbian(channel, page) {
	const url = `${channel.url}/en/samples?page=${page}`;

	const [res, tiles] = await Promise.all([
		unprint.get(url, { selectAll: '.tour-obj' }),
		fetchLatestLesbianTiles(channel),
	]);

	if (!res.ok) {
		return res.status;
	}

	// the tile request resolves to a status code instead of an array when it fails; the
	// samples remain usable without tiles, so fall back to an empty list rather than
	// letting the scraper call array methods on a number
	const usableTiles = Array.isArray(tiles) ? tiles : [];

	return scrapeAllLesbian(res.context, channel, usableTiles);
}
|
|
|
|
/**
 * Scrape the video tiles of the buffet layout's samples page.
 *
 * @param {Array<{ query: object }>} scenes - One unprint context per `.video` tile.
 * @param {object} channel - Channel entity; `channel.url` is the origin for actor URLs.
 * @returns {object[]} One release per tile. `entryId` is null when the tile has no link or
 *   the link has no `sample/<id>/` path.
 */
function scrapeAllBuffet(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.video-link');

		// a tile with a missing or unexpected link must not abort the whole page
		release.entryId = release.url
			? new URL(release.url).pathname.match(/sample\/(\w+)\//)?.[1] ?? null
			: null;

		release.title = query.content('.video-link');
		release.date = query.date('.date', 'MMM D, YYYY');

		release.actors = query.all('.model-name a').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		const posterUrl = query.img('.thumb');

		if (posterUrl) {
			// largest guess first, the URL found on the page last
			release.poster = [
				posterUrl
					.replace('-sm', '-lg')
					.replace('-med', '-lg'),
				posterUrl.replace('-sm', '-med'),
				posterUrl,
			];
		}

		return release;
	});
}
|
|
|
|
// Buffet layout, single samples page without pagination
|
|
/**
 * Fetch the samples page of the buffet layout.
 *
 * @param {object} channel - Channel entity; `channel.url` is the site root.
 * @param {number} [_page] - Unused, the samples page is not paginated.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request fails.
 */
async function fetchLatestBuffet(channel, _page) {
	const samplesUrl = `${channel.url}/samples`; // no pagination
	const res = await unprint.get(samplesUrl, { selectAll: '.videos .video' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAllBuffet(res.context, channel);
}
|
|
|
|
/**
 * Scrape a scene page of the buffet layout.
 *
 * @param {{ query: object }} context - unprint context of the scene page.
 * @param {{ url: string, entity: object }} options - Scene URL and the entity it belongs to;
 *   `entity.url` is the origin for actor URLs.
 * @returns {object} The release. `entryId` is null when the URL has no `sample/<id>/` path.
 */
function scrapeSceneBuffet({ query }, { url, entity }) {
	const release = {};

	// an unexpected URL format yields a null entry ID instead of a TypeError
	release.entryId = new URL(url).pathname.match(/sample\/(\w+)\//)?.[1] ?? null;

	release.title = query.text('.pg-nav h2');

	release.actors = query.all('.tags a[href*="girl/"]').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null, { origin: entity.url }),
	}));

	release.tags = query.contents('.tag-list a');

	const posterBackground = query.style('.player');
	const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

	if (posterUrl) {
		release.poster = [
			posterUrl.replace('-sm', '-lg'), // should already be -lg, but just in case
			posterUrl.replace('-lg', '-sm'),
		];
	}

	release.trailer = query.video('.player source');
	release.photos = query.imgs('.photos .photo', { attribute: 'href' });

	return release;
}
|
|
|
|
/**
 * Scrape a scene page; selectors for both the Cospuri and the Cute Butts page structure are tried.
 *
 * @param {{ query: object }} context - unprint context of the scene page.
 * @param {{ url: string, entity: object }} options - Scene URL and the entity it belongs to;
 *   `entity.url` is the origin for actor URLs, `entity.slug` selects the image host.
 * @returns {object} The release. When no entry ID can be read from the URL, `entryId` and
 *   `trailer` are null and `photos` is empty, as those URLs are built from the entry ID.
 */
function scrapeSceneCospuri({ query }, { url, entity }) {
	const release = {};

	const sceneUrl = new URL(url);

	// an unexpected URL format yields a null entry ID instead of a TypeError
	release.entryId = sceneUrl.searchParams.get('id')
		|| sceneUrl.pathname.match(/\/sample\/(.*)\//)?.[1]
		|| null;

	release.description = query.content('.detail-box .description');

	release.date = query.date([
		'.detail-box .date', // cospuri
		'//div[contains(@class, "details")]//span[strong[contains(text(), "Date")]]', // cute butts
	], 'YYYY・MM・DD', { match: /\d{4}・\d{2}・\d{2}/ });

	release.duration = query.duration([
		'.detail-box .length',
		'//div[contains(@class, "details")]//span[strong[contains(text(), "Runtime")]]', // cute butts
	]);

	release.photoCount = query.number([
		'.detail-box .photos',
		'//div[contains(@class, "details")]//span[strong[contains(text(), "Photos")]]', // cute butts
	]);

	release.actors = query.all('.sample-model a, .model a').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null, { origin: entity.url }),
	}));

	release.tags = [...query.contents('.tag'), query.content('.sample-channel')].filter(Boolean);

	const posterBackground = query.style('.player');
	const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

	if (posterUrl) {
		release.poster = posterUrl;
	}

	// photo and trailer URLs are derived from the entry ID; without one they would point at
	// '/preview/null/...', so they are left out instead
	const previewBase = release.entryId && `https://img.${entity.slug}.com/preview/${release.entryId}`;

	release.photos = previewBase
		? query.attributes('.thumb a', 'data-asset').map((photoIndex) => [
			`${previewBase}/${photoIndex}.jpg`,
			`${previewBase}/${photoIndex}s.jpg`,
		])
		: [];

	release.trailer = previewBase
		? `${previewBase}/sample.mp4`
		: null;

	if (query.exists('.detail-box .fourK')) {
		release.qualities = [2160];
	}

	return release;
}
|
|
|
|
// Sperm Mania, Fellatio Japan
|
|
/**
 * 'Deep' scrape a release by looking up its tile on the page stored in `baseRelease.path`.
 *
 * @param {string} url - Unused; the sites served by this function have no dedicated scene page.
 * @param {object} channel - Channel entity, passed on to the tile scraper.
 * @param {object|null|undefined} baseRelease - Release from the latest feed; needs `entryId` and `path`.
 * @returns {Promise<object|number|null>} The matching tile, null when there is nothing to look
 *   up or no tile matches, or the response status when the request fails.
 */
async function fetchScene(url, channel, baseRelease) {
	// also covers a missing base release, which would otherwise throw on property access
	if (!baseRelease?.entryId || !baseRelease?.path) {
		return null;
	}

	// no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape
	// can't use front page like on Sperm Mania because dates are missing
	const res = await unprint.get(baseRelease.path, { selectAll: '.scene, .scene-obj' });

	if (!res.ok) {
		return res.status;
	}

	const tiles = scrapeAllTiles(res.context, channel);

	return tiles.find((tile) => tile.entryId === baseRelease.entryId) || null;
}
|
|
|
|
/**
 * Pick the cup, bust, waist and hip values out of a measurements string.
 *
 * The letters B, W and H (case-insensitive) each introduce a number; the cup is the word
 * following the bust number and a dash.
 *
 * @param {string} sizes - Measurements string.
 * @returns {{ cup: (string|undefined), bust: *, waist: *, hip: * }} Cup as matched; the
 *   other values are whatever unprint.extractNumber() makes of the matched digits.
 */
function extractSizes(sizes) {
	const capture = (pattern) => sizes.match(pattern)?.[1];

	const cup = capture(/b\d+-(\w+)/i);
	const bust = unprint.extractNumber(capture(/b(\d+)/i));
	const waist = unprint.extractNumber(capture(/w(\d+)/i));
	const hip = unprint.extractNumber(capture(/h(\d+)/i));

	return {
		cup,
		bust,
		waist,
		hip,
	};
}
|
|
|
|
// SpermMania, Handjob Japan
|
|
/**
 * Scrape an actor profile page; the selectors cover the bio structures of several sites.
 *
 * @param {{ query: object }} context - unprint context of the profile page.
 * @param {object} channel - Entity the profile belongs to; not used by this scraper.
 * @param {string} url - Profile URL, stored on the profile as-is.
 * @returns {object} The profile.
 */
function scrapeProfile({ query }, channel, url) {
	const profile = { url };

	const bioRowSelector = '.actr-item, .profile tr, #profile tr, .profile-info li, .model-detail .item, .model-item';

	// the row label becomes the key; the '_' argument presumably makes slugify() join words
	// with underscores, matching lookups such as bio.foot_size below
	const readKey = (bioEl) => slugify(unprint.query.content(bioEl, 'td, b, .model-item-title') || unprint.query.text(bioEl), '_');

	// ensure social links have priority over text
	const readValue = (bioEl) => unprint.query.url(bioEl)
		|| unprint.query.content(bioEl, 'strong, td:last-child, span, .model-item-contents')
		|| unprint.query.text(bioEl);

	const bio = Object.fromEntries(query.all(bioRowSelector).map((bioEl) => [readKey(bioEl), readValue(bioEl)]));

	profile.birthPlace = bio.from || bio.country;

	const descriptionParts = [
		bio.hobbies && `Hobbies: ${bio.hobbies}`,
		bio.skills && `Skills: ${bio.skills}`,
		bio.fun_fact,
		query.content('h2 + p'),
	];

	profile.description = descriptionParts.filter(Boolean).join('. ') || null;

	profile.age = unprint.extractNumber(bio.age);
	profile.height = unprint.extractNumber(bio.height);

	const sizes = bio.sizes || bio.measurements;

	if (/b\d+/i.test(sizes)) {
		// structured sizes are split up into separate fields
		const { cup, bust, waist, hip } = extractSizes(sizes);

		profile.cup = cup;
		profile.bust = bust;
		profile.waist = waist;
		profile.hip = hip;
	} else {
		profile.measurements = bio.measurements;
	}

	profile.foot = unprint.extractNumber(bio.foot_size);
	profile.leg = unprint.extractNumber(bio.leg_length);
	profile.thigh = unprint.extractNumber(bio.thigh_width);

	profile.social = [bio.homepage, bio.twitter].filter(Boolean);

	// try a regular image first, then the first costume image, then a CSS background image
	const avatar = query.img('.scene-array img[src*="/actress"], img.portrait, .profile-img img')
		|| query.img('.costume-bg', { attribute: 'data-img' })
		|| query.style('.model-profile, #profile, .carousel-item')?.['background-image']?.match(/url\((.*)\)/)?.[1];

	if (avatar) {
		const portrait = avatar.replace('-header.jpg', '.jpg'); // Transex Japan, prefer avatar over header banner

		profile.avatar = [portrait, avatar];
	}

	// the first costume image is skipped here; a header banner is kept as a photo
	const costumePhotos = query.imgs('.costume-bg', { attribute: 'data-img' }).slice(1);
	const headerBanner = avatar?.includes('-header.jpg') && avatar;

	profile.photos = [...costumePhotos, headerBanner].filter(Boolean);

	return profile;
}
|
|
|
|
/**
 * Scrape an actor profile page of the lesbian layout.
 *
 * @param {{ query: object, html: string }} context - unprint context of the profile page.
 * @param {object} channel - Entity the profile belongs to; not used by this scraper.
 * @param {string} url - Profile URL, stored on the profile as-is.
 * @returns {object} The profile; `avatar` is undefined when the page contains no model image URL.
 */
function scrapeProfileLesbian({ query, html }, channel, url) {
	const profile = { url };

	profile.age = query.number('//strong[contains(text(), "Age")]/following-sibling::text()[1]');
	profile.height = query.number('//strong[contains(text(), "Height")]/following-sibling::text()[1]');
	profile.birthPlace = query.content('//img[contains(@src, "from")]/following-sibling::text()[1]')?.replace(/^from/i, '').trim() || null;

	const sizes = query.content('//strong[contains(text(), "Measurements")]/following-sibling::text()[1]');

	if (/b\d+/i.test(sizes)) {
		const measurements = extractSizes(sizes);

		profile.cup = measurements.cup;
		profile.bust = measurements.bust;
		profile.waist = measurements.waist;
		profile.hip = measurements.hip;
	}

	// the avatar is searched for in the raw HTML; dots in the host name are escaped so that
	// only the real image host matches, and a missing html string no longer throws
	profile.avatar = html?.match(/https:\/\/img\.uralesbian\.com\/models\/\d+\.jpg/)?.[0];

	return profile;
}
|
|
|
|
/**
 * Fetch and scrape an actor profile.
 *
 * The actor's own URL is preferred; otherwise the URL is built from `parameters.actors`
 * when set, or from the entity's `/actress/` path.
 *
 * @param {{ slug: string, url: (string|undefined) }} actor - Actor to look up.
 * @param {{ entity: object, parameters: object }} context - Entity and its scraper parameters;
 *   `parameters.layout === 'lesbian'` selects the lesbian profile scraper.
 * @returns {Promise<object|number>} The profile, or the response status when the request fails.
 */
async function fetchProfile({ slug, url: actorUrl }, { entity, parameters }) {
	let url = actorUrl;

	if (!url) {
		url = parameters.actors
			? `${parameters.actors}/${slug}`
			: `${entity.url}/actress/${slug}`;
	}

	const res = await unprint.get(url);

	if (!res.ok) {
		return res.status;
	}

	const scrape = parameters.layout === 'lesbian'
		? scrapeProfileLesbian
		: scrapeProfile;

	return scrape(res.context, entity, url);
}
|
|
|
|
module.exports = {
|
|
fetchLatest,
|
|
fetchProfile,
|
|
fetchScene,
|
|
cospuri: {
|
|
fetchLatest: fetchLatestCospuri,
|
|
scrapeScene: scrapeSceneCospuri,
|
|
fetchProfile,
|
|
},
|
|
fellatio: {
|
|
fetchLatest: fetchLatestFellatio,
|
|
fetchScene,
|
|
fetchProfile,
|
|
},
|
|
handjob: {
|
|
fetchLatest: fetchLatestHandjob,
|
|
fetchProfile,
|
|
},
|
|
legs: {
|
|
fetchLatest: fetchLatestLegs,
|
|
fetchProfile,
|
|
},
|
|
facefuck: {
|
|
fetchLatest: fetchLatestFacefuck,
|
|
},
|
|
trans: {
|
|
fetchLatest: fetchLatestTrans,
|
|
fetchProfile,
|
|
},
|
|
lesbian: {
|
|
fetchLatest: fetchLatestLesbian,
|
|
fetchProfile,
|
|
},
|
|
buffet: {
|
|
fetchLatest: fetchLatestBuffet,
|
|
scrapeScene: scrapeSceneBuffet,
|
|
fetchProfile,
|
|
},
|
|
};
|