2023-07-02 03:07:38 +00:00
|
|
|
'use strict';
|
|
|
|
|
|
|
|
const unprint = require('unprint');
|
|
|
|
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
const { feetInchesToCm } = require('../utils/convert');
|
|
|
|
|
|
|
|
/**
 * Derives a stable entry ID for a release.
 *
 * The ID is the slugified file name of the release URL (the part before
 * `.html`). When the URL is missing, cannot be parsed, or has no such file
 * name, the ID is built from the release date, title and actor names instead.
 *
 * @param {Object} release - release being scraped
 * @param {string} [release.url] - absolute URL of the release page
 * @param {Date} [release.date] - release date, used by the fallback
 * @param {string} [release.title] - release title, used by the fallback
 * @param {Array<string|{name: string}>} [release.actors] - actors, used by the fallback
 * @returns {*} whatever `slugify` returns for the chosen input
 */
function getEntryId(release) {
	let pathname = null;

	try {
		pathname = new URL(release.url).pathname;
	} catch {
		// A missing or relative URL must not abort the scrape; use the fallback ID below.
	}

	const urlSlug = pathname?.match(/\/([\w-]+)\.html/)?.[1];

	if (urlSlug) {
		return slugify(urlSlug);
	}

	// Actors are scraped as { name, url } objects; only the name belongs in the ID.
	// Movies have no actors at all, so default to an empty list.
	const actorNames = (release.actors || [])
		.map((actor) => (typeof actor === 'string' ? actor : actor?.name))
		.filter(Boolean);

	return slugify([unprint.formatDate(release.date, 'YYYY-MM-DD'), release.title, ...actorNames]);
}
|
|
|
|
|
|
|
|
/**
 * Converts a list of scene tile contexts into release objects.
 *
 * @param {Array<{query: Object}>} scenes - contexts exposing a `query` helper scoped to one tile
 * @returns {Array<Object>} one release per tile, with url, title, date, duration,
 *   actors, photoCount, entryId, and poster when a usable thumbnail is present
 */
function scrapeAll(scenes) {
	// Thumbnails matching this path are not used as posters.
	const skippedPosterPattern = /images\/p\d+\.jpe?g/i;

	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	});

	return scenes.map((scene) => {
		const { query } = scene;

		const release = {
			url: query.url('a'),
			title: query.content('a span'),
			date: query.date('.timeDate', 'YYYY-MM-DD'),
			duration: query.duration('.timeDate'),
			actors: query.all('a[href*="models/"], a[href*="sets.php"]').map(toActor),
		};

		const thumbnail = query.img('img.mainThumb');

		if (thumbnail && !skippedPosterPattern.test(thumbnail)) {
			release.poster = thumbnail;
		}

		release.photoCount = query.number('.timeDate');
		// The entry ID is derived from the fields collected above, so it is assigned last.
		release.entryId = getEntryId(release);

		return release;
	});
}
|
|
|
|
|
|
|
|
/**
 * Scrapes a single scene page into a release object.
 *
 * @param {Object} context - page context
 * @param {Object} context.query - query helper scoped to the scene page
 * @param {string} context.html - raw page HTML, searched for `poster` and `src` attributes
 * @param {Object} options
 * @param {string} options.url - URL of the scene page
 * @returns {Object} release with url, title, description, date, duration, actors,
 *   poster, trailer, photoCount, tags and entryId
 */
function scrapeScene({ query, html }, { url }) {
	const release = { url };

	release.title = query.content('.title h2');
	release.description = query.content('.description p');

	release.date = query.date('.info p', 'MMMM D, YYYY');
	release.duration = query.duration('.info p');

	release.actors = query.all('.info a[href*="models/"], .info a[href*="sets.php"]').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	// [^"]* keeps the capture inside a single attribute value. A greedy .* runs on to the
	// last matching quote on the line and swallows neighbouring attributes.
	release.poster = query.img('.update_thumb') || html.match(/poster="([^"]*\.jpg)"/)?.[1];
	release.trailer = html.match(/src="([^"]*\.mp4)"/)?.[1];

	release.photoCount = query.number('.info', { match: /(\d+) photos/i, matchIndex: 1 });
	release.tags = query.contents('.info .tags a');

	// The entry ID is derived from the fields collected above, so it is assigned last.
	release.entryId = getEntryId(release);

	return release;
}
|
|
|
|
|
|
|
|
/**
 * Scrapes a movie page into a release object, including the scenes listed on it.
 *
 * @param {Object} context - page context
 * @param {Object} context.query - query helper scoped to the movie page
 * @param {Object} context.element - root element searched for `.item-video` scene tiles
 * @param {Object} options
 * @param {Object} options.entity - entity whose `url` is passed as origin for image URLs
 * @param {string} options.url - URL of the movie page
 * @returns {Object} release with url, title, description, covers, entryId and scenes
 */
function scrapeMovie({ query, element }, { entity, url }) {
	// Image attributes to read from the cover thumbnail, in the order they are listed
	// in the result; the 4x and 3x variants are usually upscaled, so they come last.
	const coverAttributes = ['src0_2x', 'src0_1x', 'src0', 'src0_4x', 'src0_3x'];

	const release = {
		url,
		title: query.content('.title h2'),
		description: query.content('.aboutArea p'),
		// Single cover, expressed as a list of candidate sources.
		covers: [
			coverAttributes
				.map((attribute) => query.img('.update_thumb', { attribute, origin: entity.url }))
				.filter(Boolean),
		],
	};

	release.entryId = getEntryId(release);
	release.scenes = scrapeAll(unprint.initAll(element, '.item-video'));

	return release;
}
|
|
|
|
|
|
|
|
/**
 * Scrapes a model page into a profile object, including the scenes listed on it.
 *
 * @param {Object} context - page context
 * @param {Object} context.query - query helper scoped to the model page
 * @param {Object} context.element - root element searched for `.item-video` scene tiles
 * @param {Object} options
 * @param {string} options.url - URL of the model page
 * @param {Object} options.entity - entity whose `url` is passed as origin for image URLs
 * @returns {Object} profile with url, description, bio fields, avatar sources and scenes
 */
function scrapeProfile({ query, element }, { url, entity }) {
	// Prefers a centimetre value; otherwise converts a "N feet N inch" style value.
	const parseHeight = (height) => {
		const centimetres = Number(height?.match(/(\d+)\s*cm/)?.[1]);

		if (centimetres) {
			return centimetres;
		}

		if (/\d fe*t \d+ inch/i.test(height)) {
			return feetInchesToCm(height) || null;
		}

		return null;
	};

	// Map each stats row to [slugified label, row text], dropping rows missing either part.
	const bio = Object.fromEntries(query.all('.stats li').flatMap((row) => {
		const key = slugify(unprint.query.content(row, '.data-name, span'), '_');
		const value = unprint.query.text(row);

		return key && value ? [[key, value]] : [];
	}));

	const avatarAttributes = ['src0_4x', 'src0_3x', 'src0_2x', 'src0_1x', 'src0'];

	return {
		url,
		description: query.content('.aboutArea p'),
		birthPlace: bio.place_of_birth,
		// The date of birth is extracted from the row labelled "age".
		dateOfBirth: unprint.extractDate(bio.age, 'MMMM D, YYYY'),
		height: parseHeight(bio.height),
		measurements: bio.measurements,
		hairColor: bio.hair_color,
		eyes: bio.eye_color,
		avatar: avatarAttributes
			.map((attribute) => query.img('.model_bio_thumb', { attribute, origin: entity.url }))
			.filter(Boolean),
		scenes: scrapeAll(unprint.initAll(element, '.item-video')),
	};
}
|
|
|
|
|
|
|
|
/**
 * Fetches one page of the channel's movie listing and scrapes its scene tiles.
 *
 * @param {Object} channel - channel whose `url` is the site base URL
 * @param {number} [page=1] - listing page number inserted into the URL
 * @returns {Promise<Array<Object>|*>} scraped releases, or the response status when the request fails
 */
async function fetchLatest(channel, page = 1) {
	const res = await unprint.get(`${channel.url}/tour/categories/movies_${page}_d.html`, {
		selectAll: '.item-video',
	});

	// On failure the status is handed back to the caller instead of a release list.
	return res.ok
		? scrapeAll(res.context, channel)
		: res.status;
}
|
|
|
|
|
|
|
|
/**
 * Fetches and scrapes an actor profile, trying several candidate URLs in order.
 *
 * Candidates are the known actor URL (if any), then model pages built from the
 * actor name slugified with '-' and with no delimiter. Falsy and duplicate
 * candidates are skipped, and requests stop at the first successful response.
 *
 * @param {Object} actor
 * @param {string} [actor.name] - actor name used to build candidate URLs
 * @param {string} [actor.url] - known profile URL, tried first
 * @param {Object} options
 * @param {Object} options.entity - entity whose `url` is the site base URL
 * @param {*} options.include - forwarded untouched to `scrapeProfile`
 * @returns {Promise<Object|*>} scraped profile, or the status of the last failed
 *   request (null when no request was made)
 */
async function fetchProfile({ name: actorName, url: actorUrl }, { entity, include }) {
	// Without a name the slug URLs would be meaningless, so they are only built when one is given.
	const slugUrls = actorName
		? ['-', ''].map((delimiter) => `${entity.url}/tour/models/${slugify(actorName, delimiter)}.html`)
		: [];

	// Single-word names slugify identically with either delimiter; the Set avoids a repeat request.
	const candidateUrls = [...new Set([actorUrl, ...slugUrls].filter(Boolean))];

	// Promise chain keeps the requests strictly sequential.
	const res = await candidateUrls.reduce(async (chain, url) => {
		const prevRes = await chain;

		if (prevRes.ok) {
			return prevRes;
		}

		const actorRes = await unprint.get(url);

		if (actorRes.ok) {
			return {
				...actorRes,
				url,
			};
		}

		// Carry the failure status forward so the caller sees why the lookup failed.
		return { ok: false, status: actorRes.status ?? prevRes.status };
	}, Promise.resolve({ ok: false, status: null }));

	if (res.ok) {
		return scrapeProfile(res.context, { entity, include, url: res.url });
	}

	return res.status;
}
|
|
|
|
|
|
|
|
module.exports = {
|
|
|
|
fetchLatest,
|
|
|
|
fetchProfile,
|
|
|
|
scrapeScene: {
|
|
|
|
scraper: scrapeScene,
|
|
|
|
unprint: true,
|
|
|
|
},
|
|
|
|
scrapeMovie: {
|
|
|
|
scraper: scrapeMovie,
|
|
|
|
unprint: true,
|
|
|
|
},
|
|
|
|
};
|