2020-02-13 02:44:04 +00:00
|
|
|
'use strict';
|
|
|
|
|
2020-07-20 23:44:51 +00:00
|
|
|
const qu = require('../utils/qu');
|
2020-02-13 02:44:04 +00:00
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
|
|
|
|
/**
 * Build release objects from the thumbnails of a listing page.
 *
 * Thumbnails are skipped (not returned) when:
 * - they have no scene link, or the link is not a `/trailers/<id>.html` path,
 *   because no entry ID can be derived from them;
 * - `site.parameters.extract` is set and the thumbnail's badge text differs
 *   from `site.name`.
 *
 * @param {Array<{ query: object }>} scenes - one entry per thumbnail; `query`
 *   must expose `url`, `q`, `date`, `dur`, `all`, `img` and `trailer`.
 * @param {object} [site] - optional; only `name` and `parameters.extract` are read.
 * @returns {object[]} releases with url, entryId, title, date, duration,
 *   actors, poster and teaser.
 */
function scrapeAll(scenes, site) {
  return scenes.map(({ query }) => {
    const url = query.url('.text-thumb a');

    // Without a link `new URL()` would throw and take the whole page down with
    // it; drop just this thumbnail instead.
    if (!url) {
      return null;
    }

    const { pathname } = new URL(url);
    const entryId = pathname.match(/\/trailers\/(.*)\.html/)?.[1];

    // Not a trailer page, so there is nothing to identify the release by.
    if (!entryId) {
      return null;
    }

    const channelUrl = query.url('.badge');

    if (site?.parameters?.extract && query.q('.badge', true) !== site.name) {
      return null;
    }

    const release = {};

    // When the badge links to a channel, point the release at that channel's
    // origin while keeping the scene path.
    release.url = channelUrl ? `${channelUrl}${pathname}` : url;
    release.entryId = entryId;
    release.title = query.q('.text-thumb a', true);

    // The date and the duration are both read from the same `.date` element.
    release.date = query.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
    release.duration = query.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);

    release.actors = query.all('.category a', true);

    release.poster = query.img('img.video_placeholder, .video-images img');
    release.teaser = { src: query.trailer() };

    return release;
  }).filter(Boolean);
}
|
|
|
|
|
2020-08-02 01:44:14 +00:00
|
|
|
/**
 * Build a release object from a scene page.
 *
 * @param {{ query: object, html: string }} item - `query` must expose `q`,
 *   `date` and `all`; `html` is searched for a `video src="..."` attribute.
 * @param {string} url - absolute scene URL; the first `/<digits>` run in its
 *   path becomes the entry ID.
 * @param {object} _site - unused.
 * @param {object} [baseRelease] - optional earlier release; its `duration`
 *   takes precedence, and when it already has a `poster` the poster found
 *   here is stored under `photos` instead.
 * @returns {object} the scraped release.
 * @throws {TypeError} when `url` is not a valid absolute URL or its path has
 *   no `/<digits>` segment.
 */
function scrapeScene({ query, html }, url, _site, baseRelease) {
  const release = { url };

  const { pathname } = new URL(url);
  release.entryId = pathname.match(/\/\d+/)[0].slice(1);

  release.title = query.q('.trailer-block_title', true);
  release.description = query.q('.info-block:nth-child(3) .text', true);
  release.date = query.date('.info-block_data .text', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);

  // The info text may be absent; optional chaining keeps the fallback from
  // throwing. A missing or unparsable value yields NaN, which is falsy, so no
  // duration is set in that case.
  const duration = baseRelease?.duration
    || Number(query.q('.info-block_data .text', true)?.match(/(\d+)\s+min/)?.[1]) * 60;

  if (duration) release.duration = duration;

  release.actors = query.all('.info-block_data a[href*="/models"]', true);
  release.tags = query.all('.info-block a[href*="/categories"]', true);

  // Try the `src0_3x` attribute first, then `src0_2x`, then `data-src`.
  const posterEl = query.q('.update_thumb');
  const poster = posterEl?.getAttribute('src0_3x') || posterEl?.getAttribute('src0_2x') || posterEl?.dataset.src;

  if (poster && baseRelease?.poster) release.photos = [poster];
  else if (poster) release.poster = poster;

  const trailer = html.match(/video src="(.*?)"/);

  if (trailer) {
    release.trailer = trailer[1];
  }

  return release;
}
|
|
|
|
|
2020-07-20 23:44:51 +00:00
|
|
|
/**
 * Build an actor profile from a model page.
 *
 * The description lines are read as label/value pairs: labels come from the
 * `span` inside each line, values from `query.text()` of the line itself.
 * Labels are slugified with `_` and used as keys of an intermediate `bio` map.
 * Fields are only set on the profile when the page provides a usable value.
 *
 * @param {{ query: object }} item - `query` must expose `all`, `text` and `q`.
 * @returns {object} the profile, including `releases` scraped from the
 *   `.video-thumb` elements on the page.
 */
function scrapeProfile({ query }) {
  const profile = {};

  const keys = query.all('.model-descr_line:not(.model-descr_rait) p.text span', true);
  const values = query.all('.model-descr_line:not(.model-descr_rait) p.text').map((el) => query.text(el));

  // Pair labels with values by position; later duplicates overwrite earlier ones.
  const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]]));

  // Only store a number when the metric value in parentheses could be parsed,
  // so the profile never carries NaN.
  const height = Number(bio.height?.match(/\((\d+)\s*cm\)/)?.[1]);
  const weight = Number(bio.weight?.match(/\((\d+)\s*kg\)/)?.[1]);

  if (height) profile.height = height;
  if (weight) profile.weight = weight;

  if (bio.race) profile.ethnicity = bio.race;

  if (bio.date_of_birth) profile.birthdate = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
  if (bio.birthplace) profile.birthPlace = bio.birthplace;

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');

    // A part is unusable when it is missing (fewer than three parts) or
    // contains a `?` placeholder.
    const isKnown = (part) => Boolean(part) && !/\?/.test(part);

    if (isKnown(bust)) profile.bust = bust;
    if (isKnown(waist)) profile.waist = waist;
    if (isKnown(hip)) profile.hip = hip;
  }

  if (bio.hair) profile.hair = bio.hair;
  if (bio.eyes) profile.eyes = bio.eyes;

  // "various" means present without detail, "none" means absent; any other
  // text is kept as the description.
  if (/various/i.test(bio.tattoos)) profile.hasTattoos = true;
  else if (/none/i.test(bio.tattoos)) profile.hasTattoos = false;
  else if (bio.tattoos) {
    profile.hasTattoos = true;
    profile.tattoos = bio.tattoos;
  }

  if (/various/i.test(bio.piercings)) profile.hasPiercings = true;
  else if (/none/i.test(bio.piercings)) profile.hasPiercings = false;
  else if (bio.piercings) {
    profile.hasPiercings = true;
    profile.piercings = bio.piercings;
  }

  if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());

  // The image element may be absent; try `src0_3x`, then `src0_2x`, then `data-src`.
  const avatarEl = query.q('.model-img img');
  const avatar = avatarEl?.getAttribute('src0_3x') || avatarEl?.getAttribute('src0_2x') || avatarEl?.dataset.src;

  if (avatar) profile.avatar = avatar;

  const releases = query.all('.video-thumb');
  profile.releases = scrapeAll(qu.initAll(releases));

  return profile;
}
|
|
|
|
|
|
|
|
/**
 * Fetch one page of the movie listing and scrape its thumbnails.
 *
 * When `site.parameters.extract` is set, the listing is read from
 * cherrypimps.com instead of the site's own URL.
 *
 * @param {object} site - reads `url` and `parameters.extract`; passed on to `scrapeAll`.
 * @param {number} [page=1] - listing page number interpolated into the URL.
 * @returns {Promise<object[]|*>} scraped releases, or `res.status` when the request was not ok.
 */
async function fetchLatest(site, page = 1) {
  let listingUrl;

  if (site.parameters?.extract) {
    listingUrl = `https://cherrypimps.com/categories/movies_${page}.html`;
  } else {
    listingUrl = `${site.url}/categories/movies_${page}.html`;
  }

  const response = await qu.getAll(listingUrl, 'div.video-thumb');

  if (!response.ok) {
    return response.status;
  }

  return scrapeAll(response.items, site);
}
|
|
|
|
|
|
|
|
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene page URL.
 * @param {object} site - passed through to `scrapeScene`.
 * @param {object} [release] - base release passed through to `scrapeScene`.
 * @returns {Promise<object|*>} the scraped release, or `res.status` when the request was not ok.
 */
async function fetchScene(url, site, release) {
  const response = await qu.get(url);

  if (!response.ok) {
    return response.status;
  }

  return scrapeScene(response.item, url, site, release);
}
|
|
|
|
|
2020-07-20 23:44:51 +00:00
|
|
|
/**
 * Fetch and scrape an actor's profile page.
 *
 * Two slug variants of the name are tried in order: `slugify(name)` and
 * `slugify(name, '')`. The second is requested only when the first request
 * fails and the two URLs actually differ.
 *
 * @param {{ name: string }} actor - only `name` is read.
 * @param {{ site?: object, network: object }} context - `site.url` is used as
 *   the origin when present, otherwise `network.url`.
 * @returns {Promise<object|*>} the scraped profile, or the status of the last
 *   attempted request when none succeeded.
 */
async function fetchProfile({ name: actorName }, { site, network }) {
  const origin = site?.url || network.url;

  // The previous per-scraper conditional produced the same pair of URLs in
  // both branches, so the URLs are built unconditionally.
  const url = `${origin}/models/${slugify(actorName)}.html`;
  const fallbackUrl = `${origin}/models/${slugify(actorName, '')}.html`;

  const res = await qu.get(url);

  if (res.ok) return scrapeProfile(res.item);

  // Identical slugs give identical URLs; do not repeat a request that just failed.
  if (fallbackUrl === url) return res.status;

  const fallbackRes = await qu.get(fallbackUrl);

  return fallbackRes.ok ? scrapeProfile(fallbackRes.item) : fallbackRes.status;
}
|
|
|
|
|
|
|
|
module.exports = {
|
2020-05-14 02:26:05 +00:00
|
|
|
fetchLatest,
|
|
|
|
fetchScene,
|
|
|
|
fetchProfile,
|
2020-02-13 02:44:04 +00:00
|
|
|
};
|