2020-03-11 23:15:25 +00:00
|
|
|
'use strict';
|
|
|
|
|
2020-03-12 01:19:45 +00:00
|
|
|
const { get, geta, initAll, formatDate } = require('../utils/qu');
|
2020-03-11 23:59:32 +00:00
|
|
|
const slugify = require('../utils/slugify');
|
2020-03-11 23:15:25 +00:00
|
|
|
|
2020-03-12 01:19:45 +00:00
|
|
|
const { feetInchesToCm } = require('../utils/convert');
|
|
|
|
|
|
|
|
/**
 * Scrape a list of scene tiles into release objects.
 *
 * @param {Array<{ qu: object }>} scenes - One entry per scene tile; each exposes the `qu` query helper used below.
 * @param {{ url: string }} site - Site whose `url` is prefixed to relative image paths.
 * @returns {object[]} One release per entry in `scenes`.
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h3 a', true);
		release.url = qu.url('h3 a');

		release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
		release.duration = qu.dur('.item-meta li:nth-child(2)');
		release.description = qu.q('.description', true);

		release.actors = qu.all('a[href*="/models"]', true);

		if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];

		// Every thumbnail becomes a list of fallback sources, highest resolution first.
		const imageSets = qu.all('.item-thumbs img')
			.map(image => ['src0_3x', 'src0_2x', 'src0_1x']
				.map(attribute => image.getAttribute(attribute))
				.filter(Boolean)
				.map(source => (/^http/.test(source) ? source : `${site.url}${source}`)))
			// A thumbnail without any src0_* attribute has nothing to offer; keeping it
			// would put an empty source list in the poster or photos slot.
			.filter(sources => sources.length > 0);

		// The first usable thumbnail is the poster, the remainder are photos.
		[release.poster, ...release.photos] = imageSets;

		release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

		return release;
	});
}
|
|
|
|
|
2020-03-11 23:59:32 +00:00
|
|
|
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ html: string, qu: object }} item - Raw page markup plus the `qu` query helper for the page.
 * @param {string} url - URL of the scene page; stored on the release as-is.
 * @param {{ url: string }} site - Site whose `url` is prefixed to relative media paths.
 * @returns {object} The scraped release.
 */
function scrapeScene({ html, qu }, url, site) {
	const release = { url };

	release.title = qu.q('.item-episode h4 a', true);
	release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = qu.dur('.item-meta li:nth-child(2)');
	release.description = qu.q('.description', true);

	release.actors = qu.all('.item-episode a[href*="/models"]', true);

	if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];

	// [^"]* keeps the capture inside the attribute value; a greedy .* would run on
	// to the last `.jpg"` / `.mp4"` found on the same line. The extension dot is
	// escaped so it only matches a literal dot.
	const posterPath = html.match(/poster="([^"]*\.jpg)"/)?.[1];
	const trailerPath = html.match(/video src="([^"]*\.mp4)"/)?.[1];

	if (posterPath) {
		const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;

		// Try the higher resolution variants first, falling back to the path found in the page.
		release.poster = [
			poster.replace('-1x', '-3x'),
			poster.replace('-1x', '-2x'),
			poster,
		];
	}

	if (trailerPath) {
		const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;

		release.trailer = { src: trailer };
	}

	release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

	return release;
}
|
|
|
|
|
2020-03-12 01:19:45 +00:00
|
|
|
/**
 * Collect an actor's releases by following the paginated sets listing.
 *
 * @param {string|number} actorId - Identifier interpolated into the `id` query parameter.
 * @param {{ url: string }} site - Site whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Page to fetch on this call.
 * @param {object[]} [accScenes=[]] - Releases gathered from earlier pages.
 * @returns {Promise<object[]>} All releases gathered up to the last page that loaded.
 */
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
	const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
	const res = await get(url);

	// A failing page ends pagination, but releases from earlier pages are still
	// valid and must not be thrown away.
	if (!res.ok) return accScenes;

	const quReleases = initAll(res.item.el, '.item-episode');
	const releases = scrapeAll(quReleases, site);
	const collected = accScenes.concat(releases);

	// Continue only when the page links to the next page number.
	const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);

	if (nextPage) {
		return fetchActorReleases(actorId, site, page + 1, collected);
	}

	return collected;
}
|
|
|
|
|
|
|
|
/**
 * Scrape an actor profile page.
 *
 * @param {{ qu: object }} item - Exposes the `qu` query helper for the profile page.
 * @param {{ url: string }} site - Site whose `url` is prefixed to relative avatar paths.
 * @param {boolean} withScenes - When truthy, the actor's releases are fetched as well.
 * @returns {Promise<object>} The scraped profile.
 */
async function scrapeProfile({ qu }, site, withScenes) {
	const profile = {};

	// Turn "Key: value" stat rows into a lookup keyed by the slugified key.
	const bio = qu.all('.stats li', true).reduce((acc, row) => {
		const separatorIndex = row.indexOf(':');

		// Rows without a separator carry no key/value pair; skip them instead of crashing.
		if (separatorIndex === -1) return acc;

		// Split on the first colon only, so values that contain a colon stay intact.
		const key = row.slice(0, separatorIndex);
		const value = row.slice(separatorIndex + 1).trim();

		return { ...acc, [slugify(key, '_')]: value };
	}, {});

	if (bio.height) profile.height = feetInchesToCm(bio.height);

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');
		const waistSize = Number(waist);
		const hipSize = Number(hip);

		if (bust) profile.bust = bust;

		// Non-numeric parts are left out rather than stored as NaN.
		if (waist && !Number.isNaN(waistSize)) profile.waist = waistSize;
		if (hip && !Number.isNaN(hipSize)) profile.hip = hipSize;
	}

	// Fallback list of avatar sources, highest resolution first.
	profile.avatar = ['src0_3x', 'src0_2x', 'src0_1x']
		.map(attribute => qu.q('.profile-pic img', attribute))
		.filter(Boolean)
		.map(source => (/^http/.test(source) ? source : `${site.url}${source}`));

	if (withScenes) {
		// The actor ID is taken from the avatar's `id` attribute.
		const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

		if (actorId) {
			profile.releases = await fetchActorReleases(actorId, site);
		}
	}

	return profile;
}
|
|
|
|
|
2020-03-11 23:15:25 +00:00
|
|
|
/**
 * Fetch one page of the latest movies listing and scrape its scene tiles.
 *
 * @param {{ url: string }} site - Site whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Listing page to request.
 * @returns {Promise<object[]|*>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(site, page = 1) {
	const listing = await geta(`${site.url}/categories/movies/${page}/latest/`, '.item-episode');

	if (!listing.ok) {
		return listing.status;
	}

	return scrapeAll(listing.items, site);
}
|
|
|
|
|
2020-03-11 23:59:32 +00:00
|
|
|
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - URL of the scene page.
 * @param {{ url: string }} site - Site passed through to the scene scraper.
 * @returns {Promise<object|*>} The scraped release, or the response status when the request failed.
 */
async function fetchScene(url, site) {
	const page = await get(url);

	if (!page.ok) {
		return page.status;
	}

	return scrapeScene(page.item, url, site);
}
|
|
|
|
|
2020-03-12 01:19:45 +00:00
|
|
|
/**
 * Look up an actor's profile page by name and scrape it.
 *
 * The profile URL is tried with two slug variants of the name: first with an
 * empty delimiter, then with the default delimiter.
 *
 * @param {string} actorName - Actor name to slugify into the profile URL.
 * @param {string} scraperSlug - Not used by this scraper; kept for the shared call signature.
 * @param {{ url: string }} site - Site whose `url` is the base of the profile URL.
 * @param {{ scenes?: boolean }} [include] - When `scenes` is truthy, releases are fetched too.
 * @returns {Promise<object|*>} The scraped profile, or the status of the last response when no page loaded.
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
	const actorSlugA = slugify(actorName, '');
	const actorSlugB = slugify(actorName);

	let res = await get(`${site.url}/models/${actorSlugA}.html`);

	// Retrying is only useful when the second slug points to a different URL;
	// for names where both variants are equal it would repeat the same request.
	if (!res.ok && actorSlugB !== actorSlugA) {
		res = await get(`${site.url}/models/${actorSlugB}.html`);
	}

	// `include` may be omitted by callers that do not need scenes.
	return res.ok ? scrapeProfile(res.item, site, include?.scenes) : res.status;
}
|
|
|
|
|
2020-03-11 23:15:25 +00:00
|
|
|
// Entry points exported by this scraper module.
module.exports = { fetchLatest, fetchScene, fetchProfile };
|