2020-01-16 20:04:44 +00:00
|
|
|
'use strict';
|
|
|
|
|
|
|
|
const blake2 = require('blake2');
|
|
|
|
|
2021-02-01 19:49:08 +00:00
|
|
|
const qu = require('../utils/qu');
|
2020-01-16 20:04:44 +00:00
|
|
|
|
|
|
|
/**
 * Derives a short deterministic hex digest from an identifier string.
 *
 * Uses blake2b with a digest length of 8, hex-encoded.
 *
 * @param {string} identifier - Value to hash; converted with `Buffer.from`.
 * @returns {string} Hex-encoded digest.
 */
function getHash(identifier) {
  const input = Buffer.from(identifier);
  const hasher = blake2.createHash('blake2b', { digestLength: 8 });

  hasher.update(input);

  return hasher.digest('hex');
}
|
|
|
|
|
|
|
|
/**
 * Collects the entries listed under the 'Male Models' label of a tag container.
 *
 * The container's child nodes are reduced to `{ type, text }` records and
 * nodes whose trimmed text is empty are discarded. Everything after the node
 * whose trimmed text is exactly 'Male Models' is returned, up to (but not
 * including) the next text node (nodeType 3), or up to the end of the list
 * when no further text node follows.
 *
 * @param {?{childNodes: Iterable<{nodeType: number, textContent: string}>}} tagContainer
 *   Node-like object exposing `childNodes`; a falsy value yields an empty list.
 * @returns {string[]} Trimmed text of each entry in the section; empty when
 *   the container or the label is missing.
 */
function extractMaleModelsFromTags(tagContainer) {
  if (!tagContainer) {
    return [];
  }

  const tagEls = Array.from(tagContainer.childNodes, (node) => ({
    type: node.nodeType,
    text: node.textContent.trim(),
  })).filter((node) => node.text.length > 0);

  const modelLabelIndex = tagEls.findIndex((node) => node.text === 'Male Models');

  if (modelLabelIndex === -1) {
    return [];
  }

  const nextLabelIndex = tagEls.findIndex((node, index) => index > modelLabelIndex && node.type === 3);

  // Without a following text node findIndex yields -1; passing that to slice
  // as the end index would cut off the last model, so run to the end instead.
  const sectionEnd = nextLabelIndex === -1 ? tagEls.length : nextLabelIndex;

  return tagEls.slice(modelLabelIndex + 1, sectionEnd).map((model) => model.text);
}
|
|
|
|
|
2021-02-01 19:49:08 +00:00
|
|
|
/**
 * Determines which site a photo belongs to by searching the photo string for
 * a known site slug.
 *
 * Candidate slugs are taken from `channel.children` when `channel.type` is
 * 'network', otherwise from `channel.parent.children`.
 *
 * @param {string} photo - Photo URL (or any string) to search.
 * @param {Object} channel - Entity with a `type` and either `children` or
 *   `parent.children`; each child is expected to carry a `slug`.
 * @returns {Promise<?string>} The matched slug, or null when the photo is
 *   missing, there are no candidate slugs, or none of them occurs in `photo`.
 */
async function extractChannelFromPhoto(photo, channel) {
  const siblings = channel.type === 'network' ? channel.children : channel.parent?.children;

  const slugPatterns = (siblings ?? [])
    .map((child) => child.slug)
    .filter(Boolean)
    // slugs are matched literally, so neutralise any regex metacharacters
    .map((slug) => String(slug).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

  // An empty alternation would match every string (yielding ''), and a
  // missing children list used to throw on `.join`; both mean "no channel".
  if (!photo || slugPatterns.length === 0) {
    return null;
  }

  const channelMatch = photo.match(new RegExp(slugPatterns.join('|')));

  return channelMatch ? channelMatch[0] : null;
}
|
|
|
|
|
2021-02-01 19:49:08 +00:00
|
|
|
/**
 * Builds a release record for every scene tile on a listing page.
 *
 * The entry ID is a hash of the site slug, the third '/'-separated segment of
 * the release URL's pathname, and the ISO string of the release date. Actors
 * are derived by splitting the title on '&'.
 *
 * @param {Array<{query: Object}>} scenes - Items exposing the `query` helper
 *   (q, url, date, imgs, cnts, duration are used here).
 * @param {Object} site - Site entity; `url` and `slug` are read.
 * @returns {Promise<Object[]>} One release object per scene.
 */
async function scrapeLatest(scenes, site) {
  const toRelease = ({ query }) => {
    const title = query.q('a', 'title');
    const url = query.url('a', 'href', { origin: site.url });
    const date = query.date('.nm-date', 'MM/DD/YYYY');

    const [, , slug] = new URL(url).pathname.split('/');
    const entryId = getHash(`${site.slug}${slug}${date.toISOString()}`);

    const actors = title.split('&').map((name) => name.trim());

    // first image serves as the poster, the remainder as photos
    const [poster, ...photos] = query.imgs('.bloc-link img');

    const tags = query.cnts('.dropdown ul a').slice(1);
    const duration = query.duration('.dropdown p:first-child');

    return {
      title,
      url,
      date,
      entryId,
      actors,
      poster,
      photos,
      tags,
      duration,
    };
  };

  return scenes.map(toRelease);
}
|
|
|
|
|
2021-02-01 19:49:08 +00:00
|
|
|
/**
 * Builds a release record from a single scene page.
 *
 * When a channel slug can be derived from the first photo, the release URL is
 * rewritten to `https://<channel>.com<pathname>` and an entry ID is hashed
 * from the channel, the third '/'-separated pathname segment and the ISO
 * release date. Without a channel, neither the URL rewrite nor the entry ID
 * is applied.
 *
 * @param {{query: Object}} item - Page item exposing the `query` helper.
 * @param {Object} site - Entity the scene was requested for; stored on the
 *   release and used to look up sibling site slugs.
 * @param {string} url - URL the scene page was fetched from.
 * @returns {Promise<Object>} The assembled release.
 */
async function scrapeScene({ query }, site, url) {
  const title = query.cnt('#movie-header h2');
  const date = query.date('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

  const description = query.cnt('.container .mg-md');
  const duration = query.duration('#video-ribbon .container > div > span:nth-child(3)');

  const linkedActors = query.cnts('#video-info a');
  const maleModels = extractMaleModelsFromTags(query.q('.tag-container'));
  const baseTags = query.cnts('.tag-container a');

  const ribbonLabel = query.cnt('#video-ribbon .container > div > span:nth-child(2)');

  const photos = query.all('.bxslider_pics img').map((img) => img.dataset.original || img.src);
  const poster = query.poster();
  const trailerSrc = query.trailer();

  const release = {
    url,
    site,
    title,
    date,
    description,
    duration,
    actors: linkedActors.concat(maleModels),
    // a '4K' marker in the ribbon adds an extra tag
    tags: /4K/.test(ribbonLabel) ? baseTags.concat('4k') : baseTags,
    photos,
    poster,
  };

  if (trailerSrc) {
    release.trailer = { src: trailerSrc };
  }

  if (photos.length > 0) {
    release.channel = await extractChannelFromPhoto(photos[0], site);
  }

  if (!release.channel) {
    return release;
  }

  const { pathname } = new URL(url);
  const [, , slug] = pathname.split('/');

  release.url = `https://${release.channel}.com${pathname}`;
  release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);

  return release;
}
|
|
|
|
|
|
|
|
/**
 * Fetches one page of the site's movie listing and scrapes its scene tiles.
 *
 * @param {Object} site - Site entity; `url` is used as the base of the request.
 * @param {number} [page=1] - Listing page number inserted into the URL.
 * @returns {Promise<Object[]|*>} Scraped releases when the response is ok,
 *   otherwise the response's `status`.
 */
async function fetchLatest(site, page = 1) {
  const res = await qu.getAll(`${site.url}/movies/page-${page}`, '#content-main [class^="item"]');

  if (!res.ok) {
    return res.status;
  }

  return scrapeLatest(res.items, site);
}
|
|
|
|
|
2021-02-01 19:49:08 +00:00
|
|
|
/**
 * Fetches a scene page and scrapes it into a release.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} channel - Entity the scene is being fetched for; passed on
 *   to the scene scraper.
 * @returns {Promise<Object|*>} The scraped release when the response is ok,
 *   otherwise the response's `status`.
 */
async function fetchScene(url, channel) {
  const res = await qu.get(url);

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.item, channel, url);
}
|
|
|
|
|
|
|
|
module.exports = {
|
2020-05-14 02:26:05 +00:00
|
|
|
fetchLatest,
|
|
|
|
fetchScene,
|
2020-01-16 20:04:44 +00:00
|
|
|
};
|