2020-01-16 20:04:44 +00:00
|
|
|
'use strict';
|
|
|
|
|
|
|
|
const blake2 = require('blake2');
|
|
|
|
const knex = require('../knex');
|
|
|
|
|
|
|
|
const { ex, ctxa } = require('../utils/q');
|
2020-11-22 23:05:02 +00:00
|
|
|
const http = require('../utils/http');
|
2020-01-16 20:04:44 +00:00
|
|
|
|
|
|
|
/**
 * Looks up the slugs of every site row joined to the network whose slug is
 * 'perfectgonzo'.
 *
 * @returns {Promise<string[]>} Values of the `sites.slug` column.
 */
async function getSiteSlugs() {
  const slugQuery = knex('sites')
    .pluck('sites.slug')
    .join('networks', 'networks.id', 'sites.network_id')
    .where('networks.slug', 'perfectgonzo');

  return slugQuery;
}
|
|
|
|
|
|
|
|
/**
 * Derives a short hex identifier from a string using BLAKE2b with an
 * 8-byte digest.
 *
 * @param {string} identifier - Text to hash.
 * @returns {string} Hex-encoded digest.
 */
function getHash(identifier) {
  const hasher = blake2.createHash('blake2b', { digestLength: 8 });
  const input = Buffer.from(identifier);

  hasher.update(input);

  return hasher.digest('hex');
}
|
|
|
|
|
|
|
|
/**
 * Collects the names listed under the "Male Models" label of a tag container.
 *
 * The container's child nodes are reduced to their node type and trimmed
 * text, dropping whitespace-only nodes. Every entry after the one whose text
 * is exactly 'Male Models', up to the next text node (nodeType 3), is
 * treated as a model name.
 *
 * @param {?{childNodes: Iterable<{nodeType: number, textContent: string}>}} tagContainer
 *   Element holding the labels and links; may be null or undefined.
 * @returns {string[]} Model names, or an empty array when the container or
 *   the label is missing.
 */
function extractMaleModelsFromTags(tagContainer) {
  if (!tagContainer) {
    return [];
  }

  const TEXT_NODE = 3;

  const tagEls = Array.from(tagContainer.childNodes, node => ({
    type: node.nodeType,
    text: node.textContent.trim(),
  })).filter(node => node.text.length > 0);

  const modelLabelIndex = tagEls.findIndex(node => node.text === 'Male Models');

  if (modelLabelIndex === -1) {
    return [];
  }

  const nextLabelIndex = tagEls.findIndex((node, index) => index > modelLabelIndex && node.type === TEXT_NODE);

  // When "Male Models" is the last section there is no following text node.
  // Passing -1 to slice() would cut off the final model, so run to the end.
  const endIndex = nextLabelIndex === -1 ? tagEls.length : nextLabelIndex;

  return tagEls
    .slice(modelLabelIndex + 1, endIndex)
    .map(model => model.text);
}
|
|
|
|
|
|
|
|
/**
 * Finds which known site slug occurs in a photo URL.
 *
 * @param {string} photo - Photo URL (or any string) to search.
 * @param {string[]} [metaSiteSlugs] - Candidate slugs; when falsy, the slugs
 *   are loaded through getSiteSlugs().
 * @returns {Promise<?string>} The matched slug, or null when none of the
 *   slugs occurs in the photo string or there are no slugs to try.
 */
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
  const siteSlugs = metaSiteSlugs || await getSiteSlugs();

  // Escape regex metacharacters so each slug is matched literally, and skip
  // empty entries that would make the alternation match everything.
  const alternatives = siteSlugs
    .filter(Boolean)
    .map(slug => String(slug).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

  // An empty pattern would match at index 0 and yield '' instead of null.
  if (alternatives.length === 0) {
    return null;
  }

  const channelMatch = photo.match(new RegExp(alternatives.join('|')));

  return channelMatch ? channelMatch[0] : null;
}
|
|
|
|
|
|
|
|
/**
 * Builds one release object per '#content-main .itemm' entry of a listing page.
 *
 * Each release carries the site, the network's site slugs under `meta`
 * (so later scene fetches can reuse them), and the fields read from the
 * entry: title, url, date, entryId, actors, poster, photos, tags, duration.
 *
 * @param {string} html - Markup of the listing page.
 * @param {{url: string, slug: string}} site - Site the listing belongs to.
 * @returns {Promise<Object[]>} Scraped releases in page order.
 */
async function scrapeLatest(html, site) {
  const siteSlugs = await getSiteSlugs();
  const { element } = ex(html);

  return ctxa(element, '#content-main .itemm').map((item) => {
    const {
      q, qa, qlength, qdate, qimages,
    } = item;

    const link = q('a');
    const { title } = link;
    const url = `${site.url}${link.href}`;
    const date = qdate('.nm-date', 'MM/DD/YYYY');

    // The third path segment of the scene URL serves as the scene slug.
    const pathSegments = new URL(url).pathname.split('/');
    const entryId = getHash(`${site.slug}${pathSegments[2]}${date.toISOString()}`);

    // The link title lists the performers separated by '&'.
    const actors = title.split('&').map(name => name.trim());

    // First image is used as the poster, the remainder as photos.
    const [poster, ...photos] = qimages('.bloc-link img');

    const tags = qa('.dropdown ul a', true).slice(1);
    const duration = qlength('.dropdown p:first-child');

    return {
      site,
      meta: {
        siteSlugs,
      },
      title,
      url,
      date,
      entryId,
      actors,
      poster,
      photos,
      tags,
      duration,
    };
  });
}
|
|
|
|
|
|
|
|
/**
 * Builds a release object from a scene page.
 *
 * When the first photo URL contains a known site slug, that slug becomes the
 * release channel, the release URL is rewritten to `https://<channel>.com`
 * plus the original path, and an entryId is derived from channel, the third
 * path segment and the release date.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site the scene was requested for.
 * @param {string} url - URL the scene page was fetched from.
 * @param {string[]} [metaSiteSlugs] - Known site slugs, forwarded to
 *   extractChannelFromPhoto().
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene(html, site, url, metaSiteSlugs) {
  const {
    q, qa, qlength, qdate, qposter, qtrailer,
  } = ex(html);

  const title = q('#movie-header h2', true);
  const date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

  const description = q('.container .mg-md', true);
  const duration = qlength('#video-ribbon .container > div > span:nth-child(3)');

  // Linked performers first, followed by the names listed under "Male Models".
  const linkedActors = qa('#video-info a', true);
  const maleModels = extractMaleModelsFromTags(q('.tag-container'));
  const actors = linkedActors.concat(maleModels);

  const pageTags = qa('.tag-container a', true);

  // The second ribbon entry mentions 4K when applicable; add it as a tag.
  const quality = q('#video-ribbon .container > div > span:nth-child(2)', true);
  const tags = /4K/.test(quality) ? pageTags.concat('4k') : pageTags;

  const photos = qa('.bxslider_pics img').map(img => img.dataset.original || img.src);
  const poster = qposter();

  const trailer = qtrailer();

  const release = {
    url,
    site,
    title,
    date,
    description,
    duration,
    actors,
    tags,
    photos,
    poster,
  };

  if (trailer) {
    release.trailer = { src: trailer };
  }

  if (photos.length > 0) {
    release.channel = await extractChannelFromPhoto(photos[0], metaSiteSlugs);
  }

  if (!release.channel) {
    return release;
  }

  const { pathname } = new URL(url);
  release.url = `https://${release.channel}.com${pathname}`;

  const sceneSlug = pathname.split('/')[2];
  release.entryId = getHash(`${release.channel}${sceneSlug}${release.date.toISOString()}`);

  return release;
}
|
|
|
|
|
|
|
|
/**
 * Requests one page of a site's movie listing and scrapes it.
 *
 * @param {{url: string}} site - Site whose `/movies/page-N` listing is fetched.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]>} Scraped releases, or an empty array when the
 *   response status is not 200.
 */
async function fetchLatest(site, page = 1) {
  const listingUrl = `${site.url}/movies/page-${page}`;
  const response = await http.get(listingUrl);

  if (response.statusCode !== 200) {
    return [];
  }

  return scrapeLatest(response.body.toString(), site);
}
|
|
|
|
|
|
|
|
/**
 * Requests a scene page and scrapes it.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site the scene belongs to.
 * @param {Object} [release] - Release previously produced by the listing
 *   scraper; its `meta.siteSlugs`, when present, are passed on to
 *   scrapeScene().
 * @returns {Promise<Object|Array>} The scraped release, or an empty array
 *   when the response status is not 200.
 */
async function fetchScene(url, site, release) {
  const res = await http.get(url);

  if (res.statusCode === 200) {
    // `meta` is optional too: a release without it should pass undefined
    // site slugs on to scrapeScene() rather than throw a TypeError here.
    return scrapeScene(res.body.toString(), site, url, release?.meta?.siteSlugs);
  }

  return [];
}
|
|
|
|
|
|
|
|
// Public interface of this scraper: the listing fetcher and the scene fetcher.
module.exports = { fetchLatest, fetchScene };
|