2021-09-28 18:45:22 +00:00
|
|
|
'use strict';
|
|
|
|
|
|
|
|
const qu = require('../utils/q');
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
|
|
|
|
/**
 * Derive a performer's gender from the site section their profile URL points to.
 *
 * A path containing `atores` yields 'male'; a path containing `atrizes` yields 'female'.
 * Only the pathname is inspected, so query strings and fragments are ignored.
 *
 * @param {string|null|undefined} url - Absolute profile URL.
 * @returns {'male'|'female'|null} The gender, or null when the URL is missing,
 *   cannot be parsed, or matches neither section.
 */
function genderFromUrl(url) {
  if (!url) {
    return null;
  }

  let pathname;

  try {
    ({ pathname } = new URL(url));
  } catch (error) {
    // URLs come from scraped markup and may be malformed; an unknown gender
    // is preferable to aborting the scrape of the surrounding page.
    return null;
  }

  if (/atores/.test(pathname)) {
    return 'male';
  }

  if (/atrizes/.test(pathname)) {
    return 'female';
  }

  return null;
}
|
|
|
|
|
|
|
|
/**
 * Build release summaries from a list of scene tiles.
 *
 * Each tile is read through its `query` helper, which must expose `cnt`, `url`,
 * `q` and `img`. Missing pieces of a tile (no subtitle, no link, no poster) are
 * tolerated so that one incomplete tile does not abort the whole list.
 *
 * @param {Array<{query: object}>} scenes - Tile contexts to scrape.
 * @returns {object[]} One release per tile with `url`, `entryId`, `title`,
 *   `actors`, `poster` and, when the poster path reveals it, `date` plus
 *   `datePrecision: 'month'`. `entryId` is null when the tile link is missing,
 *   unparseable, or has no `/videos/<slug>` segment.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};
    const subtitle = query.cnt('.subtitle');

    release.url = query.url('a');
    release.entryId = null;

    if (release.url) {
      try {
        release.entryId = new URL(release.url).pathname.match(/\/videos\/([\w-]+)/)?.[1] ?? null;
      } catch (error) {
        // the link comes from scraped markup; leave entryId null when it is not a valid absolute URL
      }
    }

    release.title = query.cnt('.title') || query.q('img', 'title');

    // the subtitle is a "<label>: name, name" list; everything after the first colon holds the names
    release.actors = subtitle
      ? subtitle.slice(subtitle.indexOf(':') + 1).split(',').map((actor) => actor.trim()).filter(Boolean)
      : [];

    release.poster = query.img('.thumb img');

    if (release.poster) {
      // the poster path embeds the upload year and month (/uploads/YYYY/MM), the only date hint on a tile
      const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/);

      if (match) {
        release.date = new Date(match[1], match[2] - 1, 1);
        release.datePrecision = 'month';
      }
    }

    return release;
  });
}
|
|
|
|
|
|
|
|
/**
 * Scrape a single scene page into a release object.
 *
 * Structured data is read from the `.yoast-schema-graph` element (JSON with an
 * `@graph` list) and used to supplement what the page markup provides. The
 * structured data is optional: when it is absent or malformed the function
 * falls back to the markup alone instead of throwing.
 *
 * @param {{query: object, html: string}} page - Query helper bound to the page, and the raw page HTML.
 * @param {string} url - Absolute URL of the scene page; the entry ID is taken from its `/videos/<slug>` segment.
 * @param {{url: string}} channel - Channel whose `url` is passed as origin when resolving the trailer source.
 * @returns {object} Release with `entryId`, `title`, `description`, `date`, `actors`, `poster`,
 *   `photos`, `trailer`, `likes`, `dislikes`, and `datePrecision: 'month'` when the date
 *   had to be derived from the poster's upload path.
 */
function scrapeScene({ query, html }, url, channel) {
  const release = {};

  const dataString = query.html('.yoast-schema-graph');
  let data = [];

  if (dataString) {
    try {
      const graph = JSON.parse(dataString)['@graph'];

      if (Array.isArray(graph)) {
        data = graph;
      }
    } catch (error) {
      // the structured data only supplements the markup; continue without it when it cannot be parsed
    }
  }

  const pageData = data.find((item) => item['@type'] === 'WebPage');
  const imageData = data.find((item) => item['@type'] === 'ImageObject');
  const breadcrumbData = data.find((item) => item['@type'] === 'BreadcrumbList');

  release.entryId = new URL(url).pathname.match(/\/videos\/([\w-]+)/)?.[1] ?? null;

  // last breadcrumb entry carries the scene name
  const breadcrumbTitle = breadcrumbData?.itemListElement?.slice(-1)[0]?.item?.name;

  let pageTitle = null;

  if (typeof pageData?.name === 'string') {
    // drop the trailing "- <suffix>" part, but only when a dash is actually present;
    // slicing at lastIndexOf's -1 would silently cut the final character of the name
    const separatorIndex = pageData.name.lastIndexOf('-');

    pageTitle = (separatorIndex === -1 ? pageData.name : pageData.name.slice(0, separatorIndex)).trim();
  }

  release.title = query.cnt('.video .title h1')
    || breadcrumbTitle
    || pageTitle;

  release.description = query.cnt('.video .descript');

  const publishedDate = pageData?.datePublished ? new Date(pageData.datePublished) : null;

  // an unparseable datePublished must not block the poster-based fallback below
  release.date = publishedDate && !Number.isNaN(publishedDate.getTime()) ? publishedDate : null;

  release.actors = query.all('.video .elenco a').map((el) => {
    const actorUrl = query.url(el, null);

    return {
      name: query.cnt(el),
      url: actorUrl,
      // a cast link without a URL gives no section to infer the gender from
      gender: actorUrl ? genderFromUrl(actorUrl) : null,
    };
  });

  release.poster = imageData?.url
    || query.meta('property="og:image"')
    || html?.match(/poster: '(http.*\.jpg)'/)?.[1];

  release.photos = query.imgs('.listPostSm a', 'href');
  release.trailer = query.video('source', 'src', { origin: channel.url });

  release.likes = query.number('.vortex-p-like-counter');
  release.dislikes = query.number('.vortex-p-dislike-counter');

  if (!release.date && release.poster) {
    // the poster path embeds the upload year and month (/uploads/YYYY/MM)
    const match = release.poster.match(/\/uploads\/(\d{4})\/(\d{2})/);

    if (match) {
      release.date = new Date(match[1], match[2] - 1, 1);
      release.datePrecision = 'month';
    }
  }

  return release;
}
|
|
|
|
|
|
|
|
/**
 * Scrape a performer profile page.
 *
 * @param {{query: object, el: object}} page - Query helper bound to the page, and the page's root element.
 * @param {object} entity - Unused here; kept for the shared scraper signature.
 * @param {string} url - URL the profile was fetched from; also used to infer the gender.
 * @returns {object} Profile with `url`, `gender`, `description`, `avatar` and `scenes`.
 */
function scrapeProfile({ query, el }, entity, url) {
  return {
    url,
    gender: genderFromUrl(url),
    // strip the "sobre a atriz:" label that precedes the biography text
    description: query.cnt('.about')?.replace(/sobre a atriz:/i, '').trim(),
    avatar: query.img('.left .thumb img'),
    // the profile lists the performer's scenes as the same tiles scrapeAll handles
    scenes: scrapeAll(qu.initAll(el, '.listPostLg .post')),
  };
}
|
|
|
|
|
|
|
|
/**
 * Fetch one page of the channel's video listing and scrape its tiles.
 *
 * @param {{url: string}} channel - Channel whose `url` is the site base.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<object[]|*>} Scraped releases when the request succeeded, otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
  const listing = await qu.getAll(`${channel.url}/videos/page/${page}`, '.listPostLg .post');

  return listing.ok
    ? scrapeAll(listing.items, channel)
    : listing.status;
}
|
|
|
|
|
|
|
|
/**
 * Fetch a performer's profile page, trying both site sections.
 *
 * The first attempt uses the section matching the known gender (`atores` for
 * 'male', `atrizes` otherwise). If that request fails, a second attempt is made
 * against the other section, so a wrong or missing gender does not hide an
 * existing profile.
 *
 * @param {{name: string, gender: (string|undefined)}} actor - Performer to look up.
 * @param {{url: string}} entity - Entity whose `url` is the site base.
 * @param {boolean} secondAttempt - True when this call is the retry against the alternate section.
 * @returns {Promise<{res: object, url: string}>} The last response and the URL it was requested
 *   from; callers check `res.ok` to tell success from failure.
 */
async function fetchProfilePage({ name, gender }, entity, secondAttempt) {
  const preferredSection = gender === 'male' ? 'atores' : 'atrizes';
  const alternateSection = gender === 'male' ? 'atrizes' : 'atores';
  const section = secondAttempt ? alternateSection : preferredSection;

  const url = `${entity.url}/${section}/${slugify(name, '-')}`;
  const res = await qu.get(url);

  // always hand back the same shape: the caller destructures { res, url },
  // which would yield undefined if a bare status code were returned
  if (res.ok || secondAttempt) {
    return { res, url };
  }

  return fetchProfilePage({ name, gender }, entity, true);
}
|
|
|
|
|
|
|
|
/**
 * Fetch and scrape a performer's profile.
 *
 * @param {{name: string, gender: (string|undefined)}} baseActor - Performer to look up.
 * @param {{url: string}} entity - Entity whose `url` is the site base.
 * @param {object} [options] - Forwarded to scrapeProfile.
 * @returns {Promise<object|*>} The scraped profile when a profile page was found,
 *   otherwise the status of the failed lookup.
 */
async function fetchProfile(baseActor, entity, options) {
  const result = await fetchProfilePage(baseActor, entity, false);

  // the lookup may resolve to a bare status code instead of { res, url };
  // destructuring that directly would leave `res` undefined and throw on `res.ok`
  if (result?.res?.ok) {
    return scrapeProfile(result.res.item, entity, result.url, options);
  }

  return result?.res?.status ?? result;
}
|
|
|
|
|
|
|
|
// Public scraper interface: listing fetcher, profile fetcher and scene page scraper.
module.exports = { fetchLatest, fetchProfile, scrapeScene };
|