forked from DebaucheryLibrarian/traxxx
Fixed slug lookup in Perfect Gonzo scraper.
This commit is contained in:
parent
aade7490f8
commit
4b5cd50122
|
@ -3,14 +3,13 @@
|
||||||
const blake2 = require('blake2');
|
const blake2 = require('blake2');
|
||||||
const knex = require('../knex');
|
const knex = require('../knex');
|
||||||
|
|
||||||
const { ex, ctxa } = require('../utils/q');
|
const qu = require('../utils/qu');
|
||||||
const http = require('../utils/http');
|
|
||||||
|
|
||||||
async function getSiteSlugs() {
|
async function getSiteSlugs() {
|
||||||
return knex('sites')
|
return knex('entities')
|
||||||
.pluck('sites.slug')
|
.pluck('entities.slug')
|
||||||
.join('networks', 'networks.id', 'sites.network_id')
|
.join('entities AS parents', 'parents.id', 'entities.parent_id')
|
||||||
.where('networks.slug', 'perfectgonzo');
|
.where('parents.slug', 'perfectgonzo');
|
||||||
}
|
}
|
||||||
|
|
||||||
function getHash(identifier) {
|
function getHash(identifier) {
|
||||||
|
@ -39,8 +38,10 @@ function extractMaleModelsFromTags(tagContainer) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
|
async function extractChannelFromPhoto(photo, channel) {
|
||||||
const siteSlugs = metaSiteSlugs || await getSiteSlugs();
|
const siteSlugs = (channel.type === 'network' ? channel.children : channel.parent?.children)?.map(child => child.slug)
|
||||||
|
|| await getSiteSlugs();
|
||||||
|
|
||||||
const channelMatch = photo.match(new RegExp(siteSlugs.join('|')));
|
const channelMatch = photo.match(new RegExp(siteSlugs.join('|')));
|
||||||
|
|
||||||
if (channelMatch) {
|
if (channelMatch) {
|
||||||
|
@ -50,66 +51,50 @@ async function extractChannelFromPhoto(photo, metaSiteSlugs) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeLatest(html, site) {
|
async function scrapeLatest(scenes, site) {
|
||||||
const siteSlugs = await getSiteSlugs();
|
return scenes.map(({ query }) => {
|
||||||
const { element } = ex(html);
|
const release = {};
|
||||||
|
|
||||||
return ctxa(element, '#content-main .itemm').map(({
|
release.title = query.q('a', 'title');
|
||||||
q, qa, qlength, qdate, qimages,
|
release.url = query.url('a', 'href', { origin: site.url });
|
||||||
}) => {
|
release.date = query.date('.nm-date', 'MM/DD/YYYY');
|
||||||
const release = {
|
|
||||||
site,
|
|
||||||
meta: {
|
|
||||||
siteSlugs,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
const sceneLink = q('a');
|
|
||||||
|
|
||||||
release.title = sceneLink.title;
|
|
||||||
release.url = `${site.url}${sceneLink.href}`;
|
|
||||||
release.date = qdate('.nm-date', 'MM/DD/YYYY');
|
|
||||||
|
|
||||||
const slug = new URL(release.url).pathname.split('/')[2];
|
const slug = new URL(release.url).pathname.split('/')[2];
|
||||||
release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`);
|
release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`);
|
||||||
|
|
||||||
release.actors = release.title.split('&').map(actor => actor.trim());
|
release.actors = release.title.split('&').map(actor => actor.trim());
|
||||||
|
|
||||||
[release.poster, ...release.photos] = qimages('.bloc-link img');
|
[release.poster, ...release.photos] = query.imgs('.bloc-link img');
|
||||||
|
|
||||||
release.tags = qa('.dropdown ul a', true).slice(1);
|
release.tags = query.cnts('.dropdown ul a').slice(1);
|
||||||
release.duration = qlength('.dropdown p:first-child');
|
release.duration = query.duration('.dropdown p:first-child');
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(html, site, url, metaSiteSlugs) {
|
async function scrapeScene({ query }, site, url) {
|
||||||
const {
|
|
||||||
q, qa, qlength, qdate, qposter, qtrailer,
|
|
||||||
} = ex(html);
|
|
||||||
|
|
||||||
const release = { url, site };
|
const release = { url, site };
|
||||||
|
|
||||||
release.title = q('#movie-header h2', true);
|
release.title = query.cnt('#movie-header h2');
|
||||||
release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
release.date = query.date('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||||
|
|
||||||
release.description = q('.container .mg-md', true);
|
release.description = query.cnt('.container .mg-md');
|
||||||
release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)');
|
release.duration = query.duration('#video-ribbon .container > div > span:nth-child(3)');
|
||||||
|
|
||||||
release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container')));
|
release.actors = query.cnts('#video-info a').concat(extractMaleModelsFromTags(query.q('.tag-container')));
|
||||||
release.tags = qa('.tag-container a', true);
|
release.tags = query.cnts('.tag-container a');
|
||||||
|
|
||||||
const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true);
|
const uhd = query.cnt('#video-ribbon .container > div > span:nth-child(2)');
|
||||||
if (/4K/.test(uhd)) release.tags = release.tags.concat('4k');
|
if (/4K/.test(uhd)) release.tags = release.tags.concat('4k');
|
||||||
|
|
||||||
release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src);
|
release.photos = query.all('.bxslider_pics img').map(el => el.dataset.original || el.src);
|
||||||
release.poster = qposter();
|
release.poster = query.poster();
|
||||||
|
|
||||||
const trailer = qtrailer();
|
const trailer = query.trailer();
|
||||||
if (trailer) release.trailer = { src: trailer };
|
if (trailer) release.trailer = { src: trailer };
|
||||||
|
|
||||||
if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs);
|
if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], site);
|
||||||
|
|
||||||
if (release.channel) {
|
if (release.channel) {
|
||||||
const { pathname } = new URL(url);
|
const { pathname } = new URL(url);
|
||||||
|
@ -124,23 +109,23 @@ async function scrapeScene(html, site, url, metaSiteSlugs) {
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const url = `${site.url}/movies/page-${page}`;
|
const url = `${site.url}/movies/page-${page}`;
|
||||||
const res = await http.get(url);
|
const res = await qu.getAll(url, '#content-main [class^="item"]');
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.ok) {
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.items, site);
|
||||||
}
|
}
|
||||||
|
|
||||||
return [];
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site, release) {
|
async function fetchScene(url, channel) {
|
||||||
const res = await http.get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.ok) {
|
||||||
return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs);
|
return scrapeScene(res.item, channel, url);
|
||||||
}
|
}
|
||||||
|
|
||||||
return [];
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
@ -42,39 +42,6 @@ function getAvatarFallbacks(avatar) {
|
||||||
.flat();
|
.flat();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
async function getTrailerLegacy(scene, site, url) {
|
|
||||||
const qualities = [360, 480, 720, 1080, 2160];
|
|
||||||
|
|
||||||
const tokenRes = await http.post(`${site.url}/api/__record_tknreq`, {
|
|
||||||
file: scene.previewVideoUrl1080P,
|
|
||||||
sizes: qualities.join('+'),
|
|
||||||
type: 'trailer',
|
|
||||||
}, {
|
|
||||||
headers: {
|
|
||||||
referer: url,
|
|
||||||
origin: site.url,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!tokenRes.ok) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
|
|
||||||
const trailersRes = await http.post(trailerUrl, null, { headers: { referer: url } });
|
|
||||||
|
|
||||||
if (trailersRes.ok) {
|
|
||||||
return qualities.map(quality => (trailersRes.body[quality] ? {
|
|
||||||
src: trailersRes.body[quality].token,
|
|
||||||
quality,
|
|
||||||
} : null)).filter(Boolean);
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
async function getTrailer(scene, channel, url) {
|
async function getTrailer(scene, channel, url) {
|
||||||
const res = await http.post(`${channel.url}/graphql`, {
|
const res = await http.post(`${channel.url}/graphql`, {
|
||||||
operationName: 'getToken',
|
operationName: 'getToken',
|
||||||
|
|
Loading…
Reference in New Issue