From 4b5cd50122f6b66b6d43db51be0cb8b9b2cb904f Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Mon, 1 Feb 2021 20:49:08 +0100 Subject: [PATCH] Fixed slug lookup in Perfect Gonzo scraper. --- src/scrapers/perfectgonzo.js | 93 +++++++++++++++--------------------- src/scrapers/vixen.js | 33 ------------- 2 files changed, 39 insertions(+), 87 deletions(-) diff --git a/src/scrapers/perfectgonzo.js b/src/scrapers/perfectgonzo.js index e77a3bb0..4564bfc6 100644 --- a/src/scrapers/perfectgonzo.js +++ b/src/scrapers/perfectgonzo.js @@ -3,14 +3,13 @@ const blake2 = require('blake2'); const knex = require('../knex'); -const { ex, ctxa } = require('../utils/q'); -const http = require('../utils/http'); +const qu = require('../utils/qu'); async function getSiteSlugs() { - return knex('sites') - .pluck('sites.slug') - .join('networks', 'networks.id', 'sites.network_id') - .where('networks.slug', 'perfectgonzo'); + return knex('entities') + .pluck('entities.slug') + .join('entities AS parents', 'parents.id', 'entities.parent_id') + .where('parents.slug', 'perfectgonzo'); } function getHash(identifier) { @@ -39,8 +38,10 @@ function extractMaleModelsFromTags(tagContainer) { return []; } -async function extractChannelFromPhoto(photo, metaSiteSlugs) { - const siteSlugs = metaSiteSlugs || await getSiteSlugs(); +async function extractChannelFromPhoto(photo, channel) { + const siteSlugs = (channel.type === 'network' ? channel.children : channel.parent?.children)?.map(child => child.slug) + || await getSiteSlugs(); + const channelMatch = photo.match(new RegExp(siteSlugs.join('|'))); if (channelMatch) { @@ -50,66 +51,50 @@ async function extractChannelFromPhoto(photo, metaSiteSlugs) { return null; } -async function scrapeLatest(html, site) { - const siteSlugs = await getSiteSlugs(); - const { element } = ex(html); +async function scrapeLatest(scenes, site) { + return scenes.map(({ query }) => { + const release = {}; - return ctxa(element, '#content-main .itemm').map(({ - q, qa, qlength, qdate, qimages, - }) => { - const release = { - site, - meta: { - siteSlugs, - }, - }; - - const sceneLink = q('a'); - - release.title = sceneLink.title; - release.url = `${site.url}${sceneLink.href}`; - release.date = qdate('.nm-date', 'MM/DD/YYYY'); + release.title = query.q('a', 'title'); + release.url = query.url('a', 'href', { origin: site.url }); + release.date = query.date('.nm-date', 'MM/DD/YYYY'); const slug = new URL(release.url).pathname.split('/')[2]; release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`); release.actors = release.title.split('&').map(actor => actor.trim()); - [release.poster, ...release.photos] = qimages('.bloc-link img'); + [release.poster, ...release.photos] = query.imgs('.bloc-link img'); - release.tags = qa('.dropdown ul a', true).slice(1); - release.duration = qlength('.dropdown p:first-child'); + release.tags = query.cnts('.dropdown ul a').slice(1); + release.duration = query.duration('.dropdown p:first-child'); return release; }); } -async function scrapeScene(html, site, url, metaSiteSlugs) { - const { - q, qa, qlength, qdate, qposter, qtrailer, - } = ex(html); - +async function scrapeScene({ query }, site, url) { const release = { url, site }; - release.title = q('#movie-header h2', true); - release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); + release.title = query.cnt('#movie-header h2'); + release.date = query.date('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); - release.description = q('.container .mg-md', true); - release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)'); + release.description = query.cnt('.container .mg-md'); + release.duration = query.duration('#video-ribbon .container > div > span:nth-child(3)'); - release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container'))); - release.tags = qa('.tag-container a', true); + release.actors = query.cnts('#video-info a').concat(extractMaleModelsFromTags(query.q('.tag-container'))); + release.tags = query.cnts('.tag-container a'); - const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true); + const uhd = query.cnt('#video-ribbon .container > div > span:nth-child(2)'); if (/4K/.test(uhd)) release.tags = release.tags.concat('4k'); - release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src); - release.poster = qposter(); + release.photos = query.all('.bxslider_pics img').map(el => el.dataset.original || el.src); + release.poster = query.poster(); - const trailer = qtrailer(); + const trailer = query.trailer(); if (trailer) release.trailer = { src: trailer }; - if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs); + if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], site); if (release.channel) { const { pathname } = new URL(url); @@ -124,23 +109,23 @@ async function scrapeScene(html, site, url, metaSiteSlugs) { async function fetchLatest(site, page = 1) { const url = `${site.url}/movies/page-${page}`; - const res = await http.get(url); + const res = await qu.getAll(url, '#content-main [class^="item"]'); - if (res.statusCode === 200) { - return scrapeLatest(res.body.toString(), site); + if (res.ok) { + return scrapeLatest(res.items, site); } - return []; + return res.status; } -async function fetchScene(url, site, release) { - const res = await http.get(url); +async function fetchScene(url, channel) { + const res = await qu.get(url); - if (res.statusCode === 200) { - return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs); + if (res.ok) { + return scrapeScene(res.item, channel, url); } - return []; + return res.status; } module.exports = { diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js index 362ede04..b4cc76f9 100644 --- a/src/scrapers/vixen.js +++ b/src/scrapers/vixen.js @@ -42,39 +42,6 @@ function getAvatarFallbacks(avatar) { .flat(); } -/* -async function getTrailerLegacy(scene, site, url) { - const qualities = [360, 480, 720, 1080, 2160]; - - const tokenRes = await http.post(`${site.url}/api/__record_tknreq`, { - file: scene.previewVideoUrl1080P, - sizes: qualities.join('+'), - type: 'trailer', - }, { - headers: { - referer: url, - origin: site.url, - }, - }); - - if (!tokenRes.ok) { - return null; - } - - const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`; - const trailersRes = await http.post(trailerUrl, null, { headers: { referer: url } }); - - if (trailersRes.ok) { - return qualities.map(quality => (trailersRes.body[quality] ? { - src: trailersRes.body[quality].token, - quality, - } : null)).filter(Boolean); - } - - return null; -} -*/ - async function getTrailer(scene, channel, url) { const res = await http.post(`${channel.url}/graphql`, { operationName: 'getToken',