From ad73c11cb46c6297c4891c162065f27e422ae937 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Mon, 11 Jan 2021 23:31:33 +0100 Subject: [PATCH] Using date and title for Nubiles entry ID. --- src/scrapers/nubiles.js | 79 +++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/src/scrapers/nubiles.js b/src/scrapers/nubiles.js index 9df9aae0..7d08ad30 100644 --- a/src/scrapers/nubiles.js +++ b/src/scrapers/nubiles.js @@ -1,6 +1,6 @@ 'use strict'; -const { get, geta, ctxa } = require('../utils/q'); +const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); const { heightToCm } = require('../utils/convert'); @@ -10,112 +10,115 @@ const slugUrlMap = { }; async function getPhotos(albumUrl) { - const res = await geta(albumUrl, '.photo-thumb'); + const res = await qu.getAll(albumUrl, '.photo-thumb'); return res.ok - ? res.items.map(({ q }) => q('source').srcset) + ? res.items.map(({ query }) => query.q('source').srcset) : []; } function scrapeAll(scenes, site, origin) { - return scenes.map(({ qu }) => { + return scenes.map(({ query }) => { const release = {}; - release.title = qu.q('.title a', true); + release.title = query.q('.title a', true); - const url = qu.url('.title a').split('?')[0]; - const channelUrl = qu.url('.site-link'); + const url = query.url('.title a').split('?')[0]; + const channelUrl = query.url('.site-link'); if (/^http/.test(url)) { const { pathname } = new URL(url); - release.entryId = pathname.split('/')[3]; + // release.entryId = pathname.split('/')[3]; if (channelUrl) release.url = `${channelUrl}${pathname}`; else release.url = url; } else if (!/\/join/.test(url)) { - release.entryId = url.split('/')[3]; + // release.entryId = url.split('/')[3]; if (channelUrl) release.url = `${channelUrl}${url}`; else if (site?.url) release.url = `${site.url}${url}`; else if (origin) release.url = `${origin}${url}`; } else { - release.entryId = qu.q('a img', 'tube_tour_thumb_id'); + // release.entryId = qu.q('a img', 'tube_tour_thumb_id'); } - release.date = qu.date('.date', 'MMM D, YYYY'); - release.actors = qu.all('.models a.model', true); + release.date = query.date('.date', 'MMM D, YYYY'); + release.actors = query.all('.models a.model', true); - const poster = qu.sourceSet('img', 'data-srcset')?.[0]; + // no reliable entry ID between upcoming and released scenes + release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`; + + const poster = query.sourceSet('img', 'data-srcset')?.[0]; release.poster = poster && [ poster.replace('_640', '_1280'), poster, ]; - release.stars = Number(qu.q('.rating', true)); - release.likes = Number(qu.q('.likes', true)); + release.stars = query.number('.rating'); + release.likes = query.number('.likes'); return release; }); } -async function scrapeScene({ qu }, url, site) { +async function scrapeScene({ query }, url, site) { const release = {}; const { origin, pathname } = new URL(url); release.url = `${origin}${pathname}`; release.entryId = new URL(url).pathname.split('/')[3]; - release.title = qu.q('.content-pane-title h2', true); - release.description = qu.q('.content-pane-column div', true); + release.title = query.q('.content-pane-title h2', true); + release.description = query.q('.content-pane-column div', true); - release.date = qu.q('.date', 'MMM D, YYYY'); + release.date = query.q('.date', 'MMM D, YYYY'); - release.actors = qu.all('.content-pane-performers .model', true); - release.tags = qu.all('.categories a', true); + release.actors = query.all('.content-pane-performers .model', true); + release.tags = query.all('.categories a', true); - release.poster = qu.poster() || qu.img('.fake-video-player img'); - release.trailer = qu.all('source').map(source => ({ + release.poster = query.poster() || query.img('.fake-video-player img'); + release.trailer = query.all('source').map(source => ({ src: source.src, quality: Number(source.getAttribute('res')), })); - release.stars = Number(qu.q('.score', true)); - release.likes = Number(qu.q('#likecount', true)); + release.stars = Number(query.q('.score', true)); + release.likes = Number(query.q('#likecount', true)); - const albumLink = qu.url('.content-pane-related-links a[href*="gallery"]'); + const albumLink = query.url('.content-pane-related-links a[href*="gallery"]'); if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`); return release; } -function scrapeProfile({ qu }, _actorName, origin) { +function scrapeProfile({ query }, _actorName, origin) { const profile = {}; - const keys = qu.all('.model-profile h5', true); - const values = qu.all('.model-profile h5 + p', true); + const keys = query.all('.model-profile h5', true); + const values = query.all('.model-profile h5 + p', true); const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {}); profile.age = Number(bio.age); - profile.description = qu.q('.model-bio', true); + profile.description = query.q('.model-bio', true); profile.residencePlace = bio.location; profile.height = heightToCm(bio.height); [profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v); - profile.avatar = qu.img('.model-profile img'); + profile.avatar = query.img('.model-profile img'); - const releases = qu.all('.content-grid-item').filter(el => /video\//.test(qu.url(el, '.img-wrapper a'))); // filter out photos - profile.releases = scrapeAll(ctxa(releases), null, origin); + const releases = query.all('.content-grid-item').filter(el => /video\//.test(query.url(el, '.img-wrapper a'))); // filter out photos + profile.releases = scrapeAll(query.initAll(releases), null, origin); return profile; } async function fetchLatest(site, page = 1) { const url = `${site.url}/video/gallery/${(page - 1) * 12}`; - const res = await geta(url, '.content-grid-item'); + const res = await qu.getAll(url, '.content-grid-item'); return res.ok ? scrapeAll(res.items, site) : res.status; } @@ -123,7 +126,7 @@ async function fetchLatest(site, page = 1) { async function fetchUpcoming(site) { if (site.parameters?.upcoming) { const url = `${site.url}/video/upcoming`; - const res = await geta(url, '.content-grid-item'); + const res = await qu.getAll(url, '.content-grid-item'); return res.ok ? scrapeAll(res.items, site) : res.status; } @@ -132,7 +135,7 @@ async function fetchUpcoming(site) { } async function fetchScene(url, site) { - const res = await get(url); + const res = await qu.get(url); return res.ok ? scrapeScene(res.item, url, site) : res.status; } @@ -142,7 +145,7 @@ async function fetchProfile({ name: actorName }, { site }) { const origin = slugUrlMap[site.slug] || site.url; const url = `${origin}/model/alpha/${firstLetter}`; - const resModels = await get(url); + const resModels = await qu.get(url); if (!resModels.ok) return resModels.status; @@ -150,7 +153,7 @@ async function fetchProfile({ name: actorName }, { site }) { if (modelPath) { const modelUrl = `${origin}${modelPath}`; - const resModel = await get(modelUrl); + const resModel = await qu.get(modelUrl); return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status; }