From 1e03aa37b7c5c405c78c50c40268460c47d15ced Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Mon, 8 Jul 2024 01:58:11 +0200 Subject: [PATCH] Expanded Naughty America scraper to support Tonight's Girlfriend (and possibly other native sites). --- seeds/02_sites.js | 6 +++- src/scrapers/naughtyamerica.js | 50 +++++++++++++++++++++------------- src/scrapers/scrapers.js | 1 + 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 32f04e3a..96f9da59 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -7237,8 +7237,12 @@ const sites = [ slug: 'tonightsgirlfriend', alias: ['togc', 'tog'], name: "Tonight's Girlfriend", - url: 'https://www.naughtyamerica.com/site/tonight-s-girlfriend', + url: 'https://www.tonightsgirlfriend.com', parent: 'naughtyamerica', + parameters: { + scenes: '/scenes', + useActorUrl: false, + }, }, { slug: 'wivesonvacation', diff --git a/src/scrapers/naughtyamerica.js b/src/scrapers/naughtyamerica.js index 10d3891c..f5d2448e 100755 --- a/src/scrapers/naughtyamerica.js +++ b/src/scrapers/naughtyamerica.js @@ -4,7 +4,8 @@ const unprint = require('unprint'); const slugify = require('../utils/slugify'); -function scrapeLatest(scenes) { +// Naughty America network +function scrapeLatest(scenes, channel) { return scenes.map(({ query }) => { const release = {}; const url = query.url('a'); @@ -12,23 +13,26 @@ function scrapeLatest(scenes) { release.url = url; release.entryId = query.attribute('a', 'data-scene-id') || (url && new URL(url).pathname.match(/-(\d+)$/)?.[1]) || null; - release.date = query.date('.entry-date', 'MMM D, YYYY'); + release.date = query.date('.entry-date, .scene-date', 'MMM D, YYYY'); release.duration = query.duration('.scene-runtime'); - release.actors = query.all('.contain-actors a').map((actorEl) => ({ + release.actors = query.all('.contain-actors a, .scene-actors a').map((actorEl) => ({ name: unprint.query.content(actorEl), - url: unprint.query.url(actorEl, null), + url: channel.parameters?.useActorUrl === false + ? null // actor URL is scene link in native layout + : unprint.query.url(actorEl, null), })); release.poster = [ ...(query.sourceSet('source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || []), query.img('.main-scene-img', { attribute: 'data-srcset' }), + query.img('.scene-thumb'), ]; release.tags = query.contents('.flag-bg'); release.qualities = [ - query.exists('//a[contains(@class, "label-four-k") and contains(text(), "4K")]') && 2160, // label-four-k is also used for non-4K tags + query.exists('//a[contains(@class, "label-four-k") and contains(text(), "4K")] | //span[contains(@class, "bug-4k")]') && 2160, // label-four-k is also used for non-4K tags query.exists('//a[contains(@class, "label-hd") and contains(text(), "HD")]') && 720, ].filter(Boolean); @@ -42,20 +46,24 @@ function scrapeScene({ query }, { url }) { const release = {}; release.entryId = new URL(url).pathname.match(/-(\d+)$/)?.[1]; - release.title = query.content('.scene-title'); - release.description = query.text('.synopsis'); + release.title = query.content('.scene-title, .grey-title'); + release.description = query.text('.synopsis, .scene-description'); - release.date = query.date('.entry-date', 'MMM D, YYYY'); + release.date = query.date('.entry-date, .released-date', ['MMM D, YYYY', 'MM/DD/YY']); release.duration = query.duration('.duration'); - release.actors = query.all('.performer-list a').map((actorEl) => ({ - name: unprint.query.content(actorEl), - url: unprint.query.url(actorEl, null), - })); + release.actors = query.exists('.performer-list') + ? query.all('.performer-list a, .grey-performers a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null), + })) + : query.content('.grey-performers')?.split(',').map((actorName) => actorName.trim()); release.poster = [ - ...(query.sourceSet('source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || []), + ...(query.sourceSet('.play-trailer source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || []), + ...(query.sourceSet('.scenepage-video source[srcset*="scenes/"][type="image/jpeg"]', 'srcset') || []), query.img('.play-trailer img[data-srcset*="scenes/"]', { attribute: 'data-srcset' }), + query.img('.scenepage-video .playcard'), ]; release.photos = query.els('.contain-scene-images.desktop-only .scene-image').map((imgEl) => [ @@ -77,7 +85,7 @@ function scrapeScene({ query }, { url }) { release.channel = slugify(query.content('.site-title'), ''); - release.tags = query.contents('.categories a'); + release.tags = query.contents('.categories a, .category a'); release.qualities = [ query.exists('//a[contains(@class, "label-four-k") and contains(text(), "4K")]') && 2160, // label-four-k is also used for non-4K tags @@ -92,19 +100,23 @@ async function scrapeProfile({ query }) { const profile = {}; profile.description = query.content('.bio_about_text'); - profile.avatar = query.img('img.performer-pic'); + profile.avatar = query.img('img.performer-pic, img.performer-img, img.peformer-img'); // sic peformer return profile; } async function fetchLatest(site, page = 1) { - const res = await unprint.get(`${site.url}?page=${page}`, { selectAll: '.site-list .scene-item' }); + const res = await unprint.get(`${site.url}${site.parameters?.scenes || ''}?page=${page}`, { selectAll: '.site-list .scene-item, .panel-body' }); - return scrapeLatest(res.context, site); + if (res.ok) { + return scrapeLatest(res.context, site); + } + + return res.status; } -async function fetchProfile({ slug }) { - const res = await unprint.get(`https://www.naughtyamerica.com/pornstar/${slug}`, { select: '.bio-info' }); +async function fetchProfile({ slug }, { channel }) { + const res = await unprint.get(`${channel.url}/pornstar/${slug}`, { select: '.bio-info, .performer-details' }); if (res.ok) { return scrapeProfile(res.context); diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 86a35bcb..466a35eb 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -266,6 +266,7 @@ const scrapers = { mylf: teamskeet, mugfucked: fullpornnetwork, naughtyamerica, + tonightsgirlfriend: naughtyamerica, nebraskacoeds: elevatedx, nfbusty: nubiles, nubilefilms: nubiles,