From b679ae95f2130a9cb4df7c0bf6098ab5ee050c77 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Mon, 18 Nov 2024 23:58:55 +0100 Subject: [PATCH] Moved LetsDoeIt to Aylo, updated PornDoe scraper. --- seeds/01_networks.js | 4 ++-- seeds/02_sites.js | 37 ++++++++++++++++++++----------------- src/scrapers/aylo.js | 4 ++-- src/scrapers/porndoe.js | 37 ++++++++++++++++++++----------------- src/scrapers/scrapers.js | 2 -- 5 files changed, 44 insertions(+), 40 deletions(-) diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 267a0678..5eed8870 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -464,8 +464,8 @@ const networks = [ { slug: 'letsdoeit', name: 'LetsDoeIt', - url: 'https://letsdoeit.com', - parent: 'porndoe', + url: 'https://www.letsdoeit.com', + parent: 'aylo', }, { slug: 'littlecapricedreams', diff --git a/seeds/02_sites.js b/seeds/02_sites.js index ba950ae5..4db294f0 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -12691,111 +12691,114 @@ const sites = [ { name: 'The White Boxxx', slug: 'thewhiteboxxx', - url: 'https://letsdoeit.com/channels/the-white-boxxx.en.html', + url: 'https://letsdoeit.com/sites/the-white-boxxx', tags: ['glamcore'], parent: 'letsdoeit', }, { name: 'A Girl Knows', slug: 'agirlknows', - url: 'https://letsdoeit.com/channels/a-girl-knows.en.html', + url: 'https://letsdoeit.com/sites/a-girl-knows', tags: ['lesbian'], parent: 'letsdoeit', }, { name: 'Her Limit', slug: 'herlimit', - url: 'https://letsdoeit.com/channels/her-limit.en.html', + url: 'https://letsdoeit.com/sites/her-limit', tags: ['rough'], parent: 'letsdoeit', + parameters: { + siteId: 118091, + }, }, { name: 'Horny Hostel', slug: 'hornyhostel', - url: 'https://letsdoeit.com/channels/horny-hostel.en.html', + url: 'https://letsdoeit.com/sites/horny-hostel', parent: 'letsdoeit', }, { name: 'xChimera', slug: 'xchimera', - url: 'https://letsdoeit.com/channels/xchimera.en.html', + url: 'https://letsdoeit.com/sites/xchimera', parent: 'letsdoeit', }, { name: 'Scam Angels', slug: 'scamangels', - url: 'https://letsdoeit.com/channels/scam-angels.en.html', + url: 'https://letsdoeit.com/sites/scam-angels', parent: 'letsdoeit', }, { name: 'Bitches Abroad', slug: 'bitchesabroad', - url: 'https://letsdoeit.com/channels/bitches-abroad.en.html', + url: 'https://letsdoeit.com/sites/bitches-abroad', parent: 'letsdoeit', }, { name: 'Porno Academie', slug: 'pornoacademie', - url: 'https://letsdoeit.com/channels/porno-academie.en.html', + url: 'https://letsdoeit.com/sites/porno-academie', parent: 'letsdoeit', }, { name: 'Bums Bus', slug: 'bumsbus', - url: 'https://letsdoeit.com/channels/bums-bus.en.html', + url: 'https://letsdoeit.com/sites/bums-bus', tags: ['van'], parent: 'letsdoeit', }, { name: 'XXX Shades', slug: 'xxxshades', - url: 'https://letsdoeit.com/channels/xxx-shades.en.html', + url: 'https://letsdoeit.com/sites/xxx-shades', parent: 'letsdoeit', }, { name: 'Doe Projects', slug: 'doeprojects', - url: 'https://letsdoeit.com/channels/doe-projects.en.html', + url: 'https://letsdoeit.com/sites/doe-projects', parent: 'letsdoeit', }, { name: 'Relaxxxed', slug: 'relaxxxed', - url: 'https://letsdoeit.com/channels/relaxxxed.en.html', + url: 'https://letsdoeit.com/sites/relaxxxed', tags: ['van'], parent: 'letsdoeit', }, { name: 'Kinky Inlaws', slug: 'kinkyinlaws', - url: 'https://letsdoeit.com/channels/kinky-inlaws.en.html', + url: 'https://letsdoeit.com/sites/kinky-inlaws', tags: ['family'], parent: 'letsdoeit', }, { name: 'My Naughty Album', slug: 'mynaughtyalbum', - url: 'https://letsdoeit.com/channels/my-naughty-album.en.html', + url: 'https://letsdoeit.com/sites/my-naughty-album', parent: 'letsdoeit', }, { name: 'Bums Buero', slug: 'bumsbuero', - url: 'https://letsdoeit.com/channels/bums-buero.en.html', + url: 'https://letsdoeit.com/sites/bums-buero', tags: ['office'], parent: 'letsdoeit', }, { name: 'Quest For Orgasm', slug: 'questfororgasm', - url: 'https://letsdoeit.com/channels/quest-for-orgasm.en.html', + url: 'https://letsdoeit.com/sites/quest-for-orgasm', tags: ['solo'], parent: 'letsdoeit', }, { name: 'Bums Besuch', slug: 'bumsbesuch', - url: 'https://letsdoeit.com/channels/bums-besuch.en.html', + url: 'https://letsdoeit.com/sites/bums-besuch', parent: 'letsdoeit', }, // LITTLE CAPRICE diff --git a/src/scrapers/aylo.js b/src/scrapers/aylo.js index 4215fc2f..850ddf08 100755 --- a/src/scrapers/aylo.js +++ b/src/scrapers/aylo.js @@ -204,7 +204,7 @@ function getUrl(site) { return `${site.parent.url}/scenes?site=${site.parameters.siteId}`; } - throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`); + throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`); } async function getSession(site, parameters, url) { @@ -241,7 +241,7 @@ async function getSession(site, parameters, url) { } } - throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`); + throw new Error(`Failed to acquire Aylo session (${res.statusCode})`); } async function fetchLatest(site, page = 1, options) { diff --git a/src/scrapers/porndoe.js b/src/scrapers/porndoe.js index 884aa072..ded163c9 100755 --- a/src/scrapers/porndoe.js +++ b/src/scrapers/porndoe.js @@ -1,6 +1,7 @@ 'use strict'; -const qu = require('../utils/qu'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); function scrapeAll(scenes) { @@ -43,14 +44,15 @@ function scrapeScene({ query }, url) { release.description = query.meta('name=description') || query.q('read-even-more', true); - release.date = query.date('.h5-published', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/); - release.actors = query.all('.video-top-details .actors a[href*="/models"]').map((el) => ({ + release.date = query.date('.-mvd-grid-stats', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/); + + release.actors = query.all('.video-top-details a[href*="/models"]').map((el) => ({ name: query.cnt(el), url: query.url(el, null), })); release.duration = query.dur('meta[itemprop="duration"]', null, 'content'); - release.tags = query.all('.video-top-details a[href*="/categories"], .video-top-details a[href*="/tags"]', true); + release.tags = query.all('.-vpta-bottom a[href*="/categories"], .-vpta-bottom a[href*="/tags"]', true); release.poster = query.img('.poster img') || query.meta('itemprop=thumbnailUrl'); release.photos = query.imgs('#gallery-thumbs [class*="thumb"]', 'data-bg').slice(1).map((photo) => [ // first image is poster @@ -60,17 +62,17 @@ function scrapeScene({ query }, url) { release.trailer = query.meta('itemprop=contentURL'); - release.channel = slugify(query.q('.video-top-details .actors a[href*="/channels"] strong', true), ''); + release.channel = slugify(query.q('.video-top-details a[href*="/channels"]', true), ''); return release; } async function fetchActorReleases({ query }, url, page = 1, accReleases = []) { - const releases = scrapeAll(qu.initAll(query.all('.main-content .card-video, .main-content .global-video-card'))); + const releases = scrapeAll(unprint.initAll(query.all('.main-content .card-video, .main-content .global-video-card'))); const hasNextPage = query.exists('.pages a.active + a'); if (hasNextPage) { - const res = await qu.get(`${url}?page=${page + 1}`); + const res = await unprint.get(`${url}?page=${page + 1}`); if (res.ok) { return fetchActorReleases(res.item, url, page + 1, accReleases.concat(releases)); @@ -85,10 +87,10 @@ async function scrapeProfile({ query }, url, include) { const bio = query.all('[class*="list"] [class*="list-item"]').reduce((acc, el) => ({ ...acc, - [slugify(query.text(el), '_')]: query.cnt(el, 'span'), + [slugify(unprint.query.text(el), '_')]: unprint.query.content(el, 'span'), }), {}); - const tags = query.cnts(` + const tags = query.contents(` [class$="description"] [class*="more-less"] a[href*="/categories"], [class$="description"] [class*="more-less"] a[href*="/tags"], [class*="seo-text"] a[href*="/categories"], @@ -97,8 +99,9 @@ async function scrapeProfile({ query }, url, include) { profile.nationality = bio.nationality; profile.placeOfBirth = bio.birth_place; + profile.age = unprint.extractNumber(bio.age); - profile.dateOfBirth = qu.extractDate(bio.birth_date, 'MMM D, YYYY'); + profile.dateOfBirth = unprint.extractDate(bio.birth_date, 'MMM D, YYYY'); if (/enhanced/i.test(bio.tits_type)) profile.naturalBoobs = false; if (/natural/i.test(bio.tits_type)) profile.naturalBoobs = true; @@ -120,22 +123,22 @@ async function scrapeProfile({ query }, url, include) { } async function fetchLatest(channel, page = 1) { - const res = await qu.getAll(`${channel.url}?page=${page}`, '.main-content .card-video, .main-content .global-video-card'); + const res = await unprint.get(`${channel.url}?page=${page}`, { selectAll: '.main-content .card-video, .main-content .global-video-card' }); - return res.ok ? scrapeAll(res.items, channel) : res.status; + return res.ok ? scrapeAll(res.context, channel) : res.status; } async function fetchScene(url, channel) { - const res = await qu.get(url, '.main-content'); + const res = await unprint.get(url, { select: '.main-content' }); - return res.ok ? scrapeScene(res.item, url, channel) : res.status; + return res.ok ? scrapeScene(res.context, url, channel) : res.status; } async function fetchProfile({ name: actorName }, entity, include) { - const url = `http://letsdoeit.com/models/${slugify(actorName)}.en.html`; - const res = await qu.get(url); + const url = `${entity.url}/models/${slugify(actorName)}.en.html`; + const res = await unprint.get(url); - return res.ok ? scrapeProfile(res.item, url, include) : res.status; + return res.ok ? scrapeProfile(res.context, url, include) : res.status; } module.exports = { diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 0e8ce818..8898fb56 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -137,7 +137,6 @@ const scrapers = { kinkvr: badoink, // analvids, analvids: pornbox, - letsdoeit: porndoe, littlecapricedreams, loveherfilms, mamacitaz: porndoe, @@ -270,7 +269,6 @@ const scrapers = { shelovesblack: loveherfilms, // analvids, analvids: pornbox, - letsdoeit: porndoe, littlecapricedreams, mamacitaz: porndoe, men: aylo,