diff --git a/src/scrapers/pierrewoodman.js b/src/scrapers/pierrewoodman.js index fe7a9b49..9cd28ab7 100755 --- a/src/scrapers/pierrewoodman.js +++ b/src/scrapers/pierrewoodman.js @@ -1,154 +1,165 @@ 'use strict'; -const qu = require('../utils/qu'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); const capitalize = require('../utils/capitalize'); -function removeImageBorder(source) { - if (!source) { - return null; - } - - const sourceNoId = source.replace(/_[a-z0-9]+(_v\d)?\.jpg/, '.jpg'); - - return [ - sourceNoId - .replace(/actoravatar_/, 'actoravatarnoborder_') - .replace(/scenedefault/, 'scenenoborder'), - sourceNoId, - source, - ]; -} - -function mapActor(actorEl, query, entity) { - const avatar = query.img(actorEl); - - return { - name: capitalize(query.cnt(actorEl, '.name, .informations p'), { uncapitalize: true }), - url: query.url(actorEl, null, 'href', { origin: entity.url }), - avatar: removeImageBorder(avatar), - }; -} - function scrapeAll(scenes, channel, discard = true) { - return scenes.map(({ query, el }) => { + return scenes.reduce((acc, { query, element }) => { const release = {}; - release.url = query.url(el, null, 'href', { origin: channel.url }); + release.url = unprint.query.url(element, null, { attribute: 'href', origin: channel.url }); + const { hostname, pathname } = new URL(release.url); release.entryId = pathname.match(/_(\d+)/)?.[1]; release.channel = hostname.match(/(\w+)\.com/)?.[1]; if (discard && release.channel !== channel.slug) { - return null; + acc.unextracted.concat(release); + + return acc; } release.title = query.content('.title, .informations h3'); - release.duration = query.duration('.duration, .timer'); + release.duration = query.duration('.duration, .timer, .infos'); - release.actors = query.cnt('.sub')?.split(/,\s*/); + release.actors = query.content('.sub')?.split(/,\s*/); - release.poster = removeImageBorder(query.img('.thumb, picture img')); + release.poster = query.img('.thumb, picture img'); - return release; - }).filter(Boolean); + acc.scenes.concat(release); + + return acc; + }, { + scenes: [], + unextracted: [], + }); } -function scrapeScene({ query, html }, url, entity) { +async function fetchLatest(channel, page) { + const res = await unprint.get(channel.parameters?.latest + ? `${channel.parameters.latest}?page=${page}` + : `${channel.url}/videos?page=${page}`, { selectAll: '.items .scene' }); + + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; +} + +function scrapeScene({ query, html }, { url, entity }) { const release = {}; release.entryId = new URL(url).pathname.match(/_(\d+)/)?.[1]; - const title = query.cnt('.page_title h1, h2'); + const title = query.content('.page_title h1, h2'); const wunfTitle = title.match(/wunf \d+/i)?.[0]; release.title = wunfTitle ? wunfTitle.toUpperCase() : title; - release.description = query.cnt('.info_container .description'); + release.description = query.content('.info_container .description'); - release.date = query.date('.info_container .info_line:nth-child(1)', 'YYYY-MM-DD') || query.date('.description', 'DD MMMM YYYY', /\d{1,2} \w+ \d{4}/); - release.actors = query.all('.girl_item, .starring .item').map((actorEl) => mapActor(actorEl, query, entity)); + release.date = query.date('.info_container .info_line:nth-child(1)', 'YYYY-MM-DD') + || query.date('.description', 'D MMMM YYYY', { match: /\d{1,2} \w+ \d{4}/ }); + + release.actors = query.all('.girl_item, .starring .item').map((actorEl) => { + const avatar = unprint.query.img(actorEl); + + return { + name: capitalize(unprint.query.content(actorEl, '.name, .informations p'), { uncapitalize: true }), + url: unprint.query.url(actorEl, null, { origin: entity.url }), + avatar, + }; + }); release.duration = query.duration('.infos .description'); if (!release.duration) { - const duration = query.cnt('.info_container .info_line:nth-child(2)'); + const duration = query.content('.info_container .info_line:nth-child(2)'); release.duration = (duration.match(/(\d+) hour/)?.[1] || 0) * 3600 + (duration.match(/(\d+) minutes/)?.[1] || 0) * 60; } - release.tags = query.cnts('.tags a:not(.more_tag)'); - release.poster = removeImageBorder(html.match(/image: "(.*?)"/)?.[1]); + release.tags = query.contents('.tags a:not(.more_tag)'); + release.poster = html.match(/image: "(.*?)"/)?.[1]; release.trailer = html.match(/url: "(.*mp4.*)"/g)?.map((src) => ({ src: src.match(/"(.*)"/)?.[1], quality: Number(src.match(/[-/](\d+)p/)?.[1]), })); + if (query.exists('.download-icon-4k')) { + release.qualities = [2160]; + } + return release; } function scrapeProfile({ query }, entity) { const profile = {}; - profile.avatar = removeImageBorder(query.img('.actor img')); - profile.nationality = query.cnt('.nationality, .nationnality'); // sic + profile.avatar = query.img('.actor img'); + profile.nationality = query.content('.nationality, .nationnality'); // sic - profile.scenes = scrapeAll(qu.initAll(query.all('.videos .item')), entity, false); + profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item')), entity, false); return profile; } -async function fetchLatest(channel, page) { - const res = await qu.getAll(channel.parameters?.latest - ? `${channel.parameters.latest}?page=${page}` - : `${channel.url}/videos?page=${page}`, '.items .scene'); - - if (res.ok) { - return scrapeAll(res.items, channel); +async function getActorUrl(actor) { + if (actor.url) { + return actor.url; } - return res.status; -} - -async function fetchProfile(baseActor, entity) { - const res = await qu.get('https://www.woodmancastingx.com'); + // Wake up'n Fuck has higher quality images, but not all performers are available, vice versa they are + const res = await unprint.get('https://www.woodmancastingx.com'); if (!res.ok) { return res.status; } - const searchUrl = qu.prefixUrl(res.html.match(/"(.*searchCompletion\.js)"/)?.[1], 'https://www.woodmancastingx.com'); + const searchUrl = unprint.prefixUrl(res.context.html.match(/"(.*searchCompletion\.js)"/)?.[1], 'https://www.woodmancastingx.com'); if (!searchUrl) { return null; } - const searchRes = await qu.get(searchUrl, null, null, { decodeJSON: true }); + const searchRes = await unprint.get(searchUrl); if (!searchRes.ok) { return searchRes.status; } - const [actorId] = searchRes.body.actors.find(([_actorId, actorName]) => slugify(actorName) === baseActor.slug) || []; + const [actorId] = searchRes.data.actors.find(([_actorId, actorName]) => slugify(actorName) === actor.slug) || []; if (!actorId) { return null; } - const actorRes = await qu.get(`https://www.woodmancastingx.com/search/redirection/actors/${actorId}`); + return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`; +} - if (actorRes.ok) { - return scrapeProfile(actorRes.item, entity); +async function fetchProfile(actor, entity) { + const actorUrl = await getActorUrl(actor); + + if (typeof actorUrl !== 'string') { + return actorUrl; } - return actorRes.status; + const res = await unprint.get(actorUrl); + + if (res.ok) { + return scrapeProfile(res.context, entity); + } + + return res.status; } module.exports = { fetchLatest, scrapeScene, fetchProfile, - deprecated: true, }; diff --git a/src/scrapers/score.js b/src/scrapers/score.js index 3861d2f9..876469a6 100755 --- a/src/scrapers/score.js +++ b/src/scrapers/score.js @@ -24,7 +24,7 @@ function resizeSrc(src) { function deriveDate(query) { const now = new Date(); - // Nov. 12th + // Nov. 2025 const dateMY = query.date('.i-date', 'MMM. YYYY', { match: /(\w+\.? \d{4})/ }); if (dateMY) { @@ -34,6 +34,7 @@ function deriveDate(query) { }; } + // Nov. 12th const dateMDo = query.date('.i-date', 'MMM. Do', { match: /(\w+\.? \d{1,2}\w+)/ }); if (dateMDo) { @@ -47,6 +48,7 @@ function deriveDate(query) { }; } + // 8 Weeks Ago const dateAgo = query.dateAgo('.i-date'); if (dateAgo) { diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 94338d1c..5d2f1b0f 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -279,7 +279,7 @@ const scrapers = { wankzvr, tranzvr: wankzvr, milfvr: wankzvr, - // nubilus + // nubiles anilos: nubiles, brattysis: nubiles, deeplush: nubiles, @@ -298,6 +298,9 @@ const scrapers = { aziani, '2poles1hole': aziani, creampiled: aziani, + // woodman + pierrewoodman, + wakeupnfuck: pierrewoodman, // etc '18vr': badoink, theflourishxxx: theflourish, @@ -362,7 +365,6 @@ const scrapers = { pervcity, dpdiva: pervcity, pervertgallery: fullpornnetwork, - pierrewoodman, porncz, pornhub, pornworld, diff --git a/tests/profiles.js b/tests/profiles.js index a1d4df2d..83313e04 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -176,6 +176,7 @@ const actors = [ { entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] }, { entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] }, { entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] }, + { entity: 'pierrewoodman', name: 'Makayla Cox', fields: ['avatar', 'nationality'] }, ]; const actorScrapers = scrapers.actors;