From b0b8295629df7b8275c18c27a56ad42091b7a2f8 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 21 Jan 2026 23:00:28 +0100 Subject: [PATCH] Refactored Love Her Films, using API. Fixed Naughty America scraper for Tonight's Girlfriend. --- seeds/01_networks.js | 3 + seeds/02_sites.js | 10 ++ src/scrapers/actors.js | 14 +- src/scrapers/loveherfilms.js | 237 +++++++++++++++++++++------------ src/scrapers/naughtyamerica.js | 8 +- src/utils/convert.js | 2 +- tests/profiles.js | 10 +- 7 files changed, 184 insertions(+), 100 deletions(-) diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 9c979568..df6cd38a 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -526,6 +526,9 @@ const networks = [ slug: 'loveherfilms', name: 'Love Her Films', url: 'https://www.loveherfilms.com', + parameters: { + xSiteId: '677d3f422e587cf94d1a9e5d', + }, }, { slug: 'mamacitaz', diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 191780aa..df6dd288 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -7006,12 +7006,18 @@ const sites = [ url: 'https://www.loveherfeet.com', tags: ['feet'], parent: 'loveherfilms', + parameters: { + xSiteId: '677d3f422e587cf94d1a9e5a', + }, }, { name: 'Love Her Boobs', slug: 'loveherboobs', url: 'https://www.loveherboobs.com', parent: 'loveherfilms', + parameters: { + xSiteId: '677d3f422e587cf94d1a9e5b', + }, }, { name: 'She Loves Black', @@ -7019,6 +7025,9 @@ const sites = [ url: 'https://www.shelovesblack.com', tags: ['interracial', 'pov'], parent: 'loveherfilms', + parameters: { + xSiteId: '677d3f422e587cf94d1a9e5c', + }, }, // MAMACITAZ { @@ -8458,6 +8467,7 @@ const sites = [ name: "Tonight's Girlfriend", url: 'https://www.tonightsgirlfriend.com', parent: 'naughtyamerica', + independent: true, parameters: { scenes: '/scenes', useActorUrl: false, diff --git a/src/scrapers/actors.js b/src/scrapers/actors.js index de68d154..e3844d06 100644 --- a/src/scrapers/actors.js +++ b/src/scrapers/actors.js @@ -177,7 +177,14 @@ module.exports = { // naughty america naughtyamerica, tonightsgirlfriend: naughtyamerica, - // full porn network + // kink + kink, + kinkmen: kink, + // love her films + loveherfilms, + loveherfeet: loveherfilms, + loveherboobs: loveherfilms, + shelovesblack: loveherfilms, // etc '18vr': badoink, theflourishxxx: theflourish, @@ -212,11 +219,6 @@ module.exports = { karups, kellymadison, '8kmembers': kellymadison, - kink, - kinkmen: kink, - loveherfilms, - loveherfeet: loveherfilms, - shelovesblack: loveherfilms, // analvids, analvids: pornbox, littlecapricedreams, diff --git a/src/scrapers/loveherfilms.js b/src/scrapers/loveherfilms.js index 9a8d2984..89078338 100755 --- a/src/scrapers/loveherfilms.js +++ b/src/scrapers/loveherfilms.js @@ -1,91 +1,148 @@ 'use strict'; -const qu = require('../utils/q'); +const unprint = require('unprint'); + const slugify = require('../utils/slugify'); -const { feetInchesToCm, lbsToKg, femaleFeetUsToEu } = require('../utils/convert'); -const { getImageWithFallbacks } = require('./elevatedx'); +const { convert } = require('../utils/convert'); +// const { getImageWithFallbacks } = require('./elevatedx'); -function composeEntryId(release) { - return `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${release.actors.map((actor) => slugify(actor)).join('-')}`; -} - -function scrapeAll(scenes, channel) { - return scenes.map(({ query }) => { - const release = {}; - - release.url = query.url('a.item-video-overlay, a.item-episode-overlay'); - release.title = query.cnt('.item-title'); - - release.date = query.date('.video-date', 'MMM D, YYYY'); - release.duration = query.duration('.video-time'); - release.actors = query.contents('.information a[href*="models/"]').map((actor) => actor.match(/[a-z ]+/i)?.[0].trim()).filter(Boolean); - - release.poster = getImageWithFallbacks(query.q, 'img.mainThumb', channel); - release.teaser = query.video('img.mainThumb', 'data-vid'); - - release.entryId = composeEntryId(release); - - return release; - }); -} - -function scrapeScene({ query }, url, channel) { - const release = {}; - - release.title = query.content('h1.title'); - release.description = query.content('p.description'); - - release.date = query.date('.date', 'MMM D, YYYY'); - release.duration = query.duration('.time'); - - release.actors = query.contents('.featured .model'); - - const poster = query.img('.video video', 'poster', { origin: channel.url }); - - if (poster) { - release.poster = [ - poster.replace('-4x', '-2x'), // 4x appears to be upscaled beyond original resolution - poster.replace('-4x', '-1x'), - poster, - ]; +function curateSources(item) { + if (!item) { + return null; } - release.photos = query.imgs('.photos .thumbs[data-src]'); - release.trailer = query.video('.video source'); + if (item.sources) { + return item.sources + .toSorted((sourceA, sourceB) => sourceB.resolution - sourceA.resolution) + .map((source) => source.path); + } - release.tags = query.contents('.video-tags a[href*="categories/"]'); + return item.previewImage; +} - release.entryId = composeEntryId(release); +function scrapeScene(data, channel) { + const release = {}; + + release.entryId = data.slug?.toLowerCase(); // _id can't be used for API lookup + release.url = data.slug && `${channel.origin}/tour/trailers/${data.slug}.html`; + + release.title = data.title; + release.description = data.description; + + release.date = new Date(data.releaseDateVideo); + + release.actors = data.models?.map((actor) => ({ + name: actor.modelName, + url: actor.slug && `${channel.origin}/tour/models/${actor.slug}.html`, + entryId: actor._id, + })); + + release.tags = data.categories?.map((category) => category.title); + + if (data.type?.toLowerCase() === 'bts') { + release.tags = release.tags.concat('bts'); + } + + release.poster = curateSources(data.thumb); + release.photos = [...data.photos?.map((photo) => curateSources(photo)) || []].filter(Boolean); + + if (data.thumbHover && data.thumbHover.baseName !== '849') { // placeholder image + release.photos = release.photos.concat([curateSources(data.thumbHover)]); + } + + const trailerType = data.trailer?.type === 'previewTrailer' + ? 'teaser' + : 'trailer'; + + release[trailerType] = data.trailer?.sources?.map((source) => ({ + src: source.path, + quality: source.quality || source.height || null, // only available on teaser + expectType: { + 'application/octet-stream': 'video/mp4', + }, + })); return release; } -function scrapeProfile({ query, el }, url, entity, _include) { +async function fetchLatest(channel, page = 1, { parameters }) { + // bonus-type scenes are third-party, don't include + const url = `${channel.origin.replace('www.', 'api.')}/v1/content-sets?types[]=Content&types[]=Tease&types[]=BTS&sort=latest&limit=27&offset=${(page - 1) * 27}`; + + const res = await unprint.get(url, { + interface: 'request', + headers: { + 'X-Site-Id': parameters.xSiteId, + }, + }); + + if (res.ok && res.data?.items) { + return res.data.items.map((item) => scrapeScene(item, channel)); + } + + return res.status; +} + +/* no entry ID, enable when two entry IDs are supported +async function fetchUpcoming(channel, _page, { parameters }) { + // unsure if site ID parameter is necessary when the header is present, but the site uses it + const url = `${channel.origin.replace('www.', 'api.')}/v1/content-sets/upcoming?siteId${parameters.xSiteId}`; + + const res = await unprint.get(url, { + interface: 'request', + headers: { + 'X-Site-Id': parameters.xSiteId, + }, + }); + + if (res.ok && res.data) { + return res.data.map((item) => scrapeScene(item, channel)); + } + + return res.status; +} +*/ + +async function fetchScene(sceneUrl, entity, _baseRelease, { parameters }) { + const slug = new URL(sceneUrl).pathname.match(/\/trailers\/(.*?)\.html/)[1]; + const url = `${entity.origin.replace('www.', 'api.')}/v1/content-sets/${slug}`; + + const res = await unprint.get(url, { + interface: 'request', + headers: { + 'X-Site-Id': parameters.xSiteId, + }, + }); + + if (res.ok && res.data) { + return scrapeScene(res.data, entity); + } + + return res.status; +} + +function scrapeProfile({ query }, url) { const profile = { url }; - const bio = Array.from(Array.from(query.html('.stats script').matchAll(/totalStats\.push\(.*\)/g))).reduce((acc, match) => { - const { query: statQuery } = qu.extract(match[0].slice(match[0].indexOf('`'), match[0].lastIndexOf('`'))); + const bio = Object.fromEntries(query.all('ul[class*="HeroModel_list"] li').map((bioEl) => [ + slugify(unprint.query.content(bioEl, 'p:first-child'), '_'), + unprint.query.content(bioEl, 'p:last-child'), + ])); - return { - ...acc, - [slugify(statQuery.content('span'), '_')]: statQuery.text('p'), - }; - }, {}); - - profile.description = query.cnt('.about p'); + profile.description = query.content('p[class*="HeroModel_text"]'); profile.birthPlace = bio.place_of_birth; - profile.dateOfBirth = qu.extractDate(bio.date_of_birth, ['MMMM DD, YYYY', 'MM/DD/YYYY']); + profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, ['MMMM DD, YYYY', 'MM/DD/YYYY']); profile.ethnicity = bio.ethnicity; profile.measurements = bio.measurements; - profile.height = feetInchesToCm(bio.height); - profile.weight = lbsToKg(bio.weight); + profile.height = convert(bio.height, 'cm'); + profile.weight = convert(bio.weight, 'lb', 'kg'); profile.hairColor = bio.hair_color; profile.eyes = bio.eye_color; - profile.feet = femaleFeetUsToEu(bio.feet_size); + profile.foot = unprint.extractNumber(bio.feet_size, { match: /\((\d+(.\d+)?) eur\)/i, matchIndex: 1 }); if (/(natural)|(real)/i.test(bio.boob_type || bio.tits_type)) { profile.naturalBoobs = true; @@ -105,51 +162,55 @@ function scrapeProfile({ query, el }, url, entity, _include) { profile.piercings = profile.hasPiercings ? bio.piercings : null; } - profile.avatar = query.img('.picture img'); - profile.scenes = scrapeAll(qu.initAll(el, '.scene, .latest-scene .item-episode'), entity); - - console.log(bio); - console.log(profile); + profile.avatar = query.sourceSet('picture[class*="modelImage"] img'); return profile; } -async function fetchLatest(channel, page = 1) { - const url = `${channel.url}/tour/categories/movies/${page}/latest/`; - const res = await qu.getAll(url, '.main .item-video'); +async function getActorUrl(actor, { entity, parameters }) { + if (actor.url) { + return actor.url; + } + + const url = `${entity.origin.replace('www.', 'api.')}/v1/models?limit=12&offset=0&query=&sort=latest&modelsNames[]=${slugify(actor.name, '+')}`; + + const res = await unprint.get(url, { + interface: 'request', + headers: { + 'X-Site-Id': parameters.xSiteId, + }, + }); if (res.ok) { - return scrapeAll(res.items, channel); + const actorSlug = res.data.items?.find((item) => slugify(item.modelName) === actor.slug)?.slug; + + if (actorSlug) { + return `${entity.origin}/tour/models/${actorSlug}.html`; + } } - return res.status; + return null; } -async function fetchProfile({ name: actorName }, entity, include) { - const searchRes = await qu.get(`${entity.url}/tour/search.php?model_name=${actorName}`); - - if (!searchRes.ok) { - return searchRes.status; - } - - const actorUrl = searchRes.item.query.url(`.item-portrait a[title="${actorName}"]`); +async function fetchProfile(actor, context) { + const actorUrl = await getActorUrl(actor, context); if (!actorUrl) { return null; } - const actorRes = await qu.get(actorUrl); + const res = await unprint.get(actorUrl); - if (actorRes.ok) { - return scrapeProfile(actorRes.item, actorUrl, entity, include); + if (res.ok) { + return scrapeProfile(res.context, actorUrl, context); } - return actorRes.status; + return res.status; } module.exports = { fetchLatest, + // fetchUpcoming, fetchProfile, - scrapeScene, - deprecated: true, + fetchScene, }; diff --git a/src/scrapers/naughtyamerica.js b/src/scrapers/naughtyamerica.js index c9210b95..ab7856e4 100755 --- a/src/scrapers/naughtyamerica.js +++ b/src/scrapers/naughtyamerica.js @@ -52,7 +52,7 @@ async function fetchLatest(channel, page = 1) { const res = await unprint.browserRequest(url, { selectAll: '.site-list .scene-item, .panel-body', async control(ctx) { - await ctx.locator('.site-list').hover({ trial: true, timeout: 10000 }); // wait for overview to initialize + await ctx.locator('.site-list, .grid-three').hover({ trial: true, timeout: 10000 }); // wait for overview to initialize }, }); @@ -82,7 +82,7 @@ function scrapeScene({ query }, { url }) { name: unprint.query.content(actorEl), url: stripQuery(unprint.query.url(actorEl, null)), })) - : query.content('.grey-performers')?.split(',').map((actorName) => actorName.trim()); + : query.content('.grey-performers')?.split(',').map((actorName) => actorName.trim()); // not all performers are linked release.poster = [ ...(query.sourceSet('.play-trailer source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || []), @@ -119,13 +119,15 @@ function scrapeScene({ query }, { url }) { query.exists('//a[contains(@class, "label-hd") and contains(text(), "HD")]') && 720, ].filter(Boolean); + console.log(release); + return release; } async function fetchScene(url, _channel) { const res = await unprint.browserRequest(url, { async control(ctx) { - await ctx.locator('.scene-info').hover({ trial: true, timeout: 30000 }); // wait for trailer to initialize + await ctx.locator('.scene-info, .scene').hover({ trial: true, timeout: 30000 }); // wait for trailer to initialize }, }); diff --git a/src/utils/convert.js b/src/utils/convert.js index 55ad9a22..5bafc8d6 100755 --- a/src/utils/convert.js +++ b/src/utils/convert.js @@ -61,7 +61,7 @@ function kgToLbs(kgs) { function curateConvertInput(string) { if (/['’]|(fe*o*t)/.test(string)) { - const result = string.match(/(\d+).*(\d+)/); + const result = string.match(/(\d+).*?(\d+)/); if (result) { return `${result[1]}ft ${result[2]}in`; diff --git a/tests/profiles.js b/tests/profiles.js index c27b689a..19de1f3e 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -186,6 +186,14 @@ const actors = [ // kink { entity: 'kink', name: 'Remy LaCroix', fields: ['avatar', 'description', 'hairColor', 'naturalBoobs', 'ethnicity'] }, { entity: 'kinkmen', name: 'Christian Wilde', fields: ['avatar', 'description', 'hairColor', 'hasTattoos', 'isCircumcised'] }, + // love her films + { entity: 'loveherboobs', name: 'Melody Marks', fields: ['avatar', 'description', 'dateOfBirth', 'ethnicity', 'measurements', 'height', 'weight', 'hairColor', 'eyes', 'foot'] }, + { entity: 'loveherfeet', name: 'Canela Skin', fields: ['avatar', 'description', 'dateOfBirth', 'ethnicity', 'measurements', 'height', 'weight', 'hairColor', 'eyes', 'foot'] }, + { entity: 'shelovesblack', name: 'Nicole Aria', fields: ['avatar', 'description', 'dateOfBirth', 'ethnicity', 'measurements', 'height', 'weight', 'hairColor', 'eyes', 'foot'] }, + { entity: 'loveherfilms', name: 'Maddy May', fields: ['avatar', 'description', 'dateOfBirth', 'ethnicity', 'measurements', 'height', 'weight', 'hairColor', 'eyes', 'foot'] }, + // naughty america + { entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] }, + { entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] }, // etc. { entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] }, { entity: 'bangbros', name: 'Kira Perez', fields: ['avatar', 'gender', 'ethnicity', 'hairColor'] }, @@ -205,8 +213,6 @@ const actors = [ { entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] }, { entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] }, { entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] }, - { entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] }, - { entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] }, { entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] }, { entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] }, { entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },