From 114d7bdff4652e7578c7a79a01052bda2cb933df Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 27 Nov 2022 04:43:27 +0100 Subject: [PATCH] Added scene avatars and improved HTML fallback to Bang! scraper. --- src/scrapers/bang.js | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/scrapers/bang.js b/src/scrapers/bang.js index 9c9387ed..eca616e2 100755 --- a/src/scrapers/bang.js +++ b/src/scrapers/bang.js @@ -34,6 +34,19 @@ function decodeId(id) { .toString('hex'); } +function getAvatarFallback(url) { + try { + const { origin, pathname } = new URL(url); + + return [ + `${origin}${pathname}`, + url, + ]; + } catch (error) { + return null; + } +} + function scrapeAll(scenes, entity) { return scenes.map(({ query }) => { const release = {}; @@ -77,10 +90,19 @@ async function scrapeScene({ query }, { url, entity }) { release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD'); release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span'); - release.actors = data?.actor.map((actor) => ({ - name: actor.name, - url: actor.url, - })) || query.contents('.expanded a[href*="/pornstar"]'); + if (data) { + release.actors = data.actor.map((actor) => ({ + name: actor.name, + url: actor.url, + avatar: getAvatarFallback(query.img(`.video-actors img[alt="${actor.name}"]`)), + })); + } else { + release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img]').map((element) => ({ + name: unprint.query.attribute(element, 'img', 'alt'), + url: unprint.query.url(element, null, { origin: entity.url }), + avatar: getAvatarFallback(unprint.query.img(element, 'img')), + })); + } release.tags = query.contents('.expanded .genres'); @@ -114,7 +136,7 @@ async function fetchActorScenes(element, url, entity, page = 1, acc = []) { } async function scrapeProfile({ query, element }, url, entity, include) { - const profile = {}; + const profile = { url }; profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY'); profile.birthPlace = query.content('//text()[contains(., "in")]/following-sibling::span[contains(@class, "font-bold")][1]'); @@ -124,16 +146,7 @@ async function scrapeProfile({ query, element }, url, entity, include) { profile.hairColor = query.content('//text()[contains(., "Hair Color")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.eyes = query.content('//text()[contains(., "Eye Color")]/following-sibling::span[contains(@class, "font-bold")][1]'); - const avatar = query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]'); - - if (avatar) { - const { origin, pathname } = new URL(avatar); - - profile.avatar = [ - `${origin}${pathname}`, // full size - avatar, - ]; - } + profile.avatar = getAvatarFallback(query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]')); if (include.scenes) { profile.scenes = await fetchActorScenes(element, url, entity);