Added scene avatars and improved HTML fallback to Bang! scraper.

This commit is contained in:
DebaucheryLibrarian 2022-11-27 04:43:27 +01:00
parent 6ab73bfd74
commit 114d7bdff4
1 changed file with 28 additions and 15 deletions

View File

@ -34,6 +34,19 @@ function decodeId(id) {
.toString('hex'); .toString('hex');
} }
/**
 * Build an ordered list of avatar source candidates from a single image URL.
 *
 * The first candidate is the URL reduced to its origin and path (query string
 * and fragment dropped), the second is the URL exactly as it was passed in.
 * NOTE(review): the stripped variant is presumably the full-size image — confirm against the CDN.
 *
 * @param {string} url - Absolute image URL.
 * @returns {string[]|null} The two candidates, or null when `url` is missing
 *   or cannot be parsed as an absolute URL.
 */
function getAvatarFallback(url) {
	let parsed;

	try {
		parsed = new URL(url);
	} catch (error) {
		// Missing or malformed source: report "no avatar" instead of throwing.
		return null;
	}

	const stripped = parsed.origin + parsed.pathname;

	return [stripped, url];
}
function scrapeAll(scenes, entity) { function scrapeAll(scenes, entity) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
@ -77,10 +90,19 @@ async function scrapeScene({ query }, { url, entity }) {
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD'); release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD');
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span'); release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
release.actors = data?.actor.map((actor) => ({ if (data) {
name: actor.name, release.actors = data.actor.map((actor) => ({
url: actor.url, name: actor.name,
})) || query.contents('.expanded a[href*="/pornstar"]'); url: actor.url,
avatar: getAvatarFallback(query.img(`.video-actors img[alt="${actor.name}"]`)),
}));
} else {
release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img]').map((element) => ({
name: unprint.query.attribute(element, 'img', 'alt'),
url: unprint.query.url(element, null, { origin: entity.url }),
avatar: getAvatarFallback(unprint.query.img(element, 'img')),
}));
}
release.tags = query.contents('.expanded .genres'); release.tags = query.contents('.expanded .genres');
@ -114,7 +136,7 @@ async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
} }
async function scrapeProfile({ query, element }, url, entity, include) { async function scrapeProfile({ query, element }, url, entity, include) {
const profile = {}; const profile = { url };
profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY'); profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY');
profile.birthPlace = query.content('//text()[contains(., "in")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.birthPlace = query.content('//text()[contains(., "in")]/following-sibling::span[contains(@class, "font-bold")][1]');
@ -124,16 +146,7 @@ async function scrapeProfile({ query, element }, url, entity, include) {
profile.hairColor = query.content('//text()[contains(., "Hair Color")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.hairColor = query.content('//text()[contains(., "Hair Color")]/following-sibling::span[contains(@class, "font-bold")][1]');
profile.eyes = query.content('//text()[contains(., "Eye Color")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.eyes = query.content('//text()[contains(., "Eye Color")]/following-sibling::span[contains(@class, "font-bold")][1]');
const avatar = query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]'); profile.avatar = getAvatarFallback(query.img('img[alt*="profile"][src*="https://i.bang.com/pornstars/"]'));
if (avatar) {
const { origin, pathname } = new URL(avatar);
profile.avatar = [
`${origin}${pathname}`, // full size
avatar,
];
}
if (include.scenes) { if (include.scenes) {
profile.scenes = await fetchActorScenes(element, url, entity); profile.scenes = await fetchActorScenes(element, url, entity);