'use strict';

const unprint = require('unprint');

const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');

// Scene pages live at /trailers/<id>.html; the captured <id> is used as the entry ID.
const entryIdPattern = /\/trailers\/(.*)\.html/;

/**
 * Extracts the lower-cased entry ID from a scene URL.
 *
 * @param {string} url - Absolute scene URL.
 * @returns {string} The path segment between `/trailers/` and `.html`, lower-cased.
 * @throws {TypeError} When `url` cannot be parsed by `URL`, or when its path does not
 *   match the trailers pattern (`match` yields null and the index access throws).
 */
function getEntryId(url) {
  return new URL(url).pathname.match(entryIdPattern)[1].toLowerCase();
}

/**
 * Builds one release object per scene tile.
 *
 * @param {Array<{ query: object }>} scenes - Tile contexts; only `query` is read from each.
 * @returns {object[]} Releases with url, entryId, title, date, duration, photoCount,
 *   teaser and, when an image was found, a list of poster candidates.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.img-div a[href*="/trailers"], .content-div h4 a[href*="/trailers"]'); // empty anchor in markup for some reason
    release.entryId = getEntryId(release.url);

    release.title = query.content('.content-div h4 a[href]');

    release.date = query.date('.more-info-div', 'MMM DD, YYYY');
    release.duration = query.duration('.more-info-div');

    // Prefer the "<n> photos" text; otherwise fall back to the text node following the camera icon.
    release.photoCount = query.number('.more-info-div', { match: /(\d+) photos/i, matchIndex: 1 })
      || query.number('//i[contains(@class, "fa-camera")]//following-sibling::text()[1]');

    const poster = query.img('.video_placeholder') || query.poster();

    if (poster) {
      // Candidates in order of preference, derived by swapping the '-1x' marker in the found URL.
      release.poster = [
        poster.replace('-1x', '-2x'),
        poster.replace('-1x', '-3x'),
        poster,
        poster.replace('-1x', '-4x'), // too big, only use as fallback
      ];
    }

    release.teaser = query.video();

    return release;
  });
}

/**
 * Scrapes a single scene page.
 *
 * @param {{ query: object }} context - Page context; only `query` is read.
 * @param {{ url: string }} options - Only `url` is read; the entry ID is derived from it.
 * @returns {object} Release with entryId, title, description, date, duration, photoCount,
 *   actors, tags and poster.
 * @throws {TypeError} When `url` is not a parseable `/trailers/<id>.html` URL.
 */
function scrapeScene({ query }, { url }) {
  const release = {};

  release.entryId = getEntryId(url);

  // ExGoGiGirls deviates most from the other sites
  release.title = query.content('.video-player .section-title, #scene-info h1')
    || query.content('.bio-article .section-title'); // model-name class not on all sites

  release.description = (query.content('.descriptionFull') || query.content('.description'))
    ?.replace(/(read more)|(read less)/i, '')
    .trim(); // querying text nodes breaks a lot of descriptions

  release.date = query.date('//*[strong[contains(text(), "Released")]]', 'MMMM D, YYYY');
  release.duration = query.duration('//*[strong[contains(text(), "Runtime")]]');

  // The photo count is matched within the same element that holds the "Runtime" label.
  release.photoCount = query.number('//*[strong[contains(text(), "Runtime")]]', { match: /(\d+) photos/i, matchIndex: 1 });

  release.actors = query.all('.models-list-thumbs li, [id="model bio"] .card').map((actorEl) => {
    // not all actors have links
    const actorUrl = unprint.query.url(actorEl);

    return {
      name: unprint.query.content(actorEl, 'span, .model-name'),
      url: actorUrl,
      // Evaluates to the falsy actorUrl itself when there is no link, and to undefined
      // when the link does not point at a /models/<id>.html page.
      entryId: actorUrl && new URL(actorUrl).pathname.match(/\/models\/(.*)\.html/)?.[1].toLowerCase(),
      avatar: [
        unprint.query.img(actorEl, 'img', { attribute: 'src0_2x' }),
        unprint.query.img(actorEl, 'img', { attribute: 'src0_1x' }),
        unprint.query.img(actorEl, 'img', { attribute: 'src0_3x' }), // too big
      ],
    };
  });

  release.tags = query.contents('.tags a[href]');
  release.poster = query.img('.update_thumb', { attribute: 'src0_1x' });

  return release;
}

/**
 * Scrapes a performer profile page.
 *
 * @param {{ query: object }} context - Page context; only `query` is read.
 * @param {object} _entity - Unused; kept for signature compatibility with the caller.
 * @returns {object} Profile with age, height, measurements, description and avatar candidates.
 */
function scrapeProfile({ query }, _entity) {
  const profile = {};

  // Each detail paragraph becomes a bio entry: slugified <strong> label => paragraph text.
  const bio = Object.fromEntries(query.all('.detail-div p').map((detailEl) => [
    slugify(unprint.query.content(detailEl, 'strong'), '_'),
    unprint.query.text(detailEl),
  ]));

  profile.age = Number(bio.age) || null;
  profile.height = convert(bio.height, 'cm');
  profile.measurements = bio.measurements;

  // Only the bio fields that are present contribute a line; the result is '' when none are.
  profile.description = [
    bio.favorite_position && `Favorite position: ${bio.favorite_position}`,
    bio.likes && `Likes: ${bio.likes}`,
  ].filter(Boolean).join('\n');

  profile.avatar = [
    query.img('.model_bio_thumb', { attribute: 'src0_2x' }),
    query.img('.model_bio_thumb', { attribute: 'src0_1x' }),
    query.img('.model_bio_thumb', { attribute: 'src0_3x' }), // too big
  ];

  return profile;
}

/**
 * Fetches one page of the channel's movie listing and scrapes its scene tiles.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Page number interpolated into the listing URL.
 * @returns {Promise<object[]|number>} Scraped releases, or `res.status` when the
 *   response is not ok.
 */
async function fetchLatest(channel, page = 1) {
  const url = `${channel.url}/categories/movies_${page}_d.html`;
  const res = await unprint.get(url, { selectAll: '.main-article .item-update' });

  if (res.ok) {
    return scrapeAll(res.context, channel);
  }

  return res.status;
}

/**
 * Fetches and scrapes a performer profile from a known profile URL.
 *
 * @param {{ url: string }} actor - Only `url` is read.
 * @param {object} entity - Passed through to `scrapeProfile`.
 * @returns {Promise<object|number|null>} The profile, `res.status` when the response is
 *   not ok, or null when no URL is known.
 */
async function fetchProfile({ url }, entity) {
  if (!url) {
    // ExploitedX has loads of performers with the same name, don't search for the name, only use known URLs
    return null;
  }

  const res = await unprint.get(url);

  if (res.ok) {
    return scrapeProfile(res.context, entity);
  }

  return res.status;
}

module.exports = { fetchLatest, fetchProfile, scrapeScene, };