Added dedicated ExploitedX scraper.
This commit is contained in:
131
src/scrapers/exploitedx.js
Executable file
131
src/scrapers/exploitedx.js
Executable file
@@ -0,0 +1,131 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const { convert } = require('../utils/convert');
|
||||
|
||||
/**
 * Parses the scene tiles of a listing page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - one context per tile; each `query` must expose the
 *   url/content/date/duration/number/img/poster/video helpers called below.
 * @returns {object[]} one release per tile that links to a `/trailers/<id>.html` page. Tiles
 *   without such a link are skipped, because no entry ID can be derived for them.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const url = query.url('.img-div a[href*="/trailers"], .content-div h4 a[href*="/trailers"]'); // empty anchor in markup for some reason

		// Guarded like the actor entry ID in scrapeScene: a missing link or an unexpected path
		// must not throw and take every other tile on the page down with it.
		const entryId = url
			? new URL(url).pathname.match(/\/trailers\/(.*)\.html/)?.[1].toLowerCase()
			: null;

		if (!entryId) {
			return null; // removed by the filter below
		}

		const release = { url, entryId };

		release.title = query.content('.content-div h4 a[href]');

		release.date = query.date('.more-info-div', 'MMM DD, YYYY');
		release.duration = query.duration('.more-info-div');

		// Prefer the "<n> photos" label, fall back to the text node following the camera icon.
		release.photoCount = query.number('.more-info-div', { match: /(\d+) photos/i, matchIndex: 1 })
			|| query.number('//i[contains(@class, "fa-camera")]//following-sibling::text()[1]');

		const poster = query.img('.video_placeholder') || query.poster();

		if (poster) {
			// Ordered by preference. The Set drops the duplicates that appear when the source
			// URL has no '-1x' marker and every replace() returns the same string.
			release.poster = [...new Set([
				poster.replace('-1x', '-2x'),
				poster.replace('-1x', '-3x'),
				poster,
				poster.replace('-1x', '-4x'), // too big, only use as fallback
			])];
		}

		release.teaser = query.video();

		return release;
	}).filter(Boolean);
}
|
||||
|
||||
/**
 * Parses a single scene page into a release object.
 *
 * @param {{ query: object }} context - page context; `query` must expose the
 *   content/date/duration/number/all/contents/img helpers called below.
 * @param {{ url: string }} options - `url` is the absolute URL of the scene page.
 * @returns {object} release with title, description, date, duration, photoCount, actors, tags
 *   and poster; `entryId` is only set when the URL follows the `/trailers/<id>.html` pattern.
 */
function scrapeScene({ query }, { url }) {
	const release = {};

	// Guarded like the actor entry ID below, so an unexpected URL shape no longer throws.
	// The key is left unset rather than null — TODO confirm the caller handles a release without it.
	const entryId = new URL(url).pathname.match(/\/trailers\/(.*)\.html/)?.[1].toLowerCase();

	if (entryId) {
		release.entryId = entryId;
	}

	// ExGoGiGirls deviates most from the other sites
	release.title = query.content('.video-player .section-title, #scene-info h1') || query.content('.bio-article .section-title'); // model-name class not on all sites

	// querying text nodes breaks a lot of descriptions, so take the element content and strip
	// the toggle labels instead; the global flag removes both "read more" and "read less"
	// rather than only whichever comes first
	release.description = (query.content('.descriptionFull') || query.content('.description'))
		?.replace(/read (?:more|less)/gi, '')
		.trim();

	release.date = query.date('//*[strong[contains(text(), "Released")]]', 'MMMM D, YYYY');
	release.duration = query.duration('//*[strong[contains(text(), "Runtime")]]');
	// the photo count is read from the same element as the runtime
	release.photoCount = query.number('//*[strong[contains(text(), "Runtime")]]', { match: /(\d+) photos/i, matchIndex: 1 });

	release.actors = query.all('.models-list-thumbs li, [id="model bio"] .card').map((actorEl) => { // not all actors have links
		const actorUrl = unprint.query.url(actorEl);

		return {
			name: unprint.query.content(actorEl, 'span, .model-name'),
			url: actorUrl,
			entryId: actorUrl && new URL(actorUrl).pathname.match(/\/models\/(.*)\.html/)?.[1].toLowerCase(),
			// ordered by preference
			avatar: [
				unprint.query.img(actorEl, 'img', { attribute: 'src0_2x' }),
				unprint.query.img(actorEl, 'img', { attribute: 'src0_1x' }),
				unprint.query.img(actorEl, 'img', { attribute: 'src0_3x' }), // too big
			],
		};
	});

	release.tags = query.contents('.tags a[href]');

	release.poster = query.img('.update_thumb', { attribute: 'src0_1x' });

	return release;
}
|
||||
|
||||
/**
 * Parses a performer page into a profile object.
 *
 * @param {{ query: object }} context - page context; `query` must expose the `all` and `img`
 *   helpers called below.
 * @param {object} _entity - unused, accepted for signature parity with the caller.
 * @returns {object} profile with age, height, measurements, description and avatar candidates.
 */
function scrapeProfile({ query }, _entity) {
	const profile = {};

	// Each detail paragraph becomes one bio entry: the slugified <strong> label is the key,
	// the paragraph text as returned by unprint.query.text is the value.
	const bio = Object.fromEntries(query.all('.detail-div p').map((detailEl) => [
		slugify(unprint.query.content(detailEl, 'strong'), '_'),
		unprint.query.text(detailEl),
	]));

	profile.age = Number(bio.age) || null;
	profile.height = convert(bio.height, 'cm');
	profile.measurements = bio.measurements;

	// Same "nothing found => null" convention as age; previously an empty bio produced ''.
	profile.description = [
		bio.favorite_position && `Favorite position: ${bio.favorite_position}`,
		bio.likes && `Likes: ${bio.likes}`,
	].filter(Boolean).join('\n') || null;

	// ordered by preference
	profile.avatar = [
		query.img('.model_bio_thumb', { attribute: 'src0_2x' }),
		query.img('.model_bio_thumb', { attribute: 'src0_1x' }),
		query.img('.model_bio_thumb', { attribute: 'src0_3x' }), // too big
	];

	return profile;
}
|
||||
|
||||
/**
 * Fetches one page of the channel's latest-updates listing and parses its scene tiles.
 *
 * @param {{ url: string }} channel - channel whose `url` is the site's base URL.
 * @param {number} [page=1] - listing page number.
 * @returns {Promise<object[]|number>} scraped releases on success, otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
	// Strip trailing slashes so a base URL like "https://site.com/" does not yield "//categories/...".
	const baseUrl = channel.url.replace(/\/+$/, '');
	const url = `${baseUrl}/categories/movies_${page}_d.html`;

	// selectAll asks unprint for one context per matching tile.
	const res = await unprint.get(url, { selectAll: '.main-article .item-update' });

	if (res.ok) {
		return scrapeAll(res.context, channel);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetches and parses a performer profile from a known profile URL.
 *
 * @param {{ url?: string }} actor - only `url` is read.
 * @param {object} entity - passed through to scrapeProfile.
 * @returns {Promise<object|number|null>} the scraped profile, the response status when the
 *   request is not ok, or null when no URL is known.
 */
async function fetchProfile({ url }, entity) {
	if (url) {
		const res = await unprint.get(url);

		return res.ok
			? scrapeProfile(res.context, entity)
			: res.status;
	}

	// ExploitedX has loads of performers with the same name, don't search for the name, only use known URLs
	return null;
}
|
||||
|
||||
// Exported entry points: latest-updates listing, performer profile lookup and scene page parser.
module.exports = { fetchLatest, fetchProfile, scrapeScene };
|
||||
Reference in New Issue
Block a user