Added Porn World to replace DDF scraper.
This commit is contained in:
113
src/scrapers/pornworld.js
Executable file
113
src/scrapers/pornworld.js
Executable file
@@ -0,0 +1,113 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
/**
 * Scrape every scene card found on a listing page.
 *
 * @param {Array<{ query: object }>} scenes - One context per `.card.scene` element, as passed in by fetchLatest.
 * @returns {object[]} One release per card with url, entryId, title, date, duration, actors, trailer and, when an image is present, poster.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.card-title a');

		// A card without a link, or with a link that is not a /watch/<id> URL, must not
		// abort the whole page; the ID is left null so the caller can decide what to do.
		const entryMatch = release.url
			? new URL(release.url).pathname.match(/\/watch\/([\d-]+)/)
			: null;

		release.entryId = entryMatch ? entryMatch[1] : null;

		release.title = query.content('.card-title a');

		release.date = query.date('.release-date', 'YYYY MMMM, DD', { match: /\d{4} \w+, \d{1,2}/i });
		release.duration = query.duration('.video-duration');

		release.actors = query.all('.starring a').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null),
		}));

		const poster = query.img('.thumbnail-pic .card-img');

		if (poster) {
			const { origin, pathname } = new URL(poster);

			// First candidate is the image URL with its query string stripped, second is the URL exactly as found.
			// NOTE(review): presumably the stripped URL yields the unresized image and the list is tried in order; confirm.
			release.poster = [
				`${origin}${pathname}`,
				poster,
			];
		}

		release.trailer = query.video('.thumbnail-pic', { attribute: 'data-video-src' }); // actually the full trailer with audio, not a teaser

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a single scene page.
 *
 * @param {{ query: object }} context - Query helpers bound to the scene page.
 * @param {{ url: string }} options - The scene URL; the entry ID is taken from its `/watch/<id>` path segment.
 * @returns {object} Release with entryId, title, description, date, duration, actors, tags, poster and trailer.
 */
function scrapeScene({ query }, { url }) {
	const release = {};

	// A URL without a /watch/<id> segment yields a null ID rather than a TypeError on the failed match.
	const entryMatch = new URL(url).pathname.match(/\/watch\/([\d-]+)/);

	release.entryId = entryMatch ? entryMatch[1] : null;

	release.title = query.content('.scene__title');
	release.description = query.text('//p[span[contains(text(), "Description:")]]');

	release.date = query.date('//p[strong[contains(text(), "Publication date:")]]/span', 'YYYY, MMMM D', { match: /\d{4}, \w+ \d{1,2}/i });
	release.duration = query.duration('//p[i[contains(@class, "bi-clock-fill")]]');

	release.actors = query.all('p a[href*="model/"]').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	release.tags = query.contents('p a[href*="?tags"]');

	release.poster = query.poster('.video-player');
	release.trailer = query.video('.video-player source');

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a model detail card.
 *
 * @param {{ query: object }} context - Query helpers bound to the `.model-detail-card` element.
 * @returns {{ nationality: (string|null), age: *, avatar: * }} Nationality (null when empty), age and avatar image.
 */
function scrapeProfile({ query }) {
	// An empty or missing nationality is normalised to null.
	const nationality = query.content('//h3[contains(text(), "Nationality:")]/span') || null;
	const age = query.number('//h3[contains(text(), "Age:")]/span');

	// No selector: the image is queried relative to the card itself.
	const avatar = query.img();

	return { nationality, age, avatar };
}
|
||||
|
||||
/**
 * Fetch one page of a channel's video listing and scrape its scene cards.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site base URL.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<object[]|*>} The scraped releases, or the response status when the request was not OK.
 */
async function fetchLatest(channel, page = 1) {
	const response = await unprint.get(`${channel.url}/videos?page=${page}`, { selectAll: '.card.scene' });

	return response.ok
		? scrapeAll(response.context, channel)
		: response.status;
}
|
||||
|
||||
/**
 * Look an actor up through the site's model search and scrape their profile page.
 *
 * @param {{ name: string }} actor - Actor to look up; only `name` is used.
 * @param {{ url: string }} entity - Entity whose `url` is the site base URL.
 * @returns {Promise<object|null|*>} The scraped profile; null when no search result has a title
 *   exactly equal to the name or the result has no URL; otherwise the status of the failed request.
 */
async function fetchProfile({ name: actorName }, entity) {
	// Names routinely contain spaces and may contain reserved characters such as '&' or '#',
	// so the name must be encoded before it is placed in the query string.
	const searchUrl = `${entity.url}/models?name=${encodeURIComponent(actorName)}&sort=popularity`;
	const searchRes = await unprint.get(searchUrl);

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// Only an exact title match is accepted.
	const actorEl = searchRes.context.query
		.all('.pagination-items .model a')
		.find((resultEl) => unprint.query.attribute(resultEl, null, 'title') === actorName);

	// No matching result: stop here instead of handing undefined to the query helper.
	if (!actorEl) {
		return null;
	}

	const actorUrl = unprint.query.url(actorEl, null);

	if (!actorUrl) {
		return null;
	}

	const res = await unprint.get(actorUrl, { select: '.model-detail-card' });

	if (res.ok) {
		return scrapeProfile(res.context, actorName, entity);
	}

	return res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
scrapeScene,
|
||||
};
|
||||
Reference in New Issue
Block a user