Switched to tabs. Added missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -4,93 +4,93 @@ const { get, geta, ctxa } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Builds one release object per scene tile.
 *
 * Each entry of `scenes` must expose `el` (an element with a `dataset`) and
 * `qu`, a query helper providing `q`, `url`, `date`, `dur` and `all`.
 * NOTE(review): the exact semantics of those helpers live in '../utils/q' and
 * are not visible here; the comments below describe only how they are called.
 *
 * @param {Array<{ el: object, qu: object }>} scenes - Scene contexts to scrape.
 * @returns {object[]} One release per scene, in input order.
 */
function scrapeAll(scenes) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		// Prefer the explicit set ID on the element. Only when it is absent do we
		// fall back to the number embedded in the thumbnail's `id` attribute.
		let entryId = el.dataset.setid;

		if (!entryId) {
			const thumbId = qu.q('.update_thumb', 'id');
			// `match` returns null when the ID has an unexpected shape; leave the
			// entry ID null instead of throwing and losing the whole page.
			const idMatch = thumbId ? thumbId.match(/\w+-\w+-(\d+)-\d+/) : null;

			entryId = idMatch ? idMatch[1] : null;
		}

		release.entryId = entryId;
		release.url = qu.url('.title');

		// Title and description are both read from the `.title` element: the
		// former with the `true` flag, the latter from its `title` attribute.
		release.title = qu.q('.title', true);
		release.description = qu.q('.title', 'title');

		release.date = qu.date('.video-data > span:last-child', 'YYYY-MM-DD');
		release.duration = qu.dur('.video-data > span');

		release.actors = qu.all('.update_models a', true);

		const poster = qu.q('.update_thumb', 'src0_1x');

		// A tile without a thumbnail source simply gets no poster.
		if (poster) {
			// The `-2x` variant is derived from the `-1x` source and listed first.
			release.poster = [
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Builds a release object from a single scene page.
 *
 * The first argument is a query context providing `q`, `qa`, `qd` and `qtx`.
 * NOTE(review): those helpers come from '../utils/q'; their exact semantics are
 * not visible here, so the comments below describe only how they are called.
 *
 * @param {{ q: Function, qa: Function, qd: Function, qtx: Function }} context - Query helpers bound to the page.
 * @param {string} url - Scene URL, stored on the release as-is.
 * @param {object} [_site] - Unused; accepted so callers can pass the site.
 * @returns {object} The scraped release.
 */
function scrapeScene({ q, qa, qd, qtx }, url, _site) {
	const release = { url };

	const imageId = q('#image_parent img', 'id');
	// `match` returns null when the ID is missing or has an unexpected shape;
	// leave the entry ID null instead of throwing a TypeError.
	const idMatch = imageId ? imageId.match(/\w+-\w+-(\d+)-\d+/) : null;

	release.entryId = idMatch ? idMatch[1] : null;

	release.title = q('.trailer_title', true);
	release.description = qtx('.text p');
	release.date = qd('span[data-dateadded]', 'YYYY-MM-DD', null, 'data-dateadded');

	release.actors = qa('.update_models a', true);
	release.tags = qa('.video-info a[href*="/categories"]', true);

	const poster = q('#image_parent img', 'src0_1x');

	// A page without a poster source simply gets no poster.
	if (poster) {
		// The `-2x` variant is derived from the `-1x` source and listed first.
		release.poster = [
			poster.replace('-1x', '-2x'),
			poster,
		];
	}

	return release;
}
|
||||
|
||||
/**
 * Builds an actor profile from a model page.
 *
 * The argument is a query context providing the root element `el` plus the
 * `q` and `qtx` helpers.
 * NOTE(review): helper semantics live in '../utils/q' and are not visible here.
 *
 * @param {{ el: object, q: Function, qtx: Function }} context - Query helpers bound to the page.
 * @returns {object} Profile with `avatar`, `releases` and, when present, `description`.
 */
function scrapeProfile({ el, q, qtx }) {
	const profile = {};

	const description = qtx('.model-bio');

	// Only set the description when the page actually has one.
	if (description) {
		profile.description = description;
	}

	// Avatar candidates, 2x source first, then the 1x source.
	profile.avatar = [
		q('.model-image img', 'src0_2x'),
		q('.model-image img', 'src0_1x'),
	];

	// The `.update` tiles on the page are scraped with the same routine used
	// for listing pages.
	profile.releases = scrapeAll(ctxa(el, '.update'));

	return profile;
}
|
||||
|
||||
/**
 * Fetches one page of the site's movie listing and scrapes its tiles.
 *
 * @param {{ url: string }} site - Site whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Listing page number, interpolated into the URL.
 * @returns {Promise<object[]|*>} Scraped releases when the response is OK,
 *   otherwise the response's `status`.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/categories/movies_${page}_d.html`;
	const res = await geta(url, '.latest-updates .update');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, site);
}
|
||||
|
||||
/**
 * Fetches a scene page and scrapes it into a release.
 *
 * @param {string} url - Scene page URL.
 * @param {object} site - Site the scene belongs to; forwarded to the scraper.
 * @returns {Promise<object|*>} The scraped release when the response is OK,
 *   otherwise the response's `status`.
 */
async function fetchScene(url, site) {
	const res = await get(url, '.content-wrapper');

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, site);
}
|
||||
|
||||
/**
 * Fetches an actor's model page and scrapes it into a profile.
 *
 * The model URL is built from the scraper slug and the slugified actor name.
 * The `povperverts` scraper is special-cased to its `.net` domain; every other
 * slug is treated as a `.com` domain.
 *
 * @param {string} actorName - Actor name; slugified with an empty delimiter.
 * @param {string} scraperSlug - Slug of the scraper, used as the host name.
 * @returns {Promise<object|*>} The scraped profile when the response is OK,
 *   otherwise the response's `status`.
 */
async function fetchProfile(actorName, scraperSlug) {
	const actorSlug = slugify(actorName, '');

	const url = scraperSlug === 'povperverts'
		? `https://povperverts.net/models/${actorSlug}.html`
		: `https://${scraperSlug}.com/models/${actorSlug}.html`;

	const res = await get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeProfile(res.item, actorName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user