Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.

This commit is contained in:
2020-05-14 04:26:05 +02:00
parent f1eb29c713
commit 11eb66f834
178 changed files with 16594 additions and 16929 deletions

View File

@@ -3,83 +3,83 @@
const { get, initAll } = require('../utils/qu');
/**
 * Build one release object per scene element found on a tour (listing) page.
 *
 * @param {Array<{qu: object}>} scenes - Scene contexts; each exposes the `qu` query helpers
 *   (`url`, `q`, `img`, `imgs`) used below. Presumably one per scene table; confirm against caller.
 * @param {Array<{qu: object}>} [dates] - Date contexts matched to scenes by array index.
 * @param {{url: string}} site - Site descriptor; `site.url` prefixes all relative paths.
 * @returns {object[]} Releases with `description` and `photos` always set, and `url`,
 *   `entryId`, `date`, `duration` and `poster` set only when found in the markup.
 */
function scrapeLatest(scenes, dates, site) {
	return scenes.map(({ qu }, index) => {
		const release = {};

		const path = qu.url('a');

		// Without a link there is nothing to derive the URL or entry ID from;
		// skip both rather than throwing and losing the whole page.
		if (path) {
			release.url = `${site.url}/visitors/${path}`;
			release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
		}

		// Dates live outside the scene element and are paired up positionally.
		if (dates && dates[index]) {
			release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
		}

		release.description = qu.q('tbody tr:nth-child(3) font', true);

		// The info line may be absent or lack an "N min" fragment; only set a
		// duration (in seconds) when the minutes could actually be extracted.
		const infoLine = qu.q('font[color="#663366"]', true);
		const minutes = infoLine?.match(/(\d+) min/)?.[1];

		if (minutes) {
			release.duration = Number(minutes) * 60;
		}

		// The 400px-wide image is treated as the poster; all other photos are extras.
		const poster = qu.img('img[src*="photos/"][width="400"]');

		if (poster) {
			release.poster = `${site.url}/visitors/${poster}`;
		}

		release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);

		return release;
	});
}
/**
 * Build a release object from a single scene page.
 *
 * @param {{qu: object}} context - Page context exposing the `qu` query helpers (`q`, `url`).
 * @param {string} url - Absolute URL of the scene page; the entry ID is parsed from its path.
 * @param {{url: string}} site - Site descriptor; `site.url` prefixes the trailer paths.
 * @returns {object} Release with `url` always set, and `entryId`, `actors` and `trailer`
 *   set only when the corresponding data was found.
 */
function scrapeScene({ qu }, url, site) {
	const release = { url };

	const { pathname } = new URL(url);

	// Null-safe so an unexpected URL shape yields no entry ID instead of a TypeError.
	const entryId = pathname.match(/videos\/(\w+)_hd_trailer/)?.[1];

	if (entryId) {
		release.entryId = entryId;
	}

	// The page markup selected here yields a single actor name.
	const actor = qu.q('font[color="#990033"] strong', true);

	if (actor) {
		release.actors = [actor];
	}

	const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
	const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');

	// Keep only trailers whose link was actually found, so no ".../undefined" source is emitted.
	const trailers = [
		{ path: hdTrailer, quality: 1080 },
		{ path: sdTrailer, quality: 270 },
	]
		.filter(({ path }) => Boolean(path))
		.map(({ path, quality }) => ({
			src: `${site.url}/visitors/videos/${path}`,
			quality,
		}));

	if (trailers.length > 0) {
		release.trailer = trailers;
	}

	return release;
}
/**
 * Fetch one tour (listing) page and scrape its scenes.
 *
 * @param {{url: string}} site - Site descriptor, forwarded to `scrapeLatest`.
 * @param {number} [page=1] - Page number; zero-padded to two digits in the URL (e.g. `tour_01.html`).
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request was not OK.
 */
async function fetchLatest(site, page = 1) {
	const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
	const res = await get(url);

	if (!res.ok) {
		return res.status;
	}

	const { el } = res.item;

	// Scenes and their dates sit in separate elements; scrapeLatest pairs them by index.
	const scenes = initAll(el, 'table[width="880"]');
	const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');

	return scrapeLatest(scenes, dates, site);
}
/**
 * Fetch a single scene page and scrape it.
 *
 * @param {string} url - Absolute URL of the scene page.
 * @param {{url: string}} site - Site descriptor, forwarded to `scrapeScene`.
 * @returns {Promise<object|number>} Scraped release, or the response status when the request was not OK.
 */
async function fetchScene(url, site) {
	const res = await get(url);

	if (res.ok) {
		return scrapeScene(res.item, url, site);
	}

	return res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};