Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -5,86 +5,86 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Extracts release summaries from a listing page.
 *
 * Only the last `.video-releases-list` element in the document is read; every
 * `.card` inside it becomes one release object.
 *
 * @param {string} html - Markup of the listing page.
 * @param {{ url: string }} site - Site record. The origin of `site.url` is
 *   prefixed to each scene link, and the object itself is attached to every
 *   release as `release.site`.
 * @returns {Object[]} One release per card, with `site`, `url`, `entryId`,
 *   `title`, `date`, `actors`, `poster`, `photos` and, when a `<source>`
 *   element is present, `trailer`. Empty when the page contains no
 *   `.video-releases-list`.
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const { origin } = new URL(site.url);

	// Several lists may match; only the last one is used.
	const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];

	// No list on the page (e.g. past the last page): nothing to scrape.
	if (!videos) return [];

	return Array.from(videos.querySelectorAll('.card'), (scene) => {
		const release = { site };

		release.url = `${origin}${scene.querySelector(':scope > a').href}`;
		release.entryId = scene.dataset.videoId;
		release.title = scene.querySelector('.card-title').textContent;
		// The card's data-date attribute is parsed as UTC in 'MMMM DD, YYYY' form.
		release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
		release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);

		// NOTE(review): image sources get an `https:` prefix, so they are
		// presumably protocol-relative in the markup; confirm against the site.
		release.poster = `https:${scene.querySelector('.single-image').src}`;
		release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);

		const trailerEl = scene.querySelector('source');
		if (trailerEl) release.trailer = { src: trailerEl.dataset.src };

		return release;
	});
}
|
||||
|
||||
/**
 * Extracts the details of a single scene from its page markup.
 *
 * All lookups are scoped to the `#t2019-2col` element.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site record, attached to the result as `release.site`.
 * @param {string} url - Address the page was fetched from; stored as `release.url`.
 * @returns {Object} Release with `site`, `url`, `title`, `description`,
 *   `actors`, `photos`, and, when the page provides them, `date`, `duration`,
 *   `poster` and `trailer`.
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	const scene = document.querySelector('#t2019-2col');

	release.url = url;
	release.title = scene.querySelector('.t2019-stitle').textContent.trim();
	release.description = scene.querySelector('#t2019-description').textContent.trim();
	release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), el => el.textContent);

	const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span'));

	// With more than one span the first holds the date and the second the
	// duration; a lone span holds only the duration.
	const hasDate = durationEls.length > 1;
	const durationEl = hasDate ? durationEls[1] : durationEls[0];

	if (hasDate) {
		release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate();
	}

	// Leave the duration unset instead of throwing when the span is missing
	// or contains no digits.
	const durationMatch = durationEl ? durationEl.textContent.match(/\d+/) : null;

	if (durationMatch) {
		// NOTE(review): the number is multiplied by 60, so it is presumably
		// minutes converted to seconds; confirm against the site.
		release.duration = Number(durationMatch[0]) * 60;
	}

	release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);

	// Prefer the static placeholder image; fall back to the video's poster attribute.
	const posterEl = scene.querySelector('#no-player-image');
	const videoEl = scene.querySelector('video');

	if (posterEl) release.poster = `https:${posterEl.src}`;
	else if (videoEl) release.poster = `https:${videoEl.poster}`;

	const trailerEl = scene.querySelector('#t2019-video source');
	if (trailerEl) release.trailer = { src: trailerEl.src };

	return release;
}
|
||||
|
||||
/**
 * Fetches one page of the site's listing and scrapes its releases.
 *
 * @param {{ url: string }} site - Site record; `site.url` is the listing address.
 * @param {number} [page=1] - Page number sent as the `page` query parameter.
 * @returns {Promise<Object[]>} Releases found by `scrapeLatest`, or an empty
 *   array when the response status is not 200.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return [];
}
|
||||
|
||||
/**
 * Fetches a scene page and scrapes its details.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site record passed through to `scrapeScene`.
 * @returns {Promise<Object|null>} The release produced by `scrapeScene`, or
 *   `null` when the response status is not 200.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), site, url);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user