Adapted Score scraper for Score Videos.

This commit is contained in:
ThePendulum 2020-02-03 00:39:43 +01:00
parent 6f5ba925c1
commit a45bebddac
4 changed files with 192 additions and 169 deletions

File diff suppressed because it is too large Load Diff

View File

@ -279,17 +279,13 @@ async function fetchTagReleases(queryObject, options = {}) {
function accumulateActors(releases) { function accumulateActors(releases) {
return releases.reduce((acc, release) => { return releases.reduce((acc, release) => {
if (!release.actors) return acc; if (!Array.isArray(release.actors)) return acc;
release.actors.forEach((actor) => { release.actors.forEach((actor) => {
const trimmedActor = actor.trim(); const actorName = actor.name ? actor.name.trim() : actor.trim();
if (acc[trimmedActor]) { if (!acc[actorName]) acc[actorName] = [];
acc[trimmedActor] = acc[trimmedActor].concat(release.id); acc[actorName].push(release.id);
return;
}
acc[trimmedActor] = [release.id];
}); });
return acc; return acc;

View File

@ -32,10 +32,9 @@ function scrapeAll(html) {
return exa(html, '.container .video').map(({ q, qa, qd, ql }) => { return exa(html, '.container .video').map(({ q, qa, qd, ql }) => {
const release = {}; const release = {};
const linkEl = q('a.i-title'); release.title = q('.title, .i-title', true);
release.title = linkEl.textContent.trim();
const linkEl = q('a');
const url = new URL(linkEl.href); const url = new URL(linkEl.href);
release.url = `${url.origin}${url.pathname}`; release.url = `${url.origin}${url.pathname}`;
@ -44,14 +43,18 @@ function scrapeAll(html) {
[release.entryId] = url.pathname.split('/').slice(-2); [release.entryId] = url.pathname.split('/').slice(-2);
release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/); release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
release.actors = qa('.i-model', true); || qd('.dt-box', 'MMM.DD YYYY');
release.actors = qa('.model, .i-model', true);
release.duration = ql('.i-amount'); release.duration = ql('.i-amount');
const posterEl = q('.item-img img'); const posterEl = q('.item-img img');
if (posterEl) { if (posterEl) {
release.poster = `https:${posterEl.src}`; release.poster = `https:${posterEl.src}`;
}
if (posterEl?.dataset.gifPreview) {
release.teaser = { release.teaser = {
src: `https:${posterEl.dataset.gifPreview}`, src: `https:${posterEl.dataset.gifPreview}`,
}; };
@ -62,15 +65,15 @@ function scrapeAll(html) {
} }
async function scrapeScene(html, url) { async function scrapeScene(html, url) {
const { q, qa, qd, ql, qu, qp, qt } = ex(html, '#videos-page'); const { q, qa, qtext, qd, ql, qu, qis, qp, qt } = ex(html, '#videos-page, #content');
const release = {}; const release = {};
[release.entryId] = new URL(url).pathname.split('/').slice(-2); [release.entryId] = new URL(url).pathname.split('/').slice(-2);
release.title = q('#breadcrumb-top + h1', true); release.title = q('h2.text-uppercase, h2.title', true);
release.description = q('.p-desc', true); release.description = qtext('.p-desc, .desc');
release.actors = qa('a[href*=models]', true); release.actors = qa('.value a[href*=models], .value a[href*=performer]', true);
release.tags = qa('a[href*=tag]', true); release.tags = qa('a[href*=tag]', true);
const dateEl = qa('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent)); const dateEl = qa('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
@ -80,8 +83,13 @@ async function scrapeScene(html, url) {
release.duration = ql(durationEl); release.duration = ql(durationEl);
const photosUrl = qu('a[href*=photos]'); const photosUrl = qu('a[href*=photos]');
if (photosUrl) {
release.photos = await fetchPhotos(photosUrl); release.photos = await fetchPhotos(photosUrl);
release.poster = qp('video'); // _800.jpg is larger than _xl.jpg in landscape release.poster = qp('video'); // _800.jpg is larger than _xl.jpg in landscape
} else {
release.photos = qis('img[src*=ThumbNails]');
}
const trailer = qt(); const trailer = qt();
release.trailer = [ release.trailer = [
@ -154,7 +162,8 @@ function scrapeProfile(html) {
} }
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
const url = `${site.url}/big-boob-videos?page=${page}`; const latestPath = site.parameters?.path || '/big-boob-videos';
const url = `${site.url}${latestPath}?page=${page}`;
const res = await bhttp.get(url); const res = await bhttp.get(url);
if (res.statusCode === 200) { if (res.statusCode === 200) {

View File

@ -35,6 +35,19 @@ function qall(context, selector, attrArg, trim = true) {
return Array.from(context.querySelectorAll(selector)); return Array.from(context.querySelectorAll(selector));
} }
/**
 * Collect the text held by an element's direct text nodes, ignoring any text
 * that lives inside child elements.
 *
 * @param {*} context - Lookup root, passed straight through to `q`.
 * @param {string} selector - Selector passed straight through to `q`.
 * @param {boolean} [trim=true] - When true, every text node is trimmed and
 *   whitespace-only nodes are dropped before joining; when false, the raw
 *   text of each node is kept as-is.
 * @returns {string|null} The text fragments joined with a single space, or
 *   `null` when `q` finds nothing.
 */
function qtext(context, selector, trim = true) {
  // NOTE(review): `q` is defined elsewhere in this module; with a falsy attr
  // argument it is used here as an element lookup — confirm against `q`.
  const el = q(context, selector, false, trim);

  if (!el) return null;

  const fragments = Array.from(el.childNodes)
    .filter(node => node.nodeName === '#text')
    // Trim only when asked to; the branches of this ternary used to be swapped.
    .map(node => (trim ? node.textContent.trim() : node.textContent));

  if (!trim) return fragments.join(' ');

  // Whitespace-only nodes (e.g. indentation between child elements) trim down
  // to '' and would otherwise leave doubled spaces in the joined result.
  return fragments.filter(Boolean).join(' ');
}
function qmeta(context, selector, attrArg = 'content', trim = true) { function qmeta(context, selector, attrArg = 'content', trim = true) {
return q(context, selector, attrArg, trim); return q(context, selector, attrArg, trim);
} }
@ -95,6 +108,8 @@ function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https'
function qlength(context, selector, attr = 'textContent') { function qlength(context, selector, attr = 'textContent') {
const durationString = q(context, selector, attr); const durationString = q(context, selector, attr);
if (!durationString) return null;
const duration = durationString.match(/(\d+:)?\d+:\d+/); const duration = durationString.match(/(\d+:)?\d+:\d+/);
if (duration) { if (duration) {
@ -115,6 +130,7 @@ const funcs = {
qposter, qposter,
qlength, qlength,
qmeta, qmeta,
qtext,
qtrailer, qtrailer,
qurls, qurls,
qurl, qurl,
@ -126,6 +142,7 @@ const funcs = {
ql: qlength, ql: qlength,
qm: qmeta, qm: qmeta,
qt: qtrailer, qt: qtrailer,
qtx: qtext,
qu: qurl, qu: qurl,
qus: qurls, qus: qurls,
}; };