Added optional sequential scraping and injection of accumulated (acc) site releases into the scrapers. Added Hush Pass and Interracial Pass logos.

This commit is contained in:
2020-03-05 23:01:03 +01:00
parent 6719d805d3
commit 3889faee26
4 changed files with 88 additions and 60 deletions

View File

@@ -6,6 +6,14 @@ const knex = require('../knex');
const { get, geta, fd } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Builds a case-insensitive regular expression that matches any channel
 * belonging to the network of the given site.
 *
 * Only the 'hushpass' and 'interracialpass' networks are supported; for any
 * other network no expression is built.
 *
 * @param {object} site - Site whose `network.slug` and `network.id` are read.
 * @returns {Promise<RegExp|null>} The combined expression, or null when the
 *   network is not supported or no usable channel pattern exists.
 */
async function getChannelRegExp(site) {
  if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;

  const sites = await knex('sites').where('network_id', site.network.id);

  // Plain channel names are matched literally, so regex metacharacters in
  // them (parentheses, dots, plus signs, ...) must be escaped.
  const escapeRegExp = text => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const patterns = sites
    .map((channel) => {
      // parameters.match is a hand-written pattern and is used verbatim
      if (channel.parameters?.match) return channel.parameters.match;

      return channel.name ? escapeRegExp(channel.name) : null;
    })
    // an empty alternative would make the expression match every string
    .filter(Boolean);

  if (patterns.length === 0) return null;

  return new RegExp(patterns.join('|'), 'i');
}
/**
 * Derives an entry ID for a release from its date and title.
 *
 * The date is passed through `fd` with the 'YYYY-MM-DD' format (presumably a
 * date formatter; confirm against ../utils/q), then both the result and the
 * title are slugified and joined with a hyphen.
 *
 * @param {object} release - Release whose `date` and `title` are read.
 * @returns {string} `<slugified date>-<slugified title>`.
 */
function deriveEntryId(release) {
  const dateSlug = slugify(fd(release.date, 'YYYY-MM-DD'));
  const titleSlug = slugify(release.title);

  return `${dateSlug}-${titleSlug}`;
}
@@ -54,7 +62,7 @@ function scrapeLatest(scenes, site) {
});
}
function scrapeLatestT1(scenes, site) {
function scrapeLatestT1(scenes, site, accSiteReleases) {
return scenes.map(({ q, qi, qd, ql, qu }) => {
const release = {};
@@ -79,8 +87,13 @@ function scrapeLatestT1(scenes, site) {
// release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1];
release.entryId = deriveEntryId(release);
if (site.parameters?.accFilter && accSiteReleases?.map(accRelease => accRelease.entryId).includes(release.entryId)) {
// filter out releases that were already scraped from a categorized site
return null;
}
return release;
});
}).filter(Boolean);
}
function scrapeLatestTour(scenes) {
@@ -199,15 +212,7 @@ function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
return release;
}
/**
 * Builds a case-insensitive regular expression that matches any channel
 * belonging to the network of the given site.
 *
 * Only the 'hushpass' and 'interracialpass' networks are supported; for any
 * other network no expression is built.
 *
 * @param {object} site - Site whose `network.slug` and `network.id` are read.
 * @returns {Promise<RegExp|null>} The combined expression, or null when the
 *   network is not supported or no usable channel pattern exists.
 */
async function getChannelRegExp(site) {
  if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;

  const sites = await knex('sites').where('network_id', site.network.id);

  // Plain channel names are matched literally, so regex metacharacters in
  // them (parentheses, dots, plus signs, ...) must be escaped.
  const escapeRegExp = text => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const patterns = sites
    .map((channel) => {
      // parameters.match is a hand-written pattern and is used verbatim
      if (channel.parameters?.match) return channel.parameters.match;

      return channel.name ? escapeRegExp(channel.name) : null;
    })
    // an empty alternative would make the expression match every string
    .filter(Boolean);

  if (patterns.length === 0) return null;

  return new RegExp(patterns.join('|'), 'i');
}
async function fetchLatest(site, page = 1) {
async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases) {
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|| `${site.url}/categories/movies_${page}_d.html`;
@@ -215,10 +220,10 @@ async function fetchLatest(site, page = 1) {
const qLatest = await geta(url, '.modelfeature, .item-video, .updateItem');
if (!qLatest) return null;
if (site.parameters?.t1) return scrapeLatestT1(qLatest, site);
if (site.parameters?.tour) return scrapeLatestTour(qLatest, site);
if (site.parameters?.t1) return scrapeLatestT1(qLatest, site, accSiteReleases);
if (site.parameters?.tour) return scrapeLatestTour(qLatest, site, accSiteReleases);
return scrapeLatest(qLatest, site);
return scrapeLatest(qLatest, site, accSiteReleases);
}
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {