Allowing scrapers to return raw tags and site URLs or slugs, to gradually remove site and tag fetching from individual scrapers. Added media and deep fetching support to Perv City scraper.

This commit is contained in:
2019-12-05 01:26:22 +01:00
parent 7840af2843
commit 71cb85c3e1
8 changed files with 182 additions and 54 deletions

View File

@@ -12,6 +12,7 @@ const {
storePhotos,
storeTrailer,
} = require('./media');
const { fetchSites, findSiteByUrl } = require('./sites');
async function curateRelease(release) {
const [actors, tags, media] = await Promise.all([
@@ -91,8 +92,23 @@ function curateReleases(releases) {
return Promise.all(releases.map(async release => curateRelease(release)));
}
function curateScrapedRelease(release) {
return {
/**
 * Resolve the site record that a scraped release's `channel` refers to.
 *
 * The channel is first handed to `findSiteByUrl`. If that call throws or
 * rejects, the channel is instead passed to `fetchSites` as both `name` and
 * `slug`, and the first entry of the result is used.
 * NOTE(review): presumably `findSiteByUrl` fails when the channel is a plain
 * slug/name rather than a URL — confirm against ./sites.
 *
 * @param {object} release - Scraped release; only `release.channel` is read.
 * @returns {Promise<object|null>} The matched site, or `null` when the lookup
 *   that ran produced no (truthy) site.
 */
async function getChannelSite(release) {
  const { channel } = release;
  let matchedSite;

  try {
    matchedSite = await findSiteByUrl(channel);
  } catch (error) {
    // URL lookup failed; retry treating the channel as a site name or slug.
    [matchedSite] = await fetchSites({ name: channel, slug: channel });
  }

  return matchedSite || null;
}
async function curateScrapedRelease(release) {
const curatedRelease = {
site_id: release.site.id,
studio_id: release.studio ? release.studio.id : null,
shoot_id: release.shootId || null,
@@ -108,6 +124,17 @@ function curateScrapedRelease(release) {
rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
deep: Boolean(argv.deep && release.url && !release.upcoming),
};
if (release.site.isFallback && release.channel) {
const site = await getChannelSite(release);
if (site) {
curatedRelease.site_id = site.id;
return curatedRelease;
}
}
return curatedRelease;
}
function commonQuery(queryBuilder, {
@@ -138,7 +165,9 @@ function commonQuery(queryBuilder, {
.andWhereRaw('tags_associated.release_id = releases.id');
})
.andWhere('date', '>', after)
.orWhere('releases.created_at', '>', after)
.andWhere('date', '<=', before)
.orWhere('releases.created_at', '<=', before)
.orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }])
.limit(limit);
}
@@ -206,7 +235,7 @@ async function storeReleaseAssets(release, releaseId) {
async function storeRelease(release) {
const existingRelease = await knex('releases').where('entry_id', release.entryId).first();
const curatedRelease = curateScrapedRelease(release);
const curatedRelease = await curateScrapedRelease(release);
if (existingRelease && !argv.redownload) {
return existingRelease.id;
@@ -256,6 +285,8 @@ async function storeReleases(releases) {
});
const actors = storedReleases.reduce((acc, release) => {
if (!release.actors) return acc;
release.actors.forEach((actor) => {
const trimmedActor = actor.trim();
@@ -274,6 +305,8 @@ async function storeReleases(releases) {
associateActors(actors, storedReleases),
Promise.all(storedReleases.map(async release => storeReleaseAssets(release, release.id))),
]);
return storedReleases;
}
module.exports = {