Allowing scrapers to return raw tags and site URLs or slugs, to gradually remove site and tag fetching from individual scrapers. Added media and deep fetching support to Perv City scraper.
This commit is contained in:
@@ -12,6 +12,7 @@ const {
|
||||
storePhotos,
|
||||
storeTrailer,
|
||||
} = require('./media');
|
||||
const { fetchSites, findSiteByUrl } = require('./sites');
|
||||
|
||||
async function curateRelease(release) {
|
||||
const [actors, tags, media] = await Promise.all([
|
||||
@@ -91,8 +92,23 @@ function curateReleases(releases) {
|
||||
return Promise.all(releases.map(async release => curateRelease(release)));
|
||||
}
|
||||
|
||||
function curateScrapedRelease(release) {
|
||||
return {
|
||||
/**
 * Resolve the site record that matches a release's `channel` value.
 *
 * First passes `release.channel` to `findSiteByUrl`. If that call throws,
 * falls back to `fetchSites`, using the same value as both `name` and `slug`,
 * and takes the first entry of the result.
 *
 * NOTE(review): presumably `findSiteByUrl` throws when `channel` is not a URL,
 * which is what triggers the name/slug fallback — confirm against ./sites.
 * A falsy (non-throwing) result from `findSiteByUrl` does NOT trigger the fallback.
 *
 * @param {object} release - Scraped release; only `release.channel` is read here.
 * @returns {Promise<object|null>} The matched site, or `null` when the lookup yields nothing.
 */
async function getChannelSite(release) {
|
||||
try {
|
||||
const site = await findSiteByUrl(release.channel);
|
||||
|
||||
return site || null;
|
||||
} catch (error) {
|
||||
// URL lookup threw; retry treating the channel value as a site name or slug.
const [site] = await fetchSites({
|
||||
name: release.channel,
|
||||
slug: release.channel,
|
||||
});
|
||||
|
||||
return site || null;
|
||||
}
|
||||
}
|
||||
|
||||
async function curateScrapedRelease(release) {
|
||||
const curatedRelease = {
|
||||
site_id: release.site.id,
|
||||
studio_id: release.studio ? release.studio.id : null,
|
||||
shoot_id: release.shootId || null,
|
||||
@@ -108,6 +124,17 @@ function curateScrapedRelease(release) {
|
||||
rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
|
||||
deep: Boolean(argv.deep && release.url && !release.upcoming),
|
||||
};
|
||||
|
||||
if (release.site.isFallback && release.channel) {
|
||||
const site = await getChannelSite(release);
|
||||
|
||||
if (site) {
|
||||
curatedRelease.site_id = site.id;
|
||||
return curatedRelease;
|
||||
}
|
||||
}
|
||||
|
||||
return curatedRelease;
|
||||
}
|
||||
|
||||
function commonQuery(queryBuilder, {
|
||||
@@ -138,7 +165,9 @@ function commonQuery(queryBuilder, {
|
||||
.andWhereRaw('tags_associated.release_id = releases.id');
|
||||
})
|
||||
.andWhere('date', '>', after)
|
||||
.orWhere('releases.created_at', '>', after)
|
||||
.andWhere('date', '<=', before)
|
||||
.orWhere('releases.created_at', '<=', before)
|
||||
.orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }])
|
||||
.limit(limit);
|
||||
}
|
||||
@@ -206,7 +235,7 @@ async function storeReleaseAssets(release, releaseId) {
|
||||
|
||||
async function storeRelease(release) {
|
||||
const existingRelease = await knex('releases').where('entry_id', release.entryId).first();
|
||||
const curatedRelease = curateScrapedRelease(release);
|
||||
const curatedRelease = await curateScrapedRelease(release);
|
||||
|
||||
if (existingRelease && !argv.redownload) {
|
||||
return existingRelease.id;
|
||||
@@ -256,6 +285,8 @@ async function storeReleases(releases) {
|
||||
});
|
||||
|
||||
const actors = storedReleases.reduce((acc, release) => {
|
||||
if (!release.actors) return acc;
|
||||
|
||||
release.actors.forEach((actor) => {
|
||||
const trimmedActor = actor.trim();
|
||||
|
||||
@@ -274,6 +305,8 @@ async function storeReleases(releases) {
|
||||
associateActors(actors, storedReleases),
|
||||
Promise.all(storedReleases.map(async release => storeReleaseAssets(release, release.id))),
|
||||
]);
|
||||
|
||||
return storedReleases;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user