// Forked from DebaucheryLibrarian/traxxx (516 lines, 18 KiB, JavaScript).
'use strict';
|
|
|
|
const config = require('config');
|
|
const Promise = require('bluebird');
|
|
const moment = require('moment');
|
|
|
|
const logger = require('./logger')(__filename);
|
|
const knex = require('./knex');
|
|
const argv = require('./argv');
|
|
const whereOr = require('./utils/where-or');
|
|
const { associateTags } = require('./tags');
|
|
const { associateActors, scrapeBasicActors } = require('./actors');
|
|
const {
|
|
pluckItems,
|
|
storeMedia,
|
|
associateMedia,
|
|
} = require('./media');
|
|
const { fetchSites, findSiteByUrl } = require('./sites');
|
|
const slugify = require('./utils/slugify');
|
|
const capitalize = require('./utils/capitalize');
|
|
|
|
/**
 * Applies the joins, column selection, tag exclusion, date window, ordering and
 * row limit shared by the release listing queries in this module.
 *
 * Designed to be handed to knex's `.modify()`: it mutates `queryBuilder` and
 * returns nothing.
 *
 * @param {object} queryBuilder - knex query builder with the `releases` table in scope.
 * @param {object} [options]
 * @param {string|string[]} [options.filter=[]] - Tag slug(s); releases associated with any of them are excluded.
 * @param {Date} [options.after] - Exclusive lower bound for `releases.date`.
 * @param {Date} [options.before] - Inclusive upper bound for `releases.date`.
 * @param {number} [options.limit=100] - Maximum number of rows.
 */
function commonQuery(queryBuilder, {
    filter = [],
    after = new Date(0), // January 1970
    before = new Date(2 ** 44), // 2 ** 44 ms after the epoch, roughly the year 2527
    limit = 100,
} = {}) {
    const finalFilter = [].concat(filter); // ensure filter is array

    queryBuilder
        .leftJoin('sites', 'releases.site_id', 'sites.id')
        .leftJoin('studios', 'releases.studio_id', 'studios.id')
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .select(
            'releases.*',
            'sites.name as site_name', 'sites.slug as site_slug', 'sites.url as site_url', 'sites.network_id', 'sites.parameters as site_parameters',
            // studio_slug is read by curateRelease; it must come from the studios table
            'studios.name as studio_name', 'studios.slug as studio_slug', 'studios.url as studio_url',
            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description',
        )
        .whereNotExists((builder) => {
            // apply tag filters: drop releases that carry any of the filtered tags
            builder
                .select('*')
                .from('tags_associated')
                .leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
                .whereIn('tags.slug', finalFilter)
                .where('tags_associated.domain', 'releases')
                .whereRaw('tags_associated.target_id = releases.id');
        })
        .andWhere('releases.date', '>', after)
        .andWhere('releases.date', '<=', before)
        .orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }])
        .limit(limit);
}
|
|
|
|
/**
 * Turns a flat release row into the nested release object exposed by the fetch
 * functions, loading the release's actors, tags and media in parallel.
 *
 * The row is expected to carry the `site_*`, `studio_*` and `network_*` columns
 * selected by `commonQuery`.
 *
 * @param {object} release - Release row including the joined site, studio and network columns.
 * @returns {Promise<object>} Curated release with `actors`, `tags`, media split by role, `site`, `studio` and `network`.
 */
async function curateRelease(release) {
    const [actors, tags, media] = await Promise.all([
        knex('actors_associated')
            .select(
                'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate',
                'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
                'media.thumbnail as avatar',
            )
            .where({ release_id: release.id })
            .leftJoin('actors', 'actors.id', 'actors_associated.actor_id')
            .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
            .leftJoin('media', (builder) => {
                // only the first actor image is used as avatar
                builder
                    .on('media.target_id', 'actors.id')
                    .andOnVal('media.domain', 'actors')
                    .andOnVal('media.index', '0');
            })
            .orderBy('actors.gender'),
        knex('tags_associated')
            .select('tags.name', 'tags.slug')
            .where({
                domain: 'releases',
                target_id: release.id,
            })
            .leftJoin('tags', 'tags.id', 'tags_associated.tag_id')
            .orderBy('tags.priority', 'desc'),
        knex('media')
            .where({
                target_id: release.id,
                domain: 'releases',
            })
            .orderBy(['role', 'index']),
    ]);

    const curatedRelease = {
        id: release.id,
        type: release.type,
        title: release.title,
        date: release.date,
        dateAdded: release.created_at,
        description: release.description,
        url: release.url,
        shootId: release.shoot_id,
        entryId: release.entry_id,
        actors: actors.map(actor => ({
            id: actor.id,
            slug: actor.slug,
            name: actor.name,
            gender: actor.gender,
            birthdate: actor.birthdate,
            // without a birthdate (or release date) the difference is NaN; expose null instead
            age: actor.birthdate ? moment().diff(actor.birthdate, 'years') : null,
            ageThen: actor.birthdate && release.date
                ? moment(release.date).diff(actor.birthdate, 'years')
                : null,
            avatar: actor.avatar,
            origin: actor.birth_country_alpha2
                ? {
                    country: {
                        name: actor.birth_country_alias,
                        alpha2: actor.birth_country_alpha2,
                    },
                }
                : null,
        })),
        director: release.director,
        tags,
        duration: release.duration,
        photos: media.filter(item => item.role === 'photo'),
        poster: media.find(item => item.role === 'poster'),
        covers: media.filter(item => item.role === 'cover'),
        trailer: media.find(item => item.role === 'trailer'),
        site: {
            id: release.site_id,
            name: release.site_name,
            independent: !!release.site_parameters?.independent,
            slug: release.site_slug,
            url: release.site_url,
        },
        studio: release.studio_id
            ? {
                id: release.studio_id,
                name: release.studio_name,
                slug: release.studio_slug,
                url: release.studio_url,
            }
            : null,
        network: {
            id: release.network_id,
            name: release.network_name,
            description: release.network_description,
            slug: release.network_slug,
            url: release.network_url,
        },
    };

    return curatedRelease;
}
|
|
|
|
/**
 * Curates a list of release rows concurrently.
 *
 * @param {object[]} releases - Release rows accepted by `curateRelease`.
 * @returns {Promise<object[]>} Curated releases, in the same order as the input.
 */
function curateReleases(releases) {
    return Promise.all(releases.map(async release => curateRelease(release)));
}
|
|
|
|
/**
 * Replaces a fallback site on a scraped release with the site of its channel.
 *
 * The release is returned untouched unless its site is flagged `isFallback` or
 * its channel is flagged `force`. Otherwise the channel (either a string or an
 * object with `name`/`slug`/`url`) is looked up first by name or slug, then by URL.
 *
 * @param {object} release - Scraped release.
 * @returns {Promise<object>} The release itself, or a copy with `site` replaced.
 * @throws {Error} When no channel is available or the URL lookup fails; the
 *   lookup failure is kept as the error's `cause`.
 */
async function attachChannelSite(release) {
    if (!release.site?.isFallback && !release.channel?.force) {
        return release;
    }

    if (!release.channel) {
        throw new Error(`Unable to derive channel site from generic URL: ${release.url}`);
    }

    const [site] = await fetchSites({
        name: release.channel.name || release.channel,
        slug: release.channel.slug || release.channel,
    });

    if (site) {
        return {
            ...release,
            site,
        };
    }

    try {
        const urlSite = await findSiteByUrl(release.channel.url || release.channel);

        return {
            ...release,
            site: urlSite,
        };
    } catch (error) {
        // keep the underlying failure attached so it is not lost when this error is logged
        throw new Error(`Unable to derive channel site from generic URL: ${release.url}`, { cause: error });
    }
}
|
|
|
|
/**
 * Resolves the studio reference on a scraped release to its database row.
 *
 * `release.studio` is matched against the studio name, slug or URL. When no
 * studio matches, `studio` on the returned copy is undefined.
 *
 * @param {object} release - Scraped release; returned untouched when it has no `studio`.
 * @returns {Promise<object>} The release itself, or a copy with `studio` replaced by the matching row.
 */
async function attachStudio(release) {
    if (!release.studio) {
        return release;
    }

    const studio = await knex('studios')
        .where('name', release.studio)
        .orWhere('slug', release.studio)
        .orWhere('url', release.studio)
        .first();

    return {
        ...release,
        studio,
    };
}
|
|
|
|
/**
 * Maps a scraped release onto the column layout of the `releases` table.
 *
 * @param {object} release - Scraped release with `site` (and optionally `studio`) already resolved to rows.
 * @param {number} batchId - ID of the batch the release is stored in.
 * @param {object} [existingRelease] - Existing row, if any; `created_batch_id` is only set when absent.
 * @returns {Promise<object>} Row data ready for insert or update.
 */
async function curateReleaseEntry(release, batchId, existingRelease) {
    const slug = slugify(release.title, {
        encode: true,
        limit: config.titleSlugLength,
    });

    const curatedRelease = {
        site_id: release.site.id,
        studio_id: release.studio ? release.studio.id : null,
        shoot_id: release.shootId || null,
        entry_id: release.entryId || null,
        type: release.type,
        url: release.url,
        title: release.title,
        slug,
        date: release.date,
        description: release.description,
        // director: release.director,
        duration: release.duration,
        // likes: release.rating && release.rating.likes,
        // dislikes: release.rating && release.rating.dislikes,
        // rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
        deep: typeof release.deep === 'boolean' ? release.deep : false,
        deep_url: release.deepUrl,
        updated_batch_id: batchId,
        // spreading `false` adds nothing, so existing rows keep their original batch
        ...(!existingRelease && { created_batch_id: batchId }),
    };

    return curatedRelease;
}
|
|
|
|
/**
 * Fetches curated releases whose own columns match `queryObject`.
 *
 * @param {object} [queryObject={}] - Conditions handed to `whereOr` for the `releases` table.
 * @param {object} [options={}] - Filter, date window and limit, see `commonQuery`.
 * @returns {Promise<object[]>} Curated releases.
 */
async function fetchReleases(queryObject = {}, options = {}) {
    const releases = await knex('releases')
        .modify(commonQuery, options)
        .andWhere(builder => whereOr(queryObject, 'releases', builder));

    return curateReleases(releases);
}
|
|
|
|
/**
 * Fetches curated releases whose site matches `queryObject`.
 *
 * @param {object} queryObject - Conditions handed to `whereOr` for the `sites` table.
 * @param {object} [options={}] - Filter, date window and limit, see `commonQuery`.
 * @returns {Promise<object[]>} Curated releases.
 */
async function fetchSiteReleases(queryObject, options = {}) {
    const releases = await knex('releases')
        .modify(commonQuery, options)
        .where(builder => whereOr(queryObject, 'sites', builder));

    return curateReleases(releases);
}
|
|
|
|
/**
 * Fetches curated releases whose network matches `queryObject`.
 *
 * @param {object} queryObject - Conditions handed to `whereOr` for the `networks` table.
 * @param {object} [options={}] - Filter, date window and limit, see `commonQuery`.
 * @returns {Promise<object[]>} Curated releases.
 */
async function fetchNetworkReleases(queryObject, options = {}) {
    const releases = await knex('releases')
        .modify(commonQuery, options)
        .where(builder => whereOr(queryObject, 'networks', builder));

    return curateReleases(releases);
}
|
|
|
|
/**
 * Fetches curated releases featuring an actor that matches `queryObject`.
 *
 * The query starts from `actors_associated` and joins the releases and actors
 * onto it before the shared release query is applied.
 *
 * @param {object} queryObject - Conditions handed to `whereOr` for the `actors` table.
 * @param {object} [options={}] - Filter, date window and limit, see `commonQuery`.
 * @returns {Promise<object[]>} Curated releases.
 */
async function fetchActorReleases(queryObject, options = {}) {
    const releases = await knex('actors_associated')
        .leftJoin('releases', 'actors_associated.release_id', 'releases.id')
        .leftJoin('actors', 'actors_associated.actor_id', 'actors.id')
        .select(
            'actors.name as actor_name',
        )
        .modify(commonQuery, options)
        .where(builder => whereOr(queryObject, 'actors', builder));

    return curateReleases(releases);
}
|
|
|
|
/**
 * Fetches curated releases carrying a tag that matches `queryObject`.
 *
 * The query starts from `tags_associated`, restricted to the `releases` domain,
 * and joins the releases and tags onto it.
 *
 * @param {object} queryObject - Conditions handed to `whereOr` for the `tags` table.
 * @param {object} [options={}] - Filter, date window and limit, see `commonQuery`.
 * @returns {Promise<object[]>} Curated releases.
 */
async function fetchTagReleases(queryObject, options = {}) {
    const releases = await knex('tags_associated')
        .leftJoin('releases', 'tags_associated.target_id', 'releases.id')
        .leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
        .select(
            'tags.name as tag_name',
        )
        .modify(commonQuery, options)
        .where('tags_associated.domain', 'releases')
        .where(builder => whereOr(queryObject, 'tags', builder));

    return curateReleases(releases);
}
|
|
|
|
/**
 * Collects the actors of all given releases into one map keyed by actor slug.
 *
 * An actor entry may be a plain name or an object with a `name` and further
 * profile properties. Per slug the map tracks the trimmed name, the IDs of the
 * releases the actor appears in and the avatar candidates found so far.
 *
 * @param {object[]} releases - Stored releases; entries whose `actors` is not an array are ignored.
 * @returns {Object<string, object>} Map from actor slug to `{ name, slug, releaseIds: Set, avatars: Array, ...profile }`.
 */
function accumulateActors(releases) {
    return releases.reduce((acc, release) => {
        if (!Array.isArray(release.actors)) return acc;

        release.actors.forEach((actor) => {
            const rawName = typeof actor === 'string' ? actor : actor?.name;

            // skip malformed entries (null, numbers, nameless objects) rather than aborting the whole batch
            if (typeof rawName !== 'string') return;

            const actorName = rawName.trim();
            const actorSlug = slugify(actorName);

            if (!actorSlug) return;

            if (!acc[actorSlug]) {
                acc[actorSlug] = {
                    name: actorName,
                    slug: actorSlug,
                    releaseIds: new Set(),
                    avatars: [],
                };
            }

            acc[actorSlug].releaseIds.add(release.id);

            if (actor.name) {
                // actor input contains profile info; re-apply the trimmed name so the raw one does not overwrite it
                acc[actorSlug] = { ...acc[actorSlug], ...actor, name: actorName };
            }

            if (actor.avatar) {
                // an array of avatars is one entry with fallbacks; copyright defaults to the network name
                const avatar = Array.isArray(actor.avatar)
                    ? actor.avatar.map(avatarX => ({
                        src: avatarX.src || avatarX,
                        copyright: avatarX.copyright === undefined ? capitalize(release.site?.network?.name) : avatarX.copyright,
                    }))
                    : {
                        src: actor.avatar.src || actor.avatar,
                        copyright: actor.avatar.copyright === undefined ? capitalize(release.site?.network?.name) : actor.avatar.copyright,
                    };

                acc[actorSlug].avatars = acc[actorSlug].avatars.concat([avatar]); // don't flatten fallbacks
            }
        });

        return acc;
    }, {});
}
|
|
|
|
/**
 * Stores the posters, covers, photos, trailers and teasers of the given
 * releases and associates the stored media with them.
 *
 * Nothing happens unless `argv.media` is set. Each media role additionally
 * needs its category flag (`argv.images` or `argv.videos`) and its own flag
 * (`argv.posters`, `argv.covers`, `argv.photos`, `argv.trailers`,
 * `argv.teasers`). Roles are processed one after another.
 *
 * NOTE(review): media is stored under domain 'release' here while
 * curateRelease queries the media table with domain 'releases' — presumably
 * storeMedia/associateMedia normalise this; confirm.
 *
 * @param {object[]} releases - Stored releases with `id` and scraped media sources.
 * @returns {Promise<void>}
 */
async function storeReleaseAssets(releases) {
    if (!argv.media) {
        return;
    }

    // one pass per role instead of rebuilding the accumulator object for every release
    const groupById = pickSources => Object.fromEntries(releases.map(release => [release.id, pickSources(release)]));

    const releasePostersById = groupById(release => [release.poster]);
    const releaseCoversById = groupById(release => release.covers);
    const releaseTrailersById = groupById(release => [release.trailer]);
    const releaseTeasersById = groupById(release => [release.teaser]);
    const releasePhotosById = groupById(release => pluckItems(release.photos));

    // stores all sources of one role, then links whatever was stored back to the releases
    const storeRole = async (sourcesById, role) => {
        const storedMedia = await storeMedia(Object.values(sourcesById).flat(), 'release', role);

        if (storedMedia) await associateMedia(sourcesById, storedMedia, 'release', role);
    };

    if (argv.images && argv.posters) await storeRole(releasePostersById, 'poster');
    if (argv.images && argv.covers) await storeRole(releaseCoversById, 'cover');
    if (argv.images && argv.photos) await storeRole(releasePhotosById, 'photo');
    if (argv.videos && argv.trailers) await storeRole(releaseTrailersById, 'trailer');
    if (argv.videos && argv.teasers) await storeRole(releaseTeasersById, 'teaser');
}
|
|
|
|
/**
 * Rebuilds the full-text search documents of the given releases and upserts
 * them into `releases_search`.
 *
 * The document combines the release title, site and network names and slugs,
 * the shoot ID, several renderings of the release date, and the names of the
 * associated actors, tags and tag aliases.
 *
 * NOTE(review): only `shoot_id` and the aggregated names are coalesced; a NULL
 * title, date or network name would presumably turn the whole concatenation
 * into NULL — confirm against the schema.
 *
 * @param {number[]} releaseIds - IDs of the releases to index; nothing is queried when empty.
 * @returns {Promise<void>}
 */
async function updateReleasesSearch(releaseIds) {
    if (!releaseIds || releaseIds.length === 0) {
        return;
    }

    const documents = await knex.raw(`
        SELECT
            releases.id as release_id,
            to_tsvector(
                'traxxx',
                releases.title || ' ' ||
                sites.name || ' ' ||
                sites.slug || ' ' ||
                networks.name || ' ' ||
                networks.slug || ' ' ||
                coalesce(releases.shoot_id, '') || ' ' ||
                EXTRACT(YEAR FROM releases.date) || ' ' ||
                CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR) || ' ' ||
                CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR) || ' ' ||
                SUBSTRING(CAST(EXTRACT(YEAR FROM releases.date) AS VARCHAR) FROM 3 for 2) || ' ' ||
                LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
                LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
                string_agg(coalesce(actors.name, ''), ' ') || ' ' ||
                string_agg(coalesce(tags.name, ''), ' ') || ' ' ||
                string_agg(coalesce(tags_aliases.name, ''), ' ')
            ) as document
        FROM releases
        LEFT JOIN sites ON releases.site_id = sites.id
        LEFT JOIN networks ON sites.network_id = networks.id
        LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
        LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
        LEFT JOIN actors ON local_actors.actor_id = actors.id
        LEFT JOIN tags ON local_tags.tag_id = tags.id
        LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for
        WHERE releases.id = ANY(?)
        GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
    `, [releaseIds]);

    if (documents.rows?.length > 0) {
        // the upsert clause is appended to the rendered insert statement
        const query = knex('releases_search').insert(documents.rows).toString();

        await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
    }
}
|
|
|
|
/**
 * Inserts a scraped release, or updates the existing row when `argv.redownload`
 * is set, and associates its tags.
 *
 * A release is considered existing when a row with the same entry ID and site
 * ID is found. Without `argv.redownload` an existing row is returned untouched.
 *
 * @param {object} release - Scraped release with `site` resolved to a row.
 * @param {number} batchId - ID of the batch the release is stored in.
 * @returns {Promise<object|null>} The stored (inserted, updated or pre-existing) row, or null when the entry ID is missing.
 * @throws {Error} When the release has no site.
 */
async function storeRelease(release, batchId) {
    if (!release.site) {
        throw new Error(`Missing site, unable to store "${release.title}" (${release.url})`);
    }

    if (!release.entryId) {
        logger.warn(`Missing entry ID, unable to store "${release.title}" (${release.url})`);
        return null;
    }

    const existingRelease = await knex('releases')
        .where({
            entry_id: release.entryId,
            site_id: release.site.id,
        })
        .first();

    const curatedRelease = await curateReleaseEntry(release, batchId, existingRelease);

    if (existingRelease && !argv.redownload) {
        return existingRelease;
    }

    if (existingRelease) {
        const [updatedRelease] = await knex('releases')
            .where('id', existingRelease.id)
            .update({
                ...existingRelease,
                ...curatedRelease,
            })
            .returning('*');

        // the updated row has the same ID as the existing one, so tags are associated exactly once
        await associateTags(release, existingRelease.id);

        if (updatedRelease) {
            logger.info(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);
        }

        // prefer the fresh row so callers see the updated slug
        return updatedRelease || existingRelease;
    }

    const [releaseEntry] = await knex('releases')
        .insert(curatedRelease)
        .returning('*');

    await associateTags(release, releaseEntry.id);

    logger.info(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);

    return releaseEntry;
}
|
|
|
|
/**
 * Stores a list of scraped releases as one batch, then associates their
 * actors, refreshes the search index and stores their media.
 *
 * A release that fails to store is logged and left out of the result instead
 * of failing the whole batch. Relies on the module-level `Promise` being
 * bluebird (`Promise.map` with a concurrency limit, and `.filter()` on its result).
 *
 * @param {object[]} releases - Scraped releases.
 * @returns {Promise<{releases: object[], actors: object}>} The stored releases
 *   (scraped data plus database `id` and `slug`) and the accumulated actor map.
 */
async function storeReleases(releases) {
    const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

    const storedReleases = await Promise.map(releases, async (release) => {
        try {
            const releaseWithChannelSite = await attachChannelSite(release);
            const releaseWithStudio = await attachStudio(releaseWithChannelSite);
            const storedRelease = await storeRelease(releaseWithStudio, batchId);

            // null when the release could not be stored; dropped by the filter below
            return storedRelease && {
                id: storedRelease.id,
                slug: storedRelease.slug,
                ...releaseWithChannelSite,
            };
        } catch (error) {
            logger.error(error);

            return null;
        }
    }, {
        concurrency: 10,
    }).filter(Boolean);

    logger.info(`Stored ${storedReleases.length} new releases`);

    const actors = accumulateActors(storedReleases);

    await associateActors(actors, storedReleases);

    await Promise.all([
        // actors need to be stored before generating search
        updateReleasesSearch(storedReleases.map(release => release.id)),
        storeReleaseAssets(storedReleases),
    ]);

    if (argv.withProfiles && Object.keys(actors).length > 0) {
        await scrapeBasicActors();
    }

    return {
        releases: storedReleases,
        actors,
    };
}
|
|
|
|
module.exports = {
|
|
fetchReleases,
|
|
fetchActorReleases,
|
|
fetchSiteReleases,
|
|
fetchNetworkReleases,
|
|
fetchTagReleases,
|
|
storeRelease,
|
|
storeReleases,
|
|
};
|