Move tag posters and photos to media database.

This commit is contained in:
2019-12-04 21:58:08 +01:00
parent cf81aa99e0
commit 55e3130062
51 changed files with 861 additions and 184 deletions

View File

@@ -375,28 +375,31 @@ async function scrapeBasicActors() {
return scrapeActors(basicActors.map(actor => actor.name));
}
async function associateActors(release, releaseId) {
const actorEntries = await knex('actors').whereIn('name', release.actors);
const newActors = release.actors
.map(actorName => actorName.trim())
.filter(actorName => !actorEntries.some(actor => actor.name === actorName));
const [newActorEntries, associatedActors] = await Promise.all([
Promise.all(newActors.map(async actorName => storeActor({ name: actorName }))),
knex('actors_associated').where('release_id', releaseId),
async function associateActors(mappedActors, releases) {
const [existingActorEntries, existingAssociationEntries] = await Promise.all([
knex('actors').whereIn('name', Object.keys(mappedActors)),
knex('actors_associated').whereIn('release_id', releases.map(release => release.id)),
]);
const newlyAssociatedActors = actorEntries
.concat(newActorEntries)
.filter(actorEntry => !associatedActors.some(actor => actorEntry.id === actor.id))
.map(actor => ({
release_id: releaseId,
actor_id: actor.id,
}));
const associations = await Promise.map(Object.entries(mappedActors), async ([actorName, releaseIds]) => {
const actorEntry = existingActorEntries.find(actor => actor.name === actorName)
|| await storeActor({ name: actorName });
await knex('actors_associated')
.insert(newlyAssociatedActors);
return releaseIds
.map(releaseId => ({
release_id: releaseId,
actor_id: actorEntry.id,
}))
.filter(association => !existingAssociationEntries
// remove associations already in database
.some(associationEntry => associationEntry.actor_id === association.actor_id
&& associationEntry.release_id === association.release_id));
});
await Promise.all([
knex('actors_associated').insert(associations.flat()),
scrapeBasicActors(),
]);
}
module.exports = {

View File

@@ -29,7 +29,7 @@ async function getThumbnail(buffer) {
}
async function createReleaseMediaDirectory(release, releaseId) {
if (release.poster || (release.photos && release.photos.length)) {
if (release.poster || (release.photos && release.photos.length) || release.trailer) {
await fs.mkdir(
path.join(config.media.path, 'releases', release.site.network.slug, release.site.slug, releaseId.toString()),
{ recursive: true },
@@ -133,7 +133,7 @@ async function storePhotos(release, releaseId) {
return null;
}
}, {
concurrency: 2,
concurrency: 10,
});
await knex('media')
@@ -225,7 +225,7 @@ async function storeAvatars(profile, actor) {
return null;
}
}, {
concurrency: 2,
concurrency: 10,
});
const avatars = files.filter(file => file);

View File

@@ -197,7 +197,6 @@ async function storeReleaseAssets(release, releaseId) {
await createReleaseMediaDirectory(release, releaseId);
await Promise.all([
associateActors(release, releaseId),
associateTags(release, releaseId),
storePhotos(release, releaseId),
storePoster(release, releaseId),
@@ -222,36 +221,59 @@ async function storeRelease(release) {
})
.returning('*');
await storeReleaseAssets(release, existingRelease.id);
// await storeReleaseAssets(release, existingRelease.id);
console.log(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);
return updatedRelease || existingRelease;
return updatedRelease ? updatedRelease.id : existingRelease.id;
}
const [releaseEntry] = await knex('releases')
.insert(curatedRelease)
.returning('*');
await storeReleaseAssets(release, releaseEntry.id);
// await storeReleaseAssets(release, releaseEntry.id);
console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);
return releaseEntry.id;
}
async function storeReleases(releases) {
return Promise.map(releases, async (release) => {
const storedReleases = await Promise.map(releases, async (release) => {
try {
const releaseId = await storeRelease(release);
return releaseId;
return {
id: releaseId,
...release,
};
} catch (error) {
console.error(error);
return null;
}
}, {
concurrency: 2,
concurrency: 10,
});
const actors = storedReleases.reduce((acc, release) => {
release.actors.forEach((actor) => {
const trimmedActor = actor.trim();
if (acc[trimmedActor]) {
acc[trimmedActor] = acc[trimmedActor].concat(release.id);
return;
}
acc[trimmedActor] = [release.id];
});
return acc;
}, {});
await Promise.all([
associateActors(actors, storedReleases),
Promise.all(storedReleases.map(async release => storeReleaseAssets(release, release.id))),
]);
}
module.exports = {

View File

@@ -7,7 +7,6 @@ const scrapers = require('./scrapers/scrapers');
const { storeReleases } = require('./releases');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
const { scrapeBasicActors } = require('./actors');
async function findSite(url, release) {
const site = (release && release.site) || await findSiteByUrl(url);
@@ -50,7 +49,6 @@ async function scrapeRelease(url, release, deep = false) {
if (!deep && argv.save) {
// don't store release when called by site scraper
const [releaseId] = await storeReleases([scene]);
await scrapeBasicActors();
console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`);
}

View File

@@ -9,7 +9,6 @@ const { fetchIncludedSites } = require('./sites');
const scrapers = require('./scrapers/scrapers');
const scrapeRelease = require('./scrape-release');
const { storeReleases } = require('./releases');
const { scrapeBasicActors } = require('./actors');
function getAfterDate() {
return moment
@@ -103,40 +102,39 @@ async function scrapeSiteReleases(scraper, site) {
}
async function scrapeReleases() {
const sites = await fetchIncludedSites();
const networks = await fetchIncludedSites();
console.log(`Found ${sites.length} sites in database`);
await Promise.map(sites, async (site) => {
const scrapedReleases = await Promise.map(networks, async network => Promise.map(network.sites, async (site) => {
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
if (!scraper) {
console.warn(`No scraper found for '${site.name}' (${site.slug})`);
return;
return [];
}
try {
const siteReleases = await scrapeSiteReleases(scraper, site);
const siteActors = siteReleases.reduce((acc, release) => [...acc, ...release.actors], []);
console.log(siteActors);
if (argv.save) {
await storeReleases(siteReleases);
}
return await scrapeSiteReleases(scraper, site);
} catch (error) {
if (argv.debug) {
console.error(`${site.id}: Failed to scrape releases`, error);
return;
}
console.warn(`${site.id}: Failed to scrape releases`);
return [];
}
}, {
// 2 network sites at a time
concurrency: 2,
}),
{
// 5 networks at a time
concurrency: 5,
});
await scrapeBasicActors();
if (argv.save) {
await storeReleases(scrapedReleases.flat(2));
}
}
module.exports = scrapeReleases;

View File

@@ -72,6 +72,25 @@ async function findSiteByUrl(url) {
return null;
}
/**
 * Group a flat list of curated sites under their parent network.
 *
 * Each returned entry is a shallow copy of the network object found on the
 * first site seen for that network slug, extended with a `sites` array that
 * holds every site sharing the slug, in input order.
 *
 * @param {Array<Object>} sites - sites that each carry a `network` object with a `slug`
 * @returns {Array<Object>} one `{ ...network, sites }` entry per distinct network slug
 */
function sitesByNetwork(sites) {
  const grouped = {};

  for (const site of sites) {
    const networkSlug = site.network.slug;
    const entry = grouped[networkSlug];

    if (entry) {
      // network already registered; append without mutating the previous array
      entry.sites = entry.sites.concat(site);
    } else {
      // first site for this network; copy the network so the input stays untouched
      grouped[networkSlug] = {
        ...site.network,
        sites: [site],
      };
    }
  }

  return Object.values(grouped);
}
async function fetchSitesFromArgv() {
const rawSites = await knex('sites')
.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
@@ -79,7 +98,10 @@ async function fetchSitesFromArgv() {
.orWhereIn('networks.slug', argv.networks || [])
.leftJoin('networks', 'sites.network_id', 'networks.id');
return curateSites(rawSites, true);
const curatedSites = await curateSites(rawSites, true);
console.log(`Found ${curatedSites.length} sites in database`);
return sitesByNetwork(curatedSites);
}
async function fetchSitesFromConfig() {
@@ -94,7 +116,10 @@ async function fetchSitesFromConfig() {
.orWhereIn('network_id', networkIds)
.leftJoin('networks', 'sites.network_id', 'networks.id');
return curateSites(rawSites, true);
const curatedSites = await curateSites(rawSites, true);
console.log(`Found ${curatedSites.length} sites in database`);
return sitesByNetwork(curatedSites);
}
async function fetchIncludedSites() {

View File

@@ -4,13 +4,21 @@ const knex = require('./knex');
const whereOr = require('./utils/where-or');
async function curateTag(tag) {
const aliases = await knex('tags').where({ alias_for: tag.id });
const [aliases, media] = await Promise.all([
knex('tags').where({ alias_for: tag.id }),
knex('media')
.where('domain', 'tags')
.andWhere('target_id', tag.id)
.orderBy('index'),
]);
return {
id: tag.id,
name: tag.name,
slug: tag.slug,
description: tag.description,
poster: media.find(photo => photo.role === 'poster'),
photos: media.filter(photo => photo.role === 'photo'),
group: {
id: tag.group_id,
name: tag.group_name,
@@ -31,15 +39,20 @@ async function associateTags(release, releaseId) {
return;
}
await knex('tags_associated').insert(release.tags.map(tagId => ({
tag_id: tagId,
release_id: releaseId,
})));
try {
await knex('tags_associated').insert(release.tags.map(tagId => ({
tag_id: tagId,
release_id: releaseId,
})));
} catch (error) {
console.log(release, error);
}
}
async function fetchTags(queryObject, limit = 100) {
async function fetchTags(queryObject, groupsQueryObject, limit = 100) {
const tags = await knex('tags')
.where(builder => whereOr(queryObject, 'tags', builder))
.orWhere(builder => whereOr(groupsQueryObject, 'tags_groups', builder))
.andWhere({ 'tags.alias_for': null })
.select(
'tags.*',

10
src/utils/escape-html.js Normal file
View File

@@ -0,0 +1,10 @@
/**
 * Escape the characters that are significant in HTML so that `text` can be
 * embedded safely in element content or a quoted attribute value.
 *
 * The ampersand must be escaped as well: otherwise input such as `&lt;` would
 * pass through unchanged and be decoded by the browser as `<`.
 *
 * @param {string} text - raw text to escape
 * @returns {string} text with `&`, `<`, `>`, `"` and `'` replaced by entities
 */
function escapeHtml(text) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };

  // single pass, so the ampersands of freshly inserted entities are never re-escaped
  return text.replace(/[&<>"']/g, character => entities[character]);
}
module.exports = escapeHtml;

View File

@@ -10,7 +10,7 @@ async function fetchTagsApi(req, res) {
const tags = await fetchTags({
id: tagId,
slug: tagSlug,
}, req.query.limit);
}, null, req.query.limit);
if (tags.length > 0) {
res.send(tags[0]);
@@ -21,9 +21,16 @@ async function fetchTagsApi(req, res) {
return;
}
const tags = await fetchTags({
priority: req.query.priority.split(','),
}, req.query.limit);
const query = {};
const groupsQuery = {};
if (req.query.priority) query.priority = req.query.priority.split(',');
if (req.query.slug) query.slug = req.query.slug.split(',');
if (req.query.group) {
groupsQuery.slug = req.query.group.split(',');
}
const tags = await fetchTags(query, groupsQuery, req.query.limit);
res.send(tags);
}