Move tag posters and photos to media database.
@@ -375,28 +375,31 @@ async function scrapeBasicActors() {
   return scrapeActors(basicActors.map(actor => actor.name));
 }
 
-async function associateActors(release, releaseId) {
-  const actorEntries = await knex('actors').whereIn('name', release.actors);
-
-  const newActors = release.actors
-    .map(actorName => actorName.trim())
-    .filter(actorName => !actorEntries.some(actor => actor.name === actorName));
-
-  const [newActorEntries, associatedActors] = await Promise.all([
-    Promise.all(newActors.map(async actorName => storeActor({ name: actorName }))),
-    knex('actors_associated').where('release_id', releaseId),
+async function associateActors(mappedActors, releases) {
+  const [existingActorEntries, existingAssociationEntries] = await Promise.all([
+    knex('actors').whereIn('name', Object.keys(mappedActors)),
+    knex('actors_associated').whereIn('release_id', releases.map(release => release.id)),
   ]);
 
-  const newlyAssociatedActors = actorEntries
-    .concat(newActorEntries)
-    .filter(actorEntry => !associatedActors.some(actor => actorEntry.id === actor.id))
-    .map(actor => ({
-      release_id: releaseId,
-      actor_id: actor.id,
-    }));
+  const associations = await Promise.map(Object.entries(mappedActors), async ([actorName, releaseIds]) => {
+    const actorEntry = existingActorEntries.find(actor => actor.name === actorName)
+      || await storeActor({ name: actorName });
 
-  await knex('actors_associated')
-    .insert(newlyAssociatedActors);
+    return releaseIds
+      .map(releaseId => ({
+        release_id: releaseId,
+        actor_id: actorEntry.id,
+      }))
+      .filter(association => !existingAssociationEntries
+        // remove associations already in database
+        .some(associationEntry => associationEntry.actor_id === association.actor_id
+          && associationEntry.release_id === association.release_id));
+  });
+
+  await Promise.all([
+    knex('actors_associated').insert(associations.flat()),
+    scrapeBasicActors(),
+  ]);
 }
 
 module.exports = {
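
Side note (not part of the diff): the reworked associateActors takes a map of actor names to the release IDs they appear in, plus the stored release records, instead of a single release. A rough usage sketch with made-up IDs, inside an async context:

    // Hypothetical example of the new call shape.
    // Keys are trimmed actor names; values are the releases each actor appears in.
    const mappedActors = {
      'Jane Doe': [101, 102],
      'John Roe': [102],
    };

    const storedReleases = [{ id: 101 }, { id: 102 }];

    // Looks up or creates each actor, then inserts only those
    // actor/release associations not already present in actors_associated.
    await associateActors(mappedActors, storedReleases);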
@@ -29,7 +29,7 @@ async function getThumbnail(buffer) {
 }
 
 async function createReleaseMediaDirectory(release, releaseId) {
-  if (release.poster || (release.photos && release.photos.length)) {
+  if (release.poster || (release.photos && release.photos.length) || release.trailer) {
     await fs.mkdir(
       path.join(config.media.path, 'releases', release.site.network.slug, release.site.slug, releaseId.toString()),
       { recursive: true },
@@ -133,7 +133,7 @@ async function storePhotos(release, releaseId) {
       return null;
     }
   }, {
-    concurrency: 2,
+    concurrency: 10,
   });
 
   await knex('media')
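
The concurrency bump from 2 to 10 here uses the mapper's { concurrency } option; the signature matches Bluebird's Promise.map, though which promise library the project uses is an assumption here. A minimal sketch:

    // Minimal sketch assuming Bluebird; photoUrls and storePhoto are hypothetical.
    const Promise = require('bluebird');

    async function storeAll(photoUrls) {
      // At most 10 downloads are in flight at any time.
      return Promise.map(photoUrls, async url => storePhoto(url), { concurrency: 10 });
    }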
@@ -225,7 +225,7 @@ async function storeAvatars(profile, actor) {
       return null;
     }
   }, {
-    concurrency: 2,
+    concurrency: 10,
   });
 
   const avatars = files.filter(file => file);
@@ -197,7 +197,6 @@ async function storeReleaseAssets(release, releaseId) {
   await createReleaseMediaDirectory(release, releaseId);
 
   await Promise.all([
-    associateActors(release, releaseId),
     associateTags(release, releaseId),
     storePhotos(release, releaseId),
    storePoster(release, releaseId),
@@ -222,36 +221,59 @@ async function storeRelease(release) {
       })
       .returning('*');
 
-    await storeReleaseAssets(release, existingRelease.id);
+    // await storeReleaseAssets(release, existingRelease.id);
     console.log(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);
 
-    return updatedRelease || existingRelease;
+    return updatedRelease ? updatedRelease.id : existingRelease.id;
   }
 
   const [releaseEntry] = await knex('releases')
     .insert(curatedRelease)
     .returning('*');
 
-  await storeReleaseAssets(release, releaseEntry.id);
+  // await storeReleaseAssets(release, releaseEntry.id);
   console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);
 
   return releaseEntry.id;
 }
 
 async function storeReleases(releases) {
-  return Promise.map(releases, async (release) => {
+  const storedReleases = await Promise.map(releases, async (release) => {
     try {
       const releaseId = await storeRelease(release);
 
-      return releaseId;
+      return {
+        id: releaseId,
+        ...release,
+      };
     } catch (error) {
       console.error(error);
 
       return null;
     }
   }, {
-    concurrency: 2,
+    concurrency: 10,
   });
+
+  const actors = storedReleases.reduce((acc, release) => {
+    release.actors.forEach((actor) => {
+      const trimmedActor = actor.trim();
+
+      if (acc[trimmedActor]) {
+        acc[trimmedActor] = acc[trimmedActor].concat(release.id);
+        return;
+      }
+
+      acc[trimmedActor] = [release.id];
+    });
+
+    return acc;
+  }, {});
+
+  await Promise.all([
+    associateActors(actors, storedReleases),
+    Promise.all(storedReleases.map(async release => storeReleaseAssets(release, release.id))),
+  ]);
 }
 
 module.exports = {
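
For illustration (not part of the commit): the reduce above turns the stored releases into the name-to-release-IDs map that the new associateActors expects, trimming actor names along the way.

    // Hypothetical input and the resulting map.
    const storedReleases = [
      { id: 1, actors: ['Jane Doe ', 'John Roe'] },
      { id: 2, actors: ['Jane Doe'] },
    ];

    // After the reduce:
    // { 'Jane Doe': [1, 2], 'John Roe': [1] }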
@@ -7,7 +7,6 @@ const scrapers = require('./scrapers/scrapers');
 const { storeReleases } = require('./releases');
 const { findSiteByUrl } = require('./sites');
 const { findNetworkByUrl } = require('./networks');
-const { scrapeBasicActors } = require('./actors');
 
 async function findSite(url, release) {
   const site = (release && release.site) || await findSiteByUrl(url);
@@ -50,7 +49,6 @@ async function scrapeRelease(url, release, deep = false) {
   if (!deep && argv.save) {
     // don't store release when called by site scraper
     const [releaseId] = await storeReleases([scene]);
-    await scrapeBasicActors();
 
     console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`);
   }
@@ -9,7 +9,6 @@ const { fetchIncludedSites } = require('./sites');
 const scrapers = require('./scrapers/scrapers');
 const scrapeRelease = require('./scrape-release');
 const { storeReleases } = require('./releases');
-const { scrapeBasicActors } = require('./actors');
 
 function getAfterDate() {
   return moment
@@ -103,40 +102,39 @@ async function scrapeSiteReleases(scraper, site) {
 }
 
 async function scrapeReleases() {
-  const sites = await fetchIncludedSites();
+  const networks = await fetchIncludedSites();
 
-  console.log(`Found ${sites.length} sites in database`);
-
-  await Promise.map(sites, async (site) => {
+  const scrapedReleases = await Promise.map(networks, async network => Promise.map(network.sites, async (site) => {
     const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
 
     if (!scraper) {
       console.warn(`No scraper found for '${site.name}' (${site.slug})`);
-      return;
+      return [];
     }
 
     try {
-      const siteReleases = await scrapeSiteReleases(scraper, site);
-      const siteActors = siteReleases.reduce((acc, release) => [...acc, ...release.actors], []);
-
-      console.log(siteActors);
-
-      if (argv.save) {
-        await storeReleases(siteReleases);
-      }
+      return await scrapeSiteReleases(scraper, site);
     } catch (error) {
       if (argv.debug) {
        console.error(`${site.id}: Failed to scrape releases`, error);
-        return;
       }
 
       console.warn(`${site.id}: Failed to scrape releases`);
+
+      return [];
     }
   }, {
+    // 2 network sites at a time
     concurrency: 2,
+  }),
+  {
+    // 5 networks at a time
+    concurrency: 5,
   });
 
-  await scrapeBasicActors();
+  if (argv.save) {
+    await storeReleases(scrapedReleases.flat(2));
+  }
 }
 
 module.exports = scrapeReleases;
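
A note on the nested mapping (an observation, not part of the commit): each network maps over its own sites, so scrapedReleases is an array of networks, each an array of sites, each an array of releases; flat(2) collapses both levels before storing. With concurrency 5 at the network level and 2 at the site level, at most 10 sites scrape at once.

    // Shape sketch with hypothetical data.
    const scrapedReleases = [
      [[{ title: 'A' }], [{ title: 'B' }, { title: 'C' }]], // network 1: two sites
      [[{ title: 'D' }]],                                   // network 2: one site
    ];

    scrapedReleases.flat(2);
    // => [{ title: 'A' }, { title: 'B' }, { title: 'C' }, { title: 'D' }]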
src/sites.js
@@ -72,6 +72,25 @@ async function findSiteByUrl(url) {
   return null;
 }
 
+function sitesByNetwork(sites) {
+  const networks = sites.reduce((acc, site) => {
+    if (acc[site.network.slug]) {
+      acc[site.network.slug].sites = acc[site.network.slug].sites.concat(site);
+
+      return acc;
+    }
+
+    acc[site.network.slug] = {
+      ...site.network,
+      sites: [site],
+    };
+
+    return acc;
+  }, {});
+
+  return Object.values(networks);
+}
+
 async function fetchSitesFromArgv() {
   const rawSites = await knex('sites')
     .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
@@ -79,7 +98,10 @@ async function fetchSitesFromArgv() {
     .orWhereIn('networks.slug', argv.networks || [])
     .leftJoin('networks', 'sites.network_id', 'networks.id');
 
-  return curateSites(rawSites, true);
+  const curatedSites = await curateSites(rawSites, true);
+  console.log(`Found ${curatedSites.length} sites in database`);
+
+  return sitesByNetwork(curatedSites);
 }
 
 async function fetchSitesFromConfig() {
@@ -94,7 +116,10 @@ async function fetchSitesFromConfig() {
     .orWhereIn('network_id', networkIds)
     .leftJoin('networks', 'sites.network_id', 'networks.id');
 
-  return curateSites(rawSites, true);
+  const curatedSites = await curateSites(rawSites, true);
+  console.log(`Found ${curatedSites.length} sites in database`);
+
+  return sitesByNetwork(curatedSites);
 }
 
 async function fetchIncludedSites() {
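
For illustration (not part of the diff): sitesByNetwork regroups the flat, curated site list under each network, which is the structure the nested Promise.map in the release scraper iterates over. A rough sketch with made-up slugs:

    // Hypothetical input: curated sites, each carrying its network.
    const sites = [
      { slug: 'site-a', network: { slug: 'network-x', name: 'Network X' } },
      { slug: 'site-b', network: { slug: 'network-x', name: 'Network X' } },
      { slug: 'site-c', network: { slug: 'network-y', name: 'Network Y' } },
    ];

    sitesByNetwork(sites);
    // => [
    //   { slug: 'network-x', name: 'Network X', sites: [/* site-a, site-b */] },
    //   { slug: 'network-y', name: 'Network Y', sites: [/* site-c */] },
    // ]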
src/tags.js
@@ -4,13 +4,21 @@ const knex = require('./knex');
 const whereOr = require('./utils/where-or');
 
 async function curateTag(tag) {
-  const aliases = await knex('tags').where({ alias_for: tag.id });
+  const [aliases, media] = await Promise.all([
+    knex('tags').where({ alias_for: tag.id }),
+    knex('media')
+      .where('domain', 'tags')
+      .andWhere('target_id', tag.id)
+      .orderBy('index'),
+  ]);
 
   return {
     id: tag.id,
     name: tag.name,
     slug: tag.slug,
     description: tag.description,
+    poster: media.find(photo => photo.role === 'poster'),
+    photos: media.filter(photo => photo.role === 'photo'),
     group: {
       id: tag.group_id,
       name: tag.group_name,
@@ -31,15 +39,20 @@ async function associateTags(release, releaseId) {
     return;
   }
 
-  await knex('tags_associated').insert(release.tags.map(tagId => ({
-    tag_id: tagId,
-    release_id: releaseId,
-  })));
+  try {
+    await knex('tags_associated').insert(release.tags.map(tagId => ({
+      tag_id: tagId,
+      release_id: releaseId,
+    })));
+  } catch (error) {
+    console.log(release, error);
+  }
 }
 
-async function fetchTags(queryObject, limit = 100) {
+async function fetchTags(queryObject, groupsQueryObject, limit = 100) {
   const tags = await knex('tags')
     .where(builder => whereOr(queryObject, 'tags', builder))
+    .orWhere(builder => whereOr(groupsQueryObject, 'tags_groups', builder))
     .andWhere({ 'tags.alias_for': null })
     .select(
       'tags.*',
src/utils/escape-html.js (new file)
@@ -0,0 +1,10 @@
+function escapeHtml(text) {
+  return text
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#039;');
+}
+
+module.exports = escapeHtml;
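
A quick usage example for the new helper (not part of the commit):

    const escapeHtml = require('./utils/escape-html');

    escapeHtml('5 < 10 & "safe"');
    // => '5 &lt; 10 &amp; &quot;safe&quot;'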
@@ -10,7 +10,7 @@ async function fetchTagsApi(req, res) {
   const tags = await fetchTags({
     id: tagId,
     slug: tagSlug,
-  }, req.query.limit);
+  }, null, req.query.limit);
 
   if (tags.length > 0) {
     res.send(tags[0]);
@@ -21,9 +21,16 @@ async function fetchTagsApi(req, res) {
     return;
   }
 
-  const tags = await fetchTags({
-    priority: req.query.priority.split(','),
-  }, req.query.limit);
+  const query = {};
+  const groupsQuery = {};
+
+  if (req.query.priority) query.priority = req.query.priority.split(',');
+  if (req.query.slug) query.slug = req.query.slug.split(',');
+  if (req.query.group) {
+    groupsQuery.slug = req.query.group.split(',');
+  }
+
+  const tags = await fetchTags(query, groupsQuery, req.query.limit);
 
   res.send(tags);
 }
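
For illustration (not part of the diff): with the new handling, a request's query parameters are split across the two objects passed to fetchTags. The route path below is hypothetical.

    // GET /tags?priority=1,2&group=example-group
    // req.query = { priority: '1,2', group: 'example-group' }
    //
    // query       => { priority: ['1', '2'] }
    // groupsQuery => { slug: ['example-group'] }
    //
    // fetchTags(query, groupsQuery, req.query.limit) then matches tags against
    // query, or their tag groups against groupsQuery, excluding alias tags.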