Using media database for images.
This commit is contained in:
@@ -36,11 +36,13 @@ function curateSites(sites) {
|
||||
return sites.map(site => ({
|
||||
id: site.id,
|
||||
name: site.name,
|
||||
slug: site.slug,
|
||||
description: site.description,
|
||||
url: site.url,
|
||||
network: {
|
||||
id: site.network_id,
|
||||
name: site.network_name,
|
||||
slug: site.network_slug,
|
||||
},
|
||||
parameters: JSON.parse(site.parameters),
|
||||
}));
|
||||
@@ -48,11 +50,11 @@ function curateSites(sites) {
|
||||
|
||||
async function accumulateIncludedSites() {
|
||||
if (argv.networks || argv.sites) {
|
||||
const networks = await knex('networks').select('id').whereIn('slug', argv.networks);
|
||||
const networks = await knex('networks').select('id').whereIn('slug', argv.networks || []);
|
||||
const networkIds = networks.map(network => network.id);
|
||||
|
||||
const rawSites = await knex('sites')
|
||||
.select('sites.*', 'networks.name as network_name')
|
||||
.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug')
|
||||
.whereIn('sites.slug', argv.sites || [])
|
||||
.orWhereIn('network_id', networkIds)
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id');
|
||||
@@ -62,12 +64,12 @@ async function accumulateIncludedSites() {
|
||||
|
||||
const included = destructConfigNetworks(config.include);
|
||||
|
||||
const networks = await knex('networks').select('id').whereIn('slug', included.networks);
|
||||
const networks = await knex('networks').select('id').whereIn('slug', included.networks || []);
|
||||
const networkIds = networks.map(network => network.id);
|
||||
|
||||
const rawSites = await knex('sites')
|
||||
.select('sites.*', 'networks.name as network_name')
|
||||
.whereIn('sites.slug', included.sites)
|
||||
.whereIn('sites.slug', included.sites || [])
|
||||
.orWhereIn('network_id', networkIds)
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id');
|
||||
|
||||
@@ -84,20 +86,42 @@ async function findDuplicateReleases(latestReleases, _siteId) {
|
||||
}
|
||||
|
||||
async function storePhotos(release, releaseEntry) {
|
||||
const photoPath = path.join(config.photoPath, release.site.id, releaseEntry.rows[0].id.toString());
|
||||
await fs.mkdir(path.join(config.photoPath, release.site.slug, releaseEntry.id.toString()), { recursive: true });
|
||||
|
||||
await fs.mkdir(photoPath, { recursive: true });
|
||||
console.log(`Storing photos for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
|
||||
|
||||
console.log(`Storing photos for (${release.site.name}, ${release.id}) "${release.title}"`);
|
||||
|
||||
await Promise.map(release.photos, async (photoUrl, index) => {
|
||||
const filepaths = await Promise.map(release.photos, async (photoUrl, index) => {
|
||||
const res = await bhttp.get(photoUrl);
|
||||
await fs.writeFile(path.join(photoPath, `${index + 1}.jpg`), res.body);
|
||||
const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `${index + 1}.jpg`);
|
||||
await fs.writeFile(path.join(config.photoPath, filepath), res.body);
|
||||
|
||||
return photoUrl;
|
||||
return filepath;
|
||||
}, {
|
||||
concurrency: 2,
|
||||
});
|
||||
|
||||
await knex('media').insert(filepaths.map((filepath, index) => ({
|
||||
file: filepath,
|
||||
mime: 'image/jpeg',
|
||||
index,
|
||||
domain: 'releases',
|
||||
target_id: releaseEntry.id,
|
||||
role: null,
|
||||
})));
|
||||
|
||||
if (release.trailer && release.trailer.poster) {
|
||||
const res = await bhttp.get(release.trailer.poster);
|
||||
const filepath = path.join(release.site.slug, releaseEntry.id.toString(), 'poster.jpg');
|
||||
await fs.writeFile(path.join(config.photoPath, filepath), res.body);
|
||||
|
||||
await knex('media').insert({
|
||||
file: filepath,
|
||||
mime: 'image/jpeg',
|
||||
domain: 'releases',
|
||||
target_id: releaseEntry.id,
|
||||
role: 'poster',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async function storeReleases(releases = []) {
|
||||
@@ -112,22 +136,25 @@ async function storeReleases(releases = []) {
|
||||
description: release.description,
|
||||
// director: release.director,
|
||||
duration: release.duration,
|
||||
photos: release.photos ? release.photos.length : 0,
|
||||
// photos: release.photos ? release.photos.length : 0,
|
||||
likes: release.rating && release.rating.likes,
|
||||
dislikes: release.rating && release.rating.dislikes,
|
||||
rating: release.rating && release.rating.stars,
|
||||
};
|
||||
|
||||
console.log(`Storing (${release.site.name}, ${release.id}) "${release.title}"`);
|
||||
|
||||
const releaseQuery = `${knex('releases').insert(curatedRelease).toString()} ON CONFLICT DO NOTHING RETURNING *`;
|
||||
const releaseEntry = await knex.raw(releaseQuery);
|
||||
|
||||
console.log(`Stored (${release.site.name}, ${releaseEntry.rows[0].id}) "${release.title}"`);
|
||||
|
||||
if (release.actors && release.actors.length > 0) {
|
||||
const actors = await knex('actors').whereIn('name', release.actors);
|
||||
const newActors = release.actors.filter(actorName => !actors.some(actor => actor.name === actorName));
|
||||
const { rows: insertedActors } = newActors.length
|
||||
? await knex.raw(`${knex('actors').insert(newActors.map(actorName => ({ name: actorName })))} ON CONFLICT DO NOTHING RETURNING *`)
|
||||
? await knex.raw(`${knex('actors').insert(newActors.map(actorName => ({
|
||||
name: actorName,
|
||||
slug: actorName.toLowerCase().replace(/\s+/g, '-'),
|
||||
})))} ON CONFLICT DO NOTHING RETURNING *`)
|
||||
: { rows: [] };
|
||||
|
||||
await knex('actors_associated').insert(actors.concat(insertedActors).map(actor => ({
|
||||
@@ -144,7 +171,7 @@ async function storeReleases(releases = []) {
|
||||
}
|
||||
|
||||
if (release.photos && release.photos.length > 0) {
|
||||
await storePhotos(release, releaseEntry);
|
||||
await storePhotos(release, releaseEntry.rows[0]);
|
||||
}
|
||||
}, {
|
||||
concurrency: 2,
|
||||
@@ -184,12 +211,8 @@ async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page
|
||||
async function fetchReleases() {
|
||||
const sites = await accumulateIncludedSites();
|
||||
|
||||
console.log(sites);
|
||||
|
||||
return;
|
||||
|
||||
const scenesPerSite = await Promise.map(sites, async (site) => {
|
||||
const scraper = scrapers[site.id] || scrapers[site.network.id];
|
||||
const scraper = scrapers[site.slug] || scrapers[site.network.slug];
|
||||
|
||||
if (scraper) {
|
||||
try {
|
||||
|
||||
@@ -11,20 +11,10 @@ async function findSite(url) {
|
||||
const { hostname } = new URL(url);
|
||||
const domain = hostname.replace(/^www./, '');
|
||||
|
||||
/*
|
||||
const site = await knex('sites')
|
||||
.where({ url: `${protocol}//www.${domain}` })
|
||||
.orWhere({ url: `${protocol}//${domain}` })
|
||||
.first()
|
||||
// scene might use generic network URL, let network scraper determine channel site
|
||||
|| await knex('networks')
|
||||
.where({ url: `${protocol}//www.${hostname}` })
|
||||
.orWhere({ url: `${protocol}//${hostname}` })
|
||||
.first();
|
||||
*/
|
||||
|
||||
const site = await knex('sites')
|
||||
.where('url', 'like', `%${domain}`)
|
||||
.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug')
|
||||
.where('sites.url', 'like', `%${domain}`)
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id')
|
||||
.first()
|
||||
// scene might use generic network URL, let network scraper determine channel site
|
||||
|| await knex('networks')
|
||||
@@ -34,10 +24,12 @@ async function findSite(url) {
|
||||
return {
|
||||
id: site.id,
|
||||
name: site.name,
|
||||
slug: site.slug,
|
||||
description: site.description,
|
||||
url: site.url,
|
||||
network: {
|
||||
id: site.network_id || site.id,
|
||||
slug: site.network_slug || site.slug,
|
||||
},
|
||||
parameters: site.parameters && JSON.parse(site.parameters),
|
||||
isFallback: site.network_id === undefined,
|
||||
@@ -104,7 +96,7 @@ async function storeRelease(release) {
|
||||
|
||||
async function fetchScene(url) {
|
||||
const site = await findSite(url);
|
||||
const scraper = scrapers[site.id] || scrapers[site.network.id];
|
||||
const scraper = scrapers[site.slug] || scrapers[site.network.slug];
|
||||
|
||||
if (!scraper) {
|
||||
throw new Error('Could not find scraper for URL');
|
||||
|
||||
@@ -3,15 +3,19 @@
|
||||
const knex = require('./knex');
|
||||
|
||||
async function curateRelease(release) {
|
||||
const actors = await knex('actors_associated')
|
||||
.select('actors.id', 'actors.name', 'actors.gender')
|
||||
.where({ release_id: release.id })
|
||||
.leftJoin('actors', 'actors.id', 'actors_associated.actor_id');
|
||||
|
||||
const tags = await knex('tags_associated')
|
||||
.select('tags.tag', 'tags.capitalization')
|
||||
.where({ release_id: release.id })
|
||||
.leftJoin('tags', 'tags.tag', 'tags_associated.tag_id');
|
||||
const [actors, tags, media] = await Promise.all([
|
||||
knex('actors_associated')
|
||||
.select('actors.id', 'actors.name', 'actors.gender', 'actors.slug')
|
||||
.where({ release_id: release.id })
|
||||
.leftJoin('actors', 'actors.id', 'actors_associated.actor_id'),
|
||||
knex('tags_associated')
|
||||
.select('tags.tag', 'tags.slug')
|
||||
.where({ release_id: release.id })
|
||||
.leftJoin('tags', 'tags.id', 'tags_associated.tag_id'),
|
||||
knex('media')
|
||||
.where({ target_id: release.id })
|
||||
.orderBy('role'),
|
||||
]);
|
||||
|
||||
return {
|
||||
id: release.id,
|
||||
@@ -24,7 +28,7 @@ async function curateRelease(release) {
|
||||
actors,
|
||||
director: release.director,
|
||||
tags,
|
||||
photos: release.photos,
|
||||
photos: media,
|
||||
rating: {
|
||||
likes: release.likes,
|
||||
dislikes: release.dislikes,
|
||||
@@ -58,8 +62,6 @@ async function fetchReleases(releaseId) {
|
||||
.orderBy('date', 'desc')
|
||||
.limit(100);
|
||||
|
||||
// console.log(curateReleases(releases));
|
||||
|
||||
return curateReleases(releases);
|
||||
}
|
||||
|
||||
|
||||
@@ -73,7 +73,7 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
|
||||
const rawTags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
|
||||
const [channelSite, tags] = await Promise.all([
|
||||
knex('sites').where({ id: sitename }).first(),
|
||||
knex('sites').where({ slug: sitename }).first(),
|
||||
matchTags(rawTags),
|
||||
]);
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ const knex = require('./knex');
|
||||
async function matchTags(rawTags) {
|
||||
const tagEntries = await knex('tags').whereIn('tags.tag', rawTags.map(tag => tag.toLowerCase()));
|
||||
|
||||
return Array.from(new Set(tagEntries.map((tag => tag.alias_for || tag.tag)).sort())); // reduce to tag name and filter duplicates
|
||||
return Array.from(new Set(tagEntries.map((tag => tag.alias_for || tag.id)).sort())); // reduce to tag name and filter duplicates
|
||||
}
|
||||
|
||||
module.exports = { matchTags };
|
||||
|
||||
Reference in New Issue
Block a user