// traxxx/src/fetch-scene.js
// 126 lines, 3.6 KiB — JavaScript
'use strict';
const config = require('config');
const moment = require('moment');
const knex = require('./knex');
const argv = require('./argv');
const scrapers = require('./scrapers');
/**
 * Look up the site matching a scene URL, falling back to the network record
 * when the scene lives on a generic network domain.
 *
 * @param {string} url - Full scene URL; its hostname is matched against
 *   `sites.url` / `networks.url` with a trailing LIKE.
 * @returns {Promise<Object>} Curated site with a nested `network` object.
 *   `isFallback` is true when only a network (no channel site) matched.
 * @throws {Error} When neither a site nor a network matches the domain.
 */
async function findSite(url) {
	const { hostname } = new URL(url);
	// Escaped dot: strip only a literal "www." prefix, not "www" + any character.
	const domain = hostname.replace(/^www\./, '');

	const site = await knex('sites')
		.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug')
		.where('sites.url', 'like', `%${domain}`)
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.first()
		// scene might use generic network URL, let network scraper determine channel site
		|| await knex('networks')
			.where('url', 'like', `%${domain}`)
			.first();

	// Fail with a descriptive message instead of a TypeError on `site.id` below.
	if (!site) {
		throw new Error(`Could not find site or network matching domain '${domain}'`);
	}

	return {
		id: site.id,
		name: site.name,
		slug: site.slug,
		description: site.description,
		url: site.url,
		network: {
			// Network rows have no network_id; they are their own network.
			id: site.network_id || site.id,
			slug: site.network_slug || site.slug,
		},
		// Parameters are stored as a JSON string column; absent means null/undefined.
		parameters: site.parameters && JSON.parse(site.parameters),
		// Only the networks-table fallback row lacks a network_id column entirely.
		isFallback: site.network_id === undefined,
	};
}
/**
 * Build a download filename for a scene from the configured pattern.
 *
 * The pattern contains `{placeholder}` tokens (e.g. `{siteName}`); each token
 * is replaced by the corresponding scene property, optionally wrapped in a
 * per-property subpattern from `config.filename.subpatterns`. Forward slashes
 * are replaced with `config.filename.slash` so values cannot create
 * unintended path segments. Missing/falsy properties render as ''.
 *
 * @param {Object} scene - Scraped scene with site, shootId, title, actors, date.
 * @returns {string} The formatted filename.
 */
function deriveFilename(scene) {
	const props = {
		siteName: scene.site.name,
		sceneId: scene.shootId,
		sceneTitle: scene.title,
		sceneActors: scene.actors.join(config.filename.actorsJoin),
		sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),
	};

	const filename = config.filename.pattern.replace(/\{\w+\}/g, (match) => {
		const prop = match.slice(1, -1);
		const rawValue = props[prop];

		// Falsy values (missing prop, empty title, etc.) render as an empty string.
		if (!rawValue) {
			return '';
		}

		// Coerce to string so numeric values (e.g. a numeric shoot id) don't
		// crash on .replace, and sanitize path separators.
		const value = String(rawValue).replace(/\//g, config.filename.slash);

		if (config.filename.subpatterns[prop]) {
			return config.filename.subpatterns[prop]
				.replace(/\{\w+\}/, value)
				.replace(/\//g, config.filename.slash);
		}

		return value;
	});

	return filename;
}
/**
 * Upsert a scraped release into the `releases` table.
 *
 * Conflict target is (site_id, shoot_id) — assumes a matching unique
 * constraint exists; on conflict, only the mutable stats columns
 * (description, likes, dislikes, rating) are refreshed.
 *
 * @param {Object} release - Scraped release; `site.id` must be set.
 * @returns {Promise<Object>} The original release, unchanged.
 */
async function storeRelease(release) {
	// knex rejects `undefined` bindings ("Undefined binding(s)"), so coalesce
	// absent optional fields to NULL explicitly.
	const orNull = (value) => (value === undefined ? null : value);

	const curatedRelease = {
		site_id: release.site.id,
		shoot_id: release.shootId || null,
		entry_id: release.entryId || null,
		url: orNull(release.url),
		title: orNull(release.title),
		date: orNull(release.date),
		description: orNull(release.description),
		// director: release.director,
		duration: orNull(release.duration),
		photos: release.photos ? release.photos.length : 0,
		likes: orNull(release.rating && release.rating.likes),
		dislikes: orNull(release.rating && release.rating.dislikes),
		rating: orNull(release.rating && release.rating.stars),
	};

	console.log('Saving release to database');

	// knex has no native upsert here; values are escaped by the query builder
	// before the ON CONFLICT clause is appended to the generated SQL.
	await knex.raw(`${knex('releases').insert(curatedRelease).toString()} ON CONFLICT (site_id, shoot_id) DO UPDATE SET
		description = EXCLUDED.description,
		likes = EXCLUDED.likes,
		dislikes = EXCLUDED.dislikes,
		rating = EXCLUDED.rating
	`);

	return release;
}
/**
 * Fetch a single scene by URL: resolve its site, pick the matching scraper,
 * scrape the scene, derive a filename, and optionally persist it.
 *
 * @param {string} url - Scene page URL.
 * @returns {Promise<Object>} The scraped scene augmented with `filename`
 *   and `copy` (an alias of the filename for clipboard use).
 * @throws {Error} When no scraper exists for the site/network, or the
 *   scraper cannot fetch individual scenes.
 */
async function fetchScene(url) {
	const site = await findSite(url);

	// Prefer a channel-specific scraper; fall back to the network-level one.
	const scraper = scrapers[site.slug] || scrapers[site.network.slug];

	if (!scraper) {
		// Include the URL so the failure is diagnosable from logs.
		throw new Error(`Could not find scraper for URL '${url}'`);
	}

	if (!scraper.fetchScene) {
		throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
	}

	const scene = await scraper.fetchScene(url, site);
	const filename = deriveFilename(scene);

	// Persist only when both --scene and --save were requested on the CLI.
	if (argv.scene && argv.save) {
		await storeRelease(scene);
	}

	return {
		...scene,
		filename,
		copy: filename,
	};
}

module.exports = fetchScene;