// forked from DebaucheryLibrarian/traxxx
'use strict';
|
|
|
|
const config = require('config');
|
|
const moment = require('moment');
|
|
|
|
const knex = require('./knex');
|
|
const argv = require('./argv');
|
|
const scrapers = require('./scrapers');
|
|
|
|
/**
 * Resolve the site a scene URL belongs to, joined with its network metadata.
 *
 * Falls back to a bare network row when the URL only matches a network
 * (generic network URLs let the network scraper determine the channel site);
 * `isFallback` marks that case for the caller.
 *
 * @param {string} url - Scene URL to match against stored site/network URLs.
 * @returns {Promise<Object>} Curated site with nested `network` info.
 * @throws {Error} When no site or network matches the URL's domain.
 */
async function findSite(url) {
	const { hostname } = new URL(url);
	// Strip a leading "www." label. The dot must be escaped: the previous
	// /^www./ matched "www" plus ANY character, mangling hostnames such as
	// "www1.example.com" or "wwwx.example.com".
	const domain = hostname.replace(/^www\./, '');

	const site = await knex('sites')
		.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
		.where('sites.url', 'like', `%${domain}`)
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.first()
		// scene might use generic network URL, let network scraper determine channel site
		|| await knex('networks')
			.where('url', 'like', `%${domain}`)
			.first();

	// Fail with a descriptive message instead of an opaque TypeError on site.id.
	if (!site) {
		throw new Error(`Could not find site or network matching '${domain}'`);
	}

	return {
		id: site.id,
		name: site.name,
		slug: site.slug,
		description: site.description,
		url: site.url,
		network: {
			// Network fallback rows have no network_id; they ARE the network.
			id: site.network_id || site.id,
			slug: site.network_slug || site.slug,
			parameters: site.network_parameters && JSON.parse(site.network_parameters),
		},
		parameters: site.parameters && JSON.parse(site.parameters),
		isFallback: site.network_id === undefined,
	};
}
|
|
|
|
/**
 * Build a filename for a scene from the configured pattern.
 *
 * `config.filename.pattern` contains `{token}` placeholders; each token is
 * looked up in the scene-derived token map. A token may additionally have a
 * subpattern (wrapper text around the value). Slashes in interpolated values
 * are replaced with `config.filename.slash`; unknown or empty tokens become ''.
 *
 * @param {Object} scene - Scraped scene with site, title, actors and date.
 * @returns {string} The derived filename.
 */
function deriveFilename(scene) {
	// Values available for interpolation into the pattern.
	const tokens = {
		siteName: scene.site.name,
		sceneId: scene.shootId,
		sceneTitle: scene.title,
		sceneActors: scene.actors.join(config.filename.actorsJoin),
		sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),
	};

	return config.filename.pattern.replace(/\{\w+\}/g, (placeholder) => {
		const key = placeholder.slice(1, -1); // strip surrounding braces
		const token = tokens[key];

		// Unknown or empty tokens disappear from the filename.
		if (!token) {
			return '';
		}

		const subpattern = config.filename.subpatterns[key];

		if (subpattern) {
			return subpattern
				.replace(/\{\w+\}/, token)
				.replace(/\//g, config.filename.slash);
		}

		return token.replace(/\//g, config.filename.slash) || '';
	});
}
|
|
|
|
/**
 * Persist a scraped release to the `releases` table.
 *
 * Upserts on (site_id, shoot_id): re-scraping an existing release refreshes
 * its mutable fields (description, likes, dislikes, rating) via
 * PostgreSQL's ON CONFLICT ... DO UPDATE.
 *
 * @param {Object} release - Scraped release; must include `site.id`.
 * @returns {Promise<Object>} The release as passed in (unmodified).
 */
async function storeRelease(release) {
	// Map the scraped release onto the releases table's snake_case columns.
	const curatedRelease = {
		site_id: release.site.id,
		shoot_id: release.shootId || null,
		entry_id: release.entryId || null,
		url: release.url,
		title: release.title,
		date: release.date,
		description: release.description,
		// director: release.director,
		duration: release.duration,
		photos: release.photos ? release.photos.length : 0,
		likes: release.rating && release.rating.likes,
		dislikes: release.rating && release.rating.dislikes,
		rating: release.rating && release.rating.stars,
	};

	console.log('Saving release to database');

	// The query builder has no portable upsert for this knex version, so the
	// INSERT is rendered to SQL (toString() applies knex's value escaping) and
	// a PostgreSQL-specific ON CONFLICT clause is appended.
	await knex.raw(`${knex('releases').insert(curatedRelease).toString()} ON CONFLICT (site_id, shoot_id) DO UPDATE SET
		description = EXCLUDED.description,
		likes = EXCLUDED.likes,
		dislikes = EXCLUDED.dislikes,
		rating = EXCLUDED.rating
	`);

	return release;
}
|
|
|
|
/**
 * Fetch and curate a single scene from its URL.
 *
 * Resolves the owning site (from the release, or by URL lookup), dispatches
 * to the matching scraper, derives a filename, and optionally stores the
 * result when run with both --scene and --save.
 *
 * @param {string} url - Scene URL to fetch.
 * @param {Object} release - Partially known release; may carry `site`.
 * @returns {Promise<Object>} The scraped scene plus `filename` and `copy`.
 * @throws {Error} When no scraper matches, or the scraper lacks fetchScene.
 */
async function fetchScene(url, release) {
	const site = release.site || await findSite(url);

	// Prefer a site-specific scraper; fall back to the network's scraper.
	const siteScraper = scrapers[site.slug];
	const scraper = siteScraper || scrapers[site.network.slug];

	if (!scraper) {
		throw new Error('Could not find scraper for URL');
	}

	if (!scraper.fetchScene) {
		throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
	}

	const scene = await scraper.fetchScene(url, site);
	const filename = deriveFilename(scene);

	// Only persist when explicitly requested on the command line.
	if (argv.scene && argv.save) {
		await storeRelease(scene);
	}

	return {
		...scene,
		filename,
		copy: filename,
	};
}
|
|
|
|
// Public entry point: fetch and curate a single scene by URL.
module.exports = fetchScene;
|