Major refactor, cleaned up site scrape module, fixed and cleaned up release scrape module. Removed old CLI code
84 src/app.js
@@ -1,80 +1,30 @@
 'use strict';

-const blessed = require('neo-blessed');
-const clipboard = require('clipboardy');
-
 const argv = require('./argv');
-const { renderReleases, renderScene } = require('./tui/render');
 const knex = require('./knex');
 const initServer = require('./web/server');

-const fetchReleases = require('./fetch-releases');
-const fetchScene = require('./fetch-scene');
+const scrapeSites = require('./scrape-sites');
+const scrapeRelease = require('./scrape-release');

-function initScreen() {
-    const screen = blessed.screen({
-        title: `traxxx ${new Date().getTime()}`,
-        smartCSR: true,
-        mouse: false,
-    });
-
-    screen.enableInput();
-
-    screen.key(['escape', 'q', 'C-c'], () => {
-        screen.render();
-        screen.destroy();
-    });
-
-    return screen;
-}
-
-function getMethod() {
-    if (argv.scene) {
-        return {
-            fetch: () => fetchScene(argv.scene),
-            render: renderScene,
-        };
-    }
-
-    if (argv.fetch) {
-        return {
-            fetch: () => fetchReleases(),
-            render: renderReleases,
-        };
-    }
-
-    return initServer();
-}
-
 async function init() {
-    const screen = argv.render && !argv.filename && initScreen();
+    if (argv.url) {
+        await scrapeRelease(argv.url);
+        knex.destroy();
+
+        return;
+    }
-
-    try {
-        const method = getMethod();
-
-        if (method) {
-            const result = await method.fetch();
-
-            if (result) {
-                if (argv.copy && result.copy) {
-                    clipboard.writeSync(result.copy);
-                    console.log(`Result copied to clipboard: ${result.copy}`);
-                }
-
-                if (argv.filename && result.filename) {
-                    console.log(result.filename);
-                    // setTimeout(() => log(), 5000);
-                    return;
-                }
-
-                if (argv.render) {
-                    method.render(result, screen);
-                }
-            }
-        }
-    } catch (error) {
-        console.error(argv.debug ? error : error.message);
-        return;
-    }

+    if (argv.networks || argv.sites) {
+        await scrapeSites();
+        knex.destroy();
+
+        return;
+    }
+
+    await initServer();
 }

 init();
49 src/argv.js
@@ -5,25 +5,25 @@ const yargs = require('yargs');

 const { argv } = yargs
     .command('npm start')
-    .option('fetch', {
-        describe: 'Fetch latest releases',
-        type: 'boolean',
-        default: false,
-    })
+    .option('networks', {
+        describe: 'Networks to scrape (overrides config)',
+        type: 'array',
+        alias: 'network',
+    })
+    .option('sites', {
+        describe: 'Sites to scrape (overrides config)',
+        type: 'array',
+        alias: 'site',
+    })
     .option('deep', {
         describe: 'Fetch details for all releases',
         type: 'boolean',
         default: true,
     })
-    .option('networks', {
-        describe: 'Networks to include (overrides config)',
-        type: 'array',
-        alias: 'network',
-    })
-    .option('sites', {
-        describe: 'Sites to include (overrides config)',
-        type: 'array',
-        alias: 'site',
-    })
+    .option('url', {
+        describe: 'Scrape scene info from URL',
+        type: 'string',
+        alias: 'fetch',
+    })
     .option('after', {
         describe: 'Don\'t fetch scenes older than',
@@ -40,32 +40,9 @@ const { argv } = yargs
         type: 'boolean',
         default: true,
     })
-    .option('render', {
-        describe: 'Fetch data without rendering interface',
-        type: 'boolean',
-        default: false,
-    })
-    .option('scene', {
-        describe: 'Fetch scene info from URL',
-        type: 'string',
-    })
-    .option('copy', {
-        describe: 'Copy relevant result to clipboard',
-        type: 'boolean',
-        alias: 'c',
-    })
-    .option('filename', {
-        describe: 'Only output the suggested filename of a scene',
-        type: 'boolean',
-    })
     .option('debug', {
         describe: 'Show error stack traces',
         type: 'boolean',
-    })
-    .option('quit', {
-        describe: 'Exit after fetching data. Usually used with --copy.',
-        type: 'boolean',
-        alias: 'q',
     });

 module.exports = argv;
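Several modules in this commit expand the --after value via moment.utc().subtract(...argv.after.split(' ')), so the flag expects an amount followed by a unit. A minimal sketch of the parsing (the example value is an assumption, not a documented default):

const moment = require('moment');

// e.g. --after '1 month' becomes subtract('1', 'month')
const after = '1 month'; // hypothetical flag value
const afterDate = moment.utc().subtract(...after.split(' ')).toDate();

console.log(afterDate); // cutoff date for "recent" releases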
src/fetch-releases.js (deleted)
@@ -1,270 +0,0 @@
'use strict';

const config = require('config');
const fs = require('fs-extra');
const path = require('path');
const Promise = require('bluebird');
const moment = require('moment');

const argv = require('./argv');
const knex = require('./knex');
const scrapers = require('./scrapers');
const fetchScene = require('./fetch-scene');
const { storeTags } = require('./tags');
const { storeActors } = require('./actors');
const { storePoster, storePhotos, storeTrailer } = require('./media');

function destructConfigNetworks(networks) {
    return networks.reduce((acc, network) => {
        if (Array.isArray(network)) {
            // network specifies sites
            return {
                ...acc,
                sites: [...acc.sites, ...network[1]],
            };
        }

        return {
            ...acc,
            networks: [...acc.networks, network],
        };
    }, {
        networks: [],
        sites: [],
    });
}

function curateSites(sites) {
    return sites.map(site => ({
        id: site.id,
        name: site.name,
        slug: site.slug,
        description: site.description,
        url: site.url,
        network: {
            id: site.network_id,
            name: site.network_name,
            slug: site.network_slug,
            parameters: JSON.parse(site.network_parameters),
        },
        parameters: JSON.parse(site.parameters),
    }));
}

async function accumulateIncludedSites() {
    if (argv.networks || argv.sites) {
        const networks = await knex('networks').select('id').whereIn('slug', argv.networks || []);
        const networkIds = networks.map(network => network.id);

        const rawSites = await knex('sites')
            .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
            .whereIn('sites.slug', argv.sites || [])
            .orWhereIn('network_id', networkIds)
            .leftJoin('networks', 'sites.network_id', 'networks.id');

        return curateSites(rawSites);
    }

    const included = destructConfigNetworks(config.include);

    const networks = await knex('networks').select('id').whereIn('slug', included.networks || []);
    const networkIds = networks.map(network => network.id);

    const rawSites = await knex('sites')
        .select('sites.*', 'networks.name as network_name')
        .whereIn('sites.slug', included.sites || [])
        .orWhereIn('network_id', networkIds)
        .leftJoin('networks', 'sites.network_id', 'networks.id');

    return curateSites(rawSites);
}

async function findDuplicateReleases(latestReleases, _siteId) {
    const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(release => release !== undefined);
    const latestReleasesEntryIds = latestReleases.map(release => release.entryId).filter(release => release !== undefined);

    return knex('releases')
        .whereIn('shoot_id', latestReleasesShootIds)
        .orWhereIn('entry_id', latestReleasesEntryIds);
}

async function storeRelease(release) {
    const curatedRelease = {
        site_id: release.site.id,
        studio_id: release.studio ? release.studio.id : null,
        shoot_id: release.shootId || null,
        entry_id: release.entryId || null,
        url: release.url,
        title: release.title,
        date: release.date,
        description: release.description,
        // director: release.director,
        duration: release.duration,
        likes: release.rating && release.rating.likes,
        dislikes: release.rating && release.rating.dislikes,
        rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
        deep: Boolean(argv.deep && release.url && !release.upcoming),
    };

    const releaseEntries = await knex('releases')
        .insert(curatedRelease)
        .returning('*');

    if (releaseEntries.length) {
        const releaseEntry = releaseEntries[0];

        console.log(`Stored (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

        if (release.poster || (release.photos && release.photos.length)) {
            await fs.mkdir(path.join(config.media.path, release.site.network.slug, release.site.slug, releaseEntry.id.toString()), { recursive: true });
        }

        await Promise.all([
            release.actors && release.actors.length > 0
                ? storeActors(release, releaseEntry) : Promise.resolve(),
            release.tags && release.tags.length > 0
                ? storeTags(release, releaseEntry) : Promise.resolve(),
            release.photos && release.photos.length > 0
                ? storePhotos(release, releaseEntry) : Promise.resolve(),
            release.poster
                ? storePoster(release, releaseEntry) : Promise.resolve(),
            release.trailer && release.trailer.src
                ? storeTrailer(release, releaseEntry) : Promise.resolve(),
        ]);

        return;
    }

    console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
}

async function storeReleases(releases = []) {
    return Promise.map(releases, async (release) => {
        try {
            return storeRelease(release);
        } catch (error) {
            console.error(error);

            return null;
        }
    }, {
        concurrency: 2,
    });
}

async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
    const latestReleases = await scraper.fetchLatest(site, page);

    if (latestReleases.length === 0) {
        return [];
    }

    const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);

    const duplicateReleasesIds = new Set(
        duplicateReleases
            .map(release => release.shoot_id || release.entry_id)
            .concat(duplicateReleases.map(release => release.entry_id || release.shoot_id))
            // exclude accumulated releases to prevent an infinite loop if the next page contains the same releases as the previous
            .concat(accReleases.map(release => release.shootId || release.entryId)),
    );

    const uniqueReleases = latestReleases.filter(release => !duplicateReleasesIds.has(String(release.shootId))
        && !duplicateReleasesIds.has(String(release.entryId))
        && moment(release.date).isAfter(afterDate));

    console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);

    const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;

    if (uniqueReleases.length > 0 && moment(oldestReleaseOnPage).isAfter(afterDate) && (oldestReleaseOnPage || page < argv.pages)) {
        return fetchNewReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
    }

    return accReleases.concat(uniqueReleases);
}

async function fetchReleases() {
    const sites = await accumulateIncludedSites();

    if (sites.length === 0) {
        console.error('None of the specified sites are in the database');
        return [];
    }

    const scenesPerSite = await Promise.map(sites, async (site) => {
        const scraper = scrapers[site.slug] || scrapers[site.network.slug];

        if (scraper) {
            try {
                const afterDate = moment.utc().subtract(...argv.after.split(' ')).toDate();

                const [newReleases, upcomingReleases] = await Promise.all([
                    fetchNewReleases(scraper, site, afterDate),
                    scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [],
                ]);

                console.log(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);

                const markedUpcomingReleases = upcomingReleases.map(release => ({ ...release, upcoming: true }));

                const finalReleases = argv.deep
                    ? await Promise.map([...newReleases, ...markedUpcomingReleases], async (release) => {
                        if (release.url) {
                            const scene = await fetchScene(release.url, release);

                            return {
                                ...release,
                                ...scene,
                            };
                        }

                        return release;
                    }, {
                        concurrency: 2,
                    })
                    : newReleases;

                if (argv.save) {
                    await storeReleases(finalReleases);
                }

                return [
                    ...finalReleases.map(release => ({
                        ...release,
                        network: site.network,
                    })),
                    ...upcomingReleases.map(release => ({
                        ...release,
                        network: site.network,
                        upcoming: true,
                    })),
                ];
            } catch (error) {
                if (argv.debug) {
                    console.error(`${site.id}: Failed to fetch releases`, error);
                    return [];
                }

                console.log(`${site.id}: Failed to fetch releases`);

                return [];
            }
        }

        console.error(`Cound not find scraper for '${site.name}' (${site.slug})`);

        return [];
    }, {
        concurrency: 2,
    });

    const accumulatedScenes = scenesPerSite.reduce((acc, siteScenes) => ([...acc, ...siteScenes]), []);
    const sortedScenes = accumulatedScenes.sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));

    knex.destroy();

    return sortedScenes;
}

module.exports = fetchReleases;
src/fetch-scene.js (deleted)
@@ -1,126 +0,0 @@
'use strict';

const config = require('config');
const moment = require('moment');

const knex = require('./knex');
const argv = require('./argv');
const scrapers = require('./scrapers');

async function findSite(url) {
    const { hostname } = new URL(url);
    const domain = hostname.replace(/^www./, '');

    const site = await knex('sites')
        .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
        .where('sites.url', 'like', `%${domain}`)
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .first()
        // scene might use generic network URL, let network scraper determine channel site
        || await knex('networks')
            .where('url', 'like', `%${domain}`)
            .first();

    return {
        id: site.id,
        name: site.name,
        slug: site.slug,
        description: site.description,
        url: site.url,
        network: {
            id: site.network_id || site.id,
            slug: site.network_slug || site.slug,
            parameters: site.network_parameters && JSON.parse(site.network_parameters),
        },
        parameters: site.parameters && JSON.parse(site.parameters),
        isFallback: site.network_id === undefined,
    };
}

function deriveFilename(scene) {
    const props = {
        siteName: scene.site.name,
        sceneId: scene.shootId,
        sceneTitle: scene.title,
        sceneActors: scene.actors.join(config.filename.actorsJoin),
        sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),
    };

    const filename = config.filename.pattern.replace(/\{\w+\}/g, (match) => {
        const prop = match.slice(1, -1);
        const value = props[prop];

        if (value && config.filename.subpatterns[prop]) {
            return config.filename.subpatterns[prop]
                .replace(/\{\w+\}/, value)
                .replace(/\//g, config.filename.slash);
        }

        if (value) {
            return value.replace(/\//g, config.filename.slash) || '';
        }

        return '';
    });

    return filename;
}

async function storeRelease(release) {
    const curatedRelease = {
        site_id: release.site.id,
        shoot_id: release.shootId || null,
        entry_id: release.entryId || null,
        url: release.url,
        title: release.title,
        date: release.date,
        description: release.description,
        // director: release.director,
        duration: release.duration,
        photos: release.photos ? release.photos.length : 0,
        likes: release.rating && release.rating.likes,
        dislikes: release.rating && release.rating.dislikes,
        rating: release.rating && release.rating.stars,
    };

    console.log('Saving release to database');

    await knex.raw(`${knex('releases').insert(curatedRelease).toString()} ON CONFLICT (site_id, shoot_id) DO UPDATE SET
        description = EXCLUDED.description,
        likes = EXCLUDED.likes,
        dislikes = EXCLUDED.dislikes,
        rating = EXCLUDED.rating
    `);

    return release;
}

async function fetchScene(url, release) {
    const site = release.site || await findSite(url);
    const scraper = scrapers[site.slug] || scrapers[site.network.slug];

    if (!scraper) {
        throw new Error('Could not find scraper for URL');
    }

    if (!scraper.fetchScene) {
        throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
    }

    const scene = await scraper.fetchScene(url, site);
    const filename = deriveFilename(scene);

    if (argv.scene && argv.save) {
        await storeRelease(scene);
    }

    // knex.destroy();

    return {
        ...scene,
        filename,
        copy: filename,
    };
}

module.exports = fetchScene;
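For context on what the deleted deriveFilename did: it resolved {placeholder} tokens in a configured pattern against scene properties. A standalone sketch of that mechanism, with a hypothetical pattern and props (the '#' stands in for config.filename.slash):

// Hypothetical values, for illustration only.
const pattern = '{sceneDate} {siteName} - {sceneTitle}';
const props = { sceneDate: '2019-03-01', siteName: 'Example Site', sceneTitle: 'A/B Test' };

const filename = pattern.replace(/\{\w+\}/g, (match) => {
    const value = props[match.slice(1, -1)];
    return value ? value.replace(/\//g, '#') : '';
});

console.log(filename); // '2019-03-01 Example Site - A#B Test'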
25 src/media.js
@@ -28,7 +28,21 @@ async function getThumbnail(buffer) {
         .toBuffer();
 }

+async function createMediaDirectory(release, releaseId) {
+    if (release.poster || (release.photos && release.photos.length)) {
+        await fs.mkdir(
+            path.join(config.media.path, release.site.network.slug, release.site.slug, releaseId.toString()),
+            { recursive: true },
+        );
+    }
+}
+
 async function storePoster(release, releaseEntry) {
+    if (!release.poster) {
+        console.warn(`No poster available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+        return;
+    }
+
     console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

     const res = await bhttp.get(release.poster);
@@ -66,6 +80,11 @@ async function storePoster(release, releaseEntry) {
 }

 async function storePhotos(release, releaseEntry) {
+    if (release.photos.length === 0) {
+        console.warn(`No photos available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+        return;
+    }
+
     console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

     const files = await Promise.map(release.photos, async (photoUrl, index) => {
@@ -123,6 +142,11 @@ async function storePhotos(release, releaseEntry) {
 }

 async function storeTrailer(release, releaseEntry) {
+    if (!release.trailer || !release.trailer.src) {
+        console.warn(`No trailer available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+        return;
+    }
+
     console.log(`Storing trailer for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

     const { pathname } = new URL(release.trailer.src);
@@ -146,6 +170,7 @@ async function storeTrailer(release, releaseEntry) {
 }

 module.exports = {
+    createMediaDirectory,
     storePoster,
     storePhotos,
     storeTrailer,
src/networks.js
@@ -4,7 +4,7 @@ const knex = require('./knex');
 const whereOr = require('./utils/where-or');
 const { fetchSites } = require('./sites');

-async function curateNetwork(network) {
+async function curateNetwork(network, includeParameters = false) {
     const [sites, studios] = await Promise.all([
         fetchSites({ network_id: network.id }),
         knex('studios')
@@ -18,6 +18,7 @@ async function curateNetwork(network) {
         description: network.description,
         slug: network.slug,
         sites,
+        parameters: includeParameters ? JSON.parse(network.parameters) : null,
         studios: studios.map(studio => ({
             id: studio.id,
             name: studio.name,
@@ -32,6 +33,21 @@ function curateNetworks(releases) {
     return Promise.all(releases.map(async release => curateNetwork(release)));
 }

+async function findNetworkByUrl(url) {
+    const { hostname } = new URL(url);
+    const domain = hostname.replace(/^www./, '');
+
+    const network = await knex('networks')
+        .where('networks.url', 'like', `%${domain}`)
+        .first();
+
+    if (network) {
+        return curateNetwork(network, true);
+    }
+
+    return null;
+}
+
 async function fetchNetworks(queryObject) {
     const releases = await knex('networks')
         .where(builder => whereOr(queryObject, 'networks', builder))
@@ -54,4 +70,5 @@ async function fetchNetworksFromReleases() {
 module.exports = {
     fetchNetworks,
     fetchNetworksFromReleases,
+    findNetworkByUrl,
 };
src/releases.js
@@ -1,7 +1,17 @@
 'use strict';

+const Promise = require('bluebird');
 const knex = require('./knex');
+const argv = require('./argv');
 const whereOr = require('./utils/where-or');
+const { storeTags } = require('./tags');
+const { storeActors } = require('./actors');
+const {
+    createMediaDirectory,
+    storePoster,
+    storePhotos,
+    storeTrailer,
+} = require('./media');

 async function curateRelease(release) {
     const [actors, tags, media] = await Promise.all([
@@ -71,6 +81,69 @@ function curateReleases(releases) {
     return Promise.all(releases.map(async release => curateRelease(release)));
 }

+function curateScrapedRelease(release) {
+    return {
+        site_id: release.site.id,
+        studio_id: release.studio ? release.studio.id : null,
+        shoot_id: release.shootId || null,
+        entry_id: release.entryId || null,
+        url: release.url,
+        title: release.title,
+        date: release.date,
+        description: release.description,
+        // director: release.director,
+        duration: release.duration,
+        likes: release.rating && release.rating.likes,
+        dislikes: release.rating && release.rating.dislikes,
+        rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
+        deep: Boolean(argv.deep && release.url && !release.upcoming),
+    };
+}
+
+async function storeRelease(release) {
+    const curatedRelease = curateScrapedRelease(release);
+
+    const releaseEntries = await knex('releases')
+        .insert(curatedRelease)
+        .returning('*');
+
+    if (releaseEntries.length) {
+        const releaseEntry = releaseEntries[0];
+
+        console.log(`Stored (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
+
+        await createMediaDirectory(release, releaseEntry.id);
+
+        await Promise.all([
+            storeActors(release, releaseEntry),
+            storeTags(release, releaseEntry),
+            storePhotos(release, releaseEntry),
+            storePoster(release, releaseEntry),
+            storeTrailer(release, releaseEntry),
+        ]);
+
+        return releaseEntry.id;
+    }
+
+    console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
+
+    return null;
+}
+
+async function storeReleases(releases) {
+    return Promise.map(releases, async (release) => {
+        try {
+            return storeRelease(release);
+        } catch (error) {
+            console.error(error);
+
+            return null;
+        }
+    }, {
+        concurrency: 2,
+    });
+}
+
 function commonQuery(queryBuilder, {
     filter = [],
     after = new Date(0), // January 1970
@@ -160,4 +233,6 @@ module.exports = {
     fetchSiteReleases,
     fetchNetworkReleases,
     fetchTagReleases,
+    storeRelease,
+    storeReleases,
 };
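The new storeReleases throttles database writes with bluebird's Promise.map, resolving failed items to null instead of rejecting the whole batch. A self-contained sketch of the pattern (processAll, items, and worker are placeholder names):

const Promise = require('bluebird');

// Process items two at a time; a failed item logs its error and
// yields null rather than aborting the remaining work.
async function processAll(items, worker) {
    return Promise.map(items, async (item) => {
        try {
            return await worker(item);
        } catch (error) {
            console.error(error);
            return null;
        }
    }, {
        concurrency: 2,
    });
}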
58 src/scrape-release.js (new file)
@@ -0,0 +1,58 @@
'use strict';

const config = require('config');

const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const { storeRelease } = require('./releases');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');

async function findSite(url, release) {
    const site = (release && release.site) || await findSiteByUrl(url);

    if (site) {
        return site;
    }

    const network = await findNetworkByUrl(url);

    if (network) {
        return {
            ...network,
            isFallback: true,
        };
    }

    return null;
}

async function scrapeRelease(url, release, deep = false) {
    const site = await findSite(url, release);

    if (!site) {
        throw new Error('Could not find site in database');
    }

    const scraper = scrapers[site.slug] || scrapers[site.network.slug];

    if (!scraper) {
        throw new Error('Could not find scraper for URL');
    }

    if (!scraper.fetchScene) {
        throw new Error(`The '${site.name}'-scraper cannot fetch individual releases`);
    }

    const scene = await scraper.fetchScene(url, site);

    if (!deep && argv.save) {
        // don't store release when called by site scraper
        const releaseId = await storeRelease(scene);

        console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`);
    }

    return scene;
}

module.exports = scrapeRelease;
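A hypothetical invocation of the new module, mirroring how src/app.js calls it above; the URL is illustrative only:

const scrapeRelease = require('./scrape-release');

// Scrape a single release page; with --save set, storeRelease also persists it.
scrapeRelease('https://www.brazzers.com/scenes/view/id/1234/example-scene', null, false)
    .then(scene => console.log(scene.title))
    .catch(error => console.error(error.message));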
136 src/scrape-sites.js (new file)
@@ -0,0 +1,136 @@
'use strict';

const Promise = require('bluebird');
const moment = require('moment');

const argv = require('./argv');
const knex = require('./knex');
const { fetchIncludedSites } = require('./sites');
const scrapers = require('./scrapers/scrapers');
const scrapeRelease = require('./scrape-release');
const { storeReleases } = require('./releases');

function getAfterDate() {
    return moment
        .utc()
        .subtract(...argv.after.split(' '))
        .toDate();
}

async function findDuplicateReleaseIds(latestReleases, accReleases) {
    const duplicateReleases = await knex('releases')
        .whereIn('entry_id', latestReleases.map(({ entryId }) => entryId));

    // include accumulated releases as duplicates to prevent an infinite
    // loop when the next page contains the same releases as the previous
    return new Set(duplicateReleases
        .map(release => release.entry_id)
        .concat(accReleases.map(release => release.entryId)));
}

async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
    const latestReleases = await scraper.fetchLatest(site, page);

    if (latestReleases.length === 0) {
        return [];
    }

    const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
    const duplicateReleaseIds = await findDuplicateReleaseIds(latestReleases, accReleases);

    const uniqueReleases = latestReleases
        .filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
            && moment(release.date).isAfter(afterDate)); // release is newer than the specified date limit

    console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);

    if (
        uniqueReleases.length > 0
        && (oldestReleaseOnPage || page < argv.pages)
        && moment(oldestReleaseOnPage).isAfter(afterDate)
    ) {
        // oldest release on page is newer than specified limit, fetch next page
        return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
    }

    return accReleases.concat(uniqueReleases);
}

async function scrapeUpcomingReleases(scraper, site) {
    if (scraper.fetchUpcoming) {
        const upcomingReleases = await scraper.fetchUpcoming(site);

        return upcomingReleases.map(release => ({ ...release, upcoming: true }));
    }

    return [];
}

async function deepFetchReleases(baseReleases) {
    return Promise.map(baseReleases, async (release) => {
        if (release.url) {
            const fullRelease = await scrapeRelease(release.url, release, true);

            return {
                ...release,
                ...fullRelease,
            };
        }

        return release;
    }, {
        concurrency: 2,
    });
}

async function scrapeSiteReleases(scraper, site) {
    const [newReleases, upcomingReleases] = await Promise.all([
        scrapeUniqueReleases(scraper, site), // fetch basic release info from scene overview
        scrapeUpcomingReleases(scraper, site), // fetch basic release info from upcoming overview
    ]);

    console.log(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);

    const baseReleases = [...newReleases, ...upcomingReleases];

    if (argv.deep) {
        // follow URL for every release
        return deepFetchReleases(baseReleases);
    }

    return baseReleases;
}

async function scrapeReleases() {
    const sites = await fetchIncludedSites();

    console.log(`Found ${sites.length} sites in database`);

    await Promise.map(sites, async (site) => {
        const scraper = scrapers[site.slug] || scrapers[site.network.slug];

        if (!scraper) {
            console.warn(`No scraper found for '${site.name}' (${site.slug})`);
            return;
        }

        try {
            const siteReleases = await scrapeSiteReleases(scraper, site);

            if (argv.save) {
                await storeReleases(siteReleases);
            }
        } catch (error) {
            if (argv.debug) {
                console.error(`${site.id}: Failed to fetch releases`, error);
                return;
            }

            console.warn(`${site.id}: Failed to fetch releases`);
        }
    }, {
        concurrency: 2,
    });
}

module.exports = scrapeReleases;
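The pagination recursion above keeps fetching while three conditions hold; a condensed restatement of the stop test (function and parameter names here are illustrative, not part of the commit):

const moment = require('moment');

// Fetch another page only while the current page still yielded unseen
// releases, the --pages cap has not been reached, and the oldest
// release on the page is still newer than the --after cutoff.
function shouldFetchNextPage({ uniqueReleases, oldestReleaseOnPage, afterDate, page, maxPages }) {
    return uniqueReleases.length > 0
        && page < maxPages
        && moment(oldestReleaseOnPage).isAfter(afterDate);
}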
src/scrapers/brazzers.js
@@ -5,7 +5,7 @@ const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');

-const fetchSites = require('../sites');
+const { fetchSites } = require('../sites');
 const { matchTags } = require('../tags');

 function scrape(html, site, upcoming) {
@@ -23,7 +23,7 @@ function scrape(html, site, upcoming) {
     const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
     const title = sceneLinkElement.attr('title');
-    const shootId = url.split('/').slice(-3, -2)[0];
+    const entryId = url.split('/').slice(-3, -2)[0];

     const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
     const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();
@@ -36,7 +36,7 @@ function scrape(html, site, upcoming) {
     return acc.concat({
         url,
-        shootId,
+        entryId,
         title,
         actors,
         date,
@@ -56,7 +56,7 @@ async function scrapeScene(html, url, site) {
     const videoJson = $('script:contains("window.videoUiOptions")').html();
     const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('"},') + 2));

-    const shootId = url.split('/').slice(-3, -2)[0];
+    const entryId = url.split('/').slice(-3, -2)[0];
     const title = $('.scene-title[itemprop="name"]').text();

     const description = $('#scene-description p[itemprop="description"]')
@@ -83,20 +83,20 @@ async function scrapeScene(html, url, site) {
     const trailer = `https:${videoData.stream_info.http.paths.mp4_480_1500}`;
     const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();

-    const [tags, channelSite] = await Promise.all([
+    const [tags, [channelSite]] = await Promise.all([
         matchTags(rawTags),
         site.isFallback
-            ? [fetchSites({
+            ? fetchSites({
                 slug: siteSlug,
                 name: siteName,
                 url: siteUrl,
-            })]
+            })
-            : site,
+            : [site],
     ]);

     return {
         url,
-        shootId,
+        entryId,
         title,
         description,
         actors,
@@ -5,8 +5,8 @@ const Promise = require('bluebird');
 const bhttp = require('bhttp');
 const { JSDOM } = require('jsdom');
 const moment = require('moment');
-const knex = require('knex');

+const knex = require('../knex');
 const { matchTags } = require('../tags');
 const pluckPhotos = require('../utils/pluck-photos');
@@ -3,8 +3,8 @@
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
-const knex = require('knex');

+const { fetchSites } = require('../sites');
 const { matchTags } = require('../tags');

 function scrapeLatest(html, site) {
@@ -71,13 +71,17 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
     const { average: stars } = ratingRes.body;

-    const sitename = $('.shoot-logo a').attr('href').split('/')[2];
+    const siteName = $('.shoot-logo a').attr('href').split('/')[2];
+    const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();
     const rawTags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

-    const [channelSite, tags] = await Promise.all([
+    const [[channelSite], tags] = await Promise.all([
         site.isFallback
-            ? knex('sites').where({ slug: sitename }).first()
-            : site,
+            ? fetchSites({
+                slug: siteSlug,
+                name: siteName,
+            })
+            : [site],
         matchTags(rawTags),
     ]);
@@ -7,7 +7,7 @@ const moment = require('moment');
 function scrape(html, site) {
     const $ = cheerio.load(html, { normalizeWhitespace: true });

-    const shootId = $('li').attr('id');
+    const entryId = $('li').attr('id');
     const sceneLinkElement = $('#scene_title_border a');
     const url = `${site.url}/${sceneLinkElement.attr('href')}`;
     const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
@@ -22,7 +22,7 @@ function scrape(html, site) {
     return {
         url,
-        shootId,
+        entryId,
         title,
         actors,
         date,
@@ -8,10 +8,10 @@ const moment = require('moment');
 const fetchSites = require('../sites');
 const { matchTags } = require('../tags');

-async function getPhotos(shootId, site) {
+async function getPhotos(entryId, site) {
     const { hostname } = new URL(site.url);

-    const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${shootId}`);
+    const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
     const html = res.body.toString();

     const $ = cheerio.load(html, { normalizeWhitespace: true });
@@ -50,7 +50,7 @@ function scrapeLatest(html, site) {
     const url = sceneLinkElement.attr('href');
     const title = sceneLinkElement.text();
-    const shootId = url.split('/').slice(-1)[0];
+    const entryId = url.split('/').slice(-1)[0];

     const date = moment.utc($(element).find('.scene-date'), 'MM/DD/YYYY').toDate();

@@ -64,7 +64,7 @@ function scrapeLatest(html, site) {
     const scene = {
         url,
-        shootId,
+        entryId,
         title,
         actors,
         date,
@@ -83,7 +83,7 @@ function scrapeLatest(html, site) {
 async function scrapeScene(html, url, site) {
     const $ = cheerio.load(html, { normalizeWhitespace: true });

-    const shootId = url.split('/').slice(-1)[0];
+    const entryId = url.split('/').slice(-1)[0];
     const title = $('.video-wrapper meta[itemprop="name"]').attr('content');

     const date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
@@ -93,8 +93,12 @@ async function scrapeScene(html, url, site) {
     const [minutes, seconds] = $('.video-wrapper meta[itemprop="duration"]').attr('content').match(/\d+/g);
     const duration = Number(minutes) * 60 + Number(seconds);

-    const poster = $('meta[property="og:image"]').attr('content');
-    const trailer = $('meta[property="og:video"]').attr('content');
+    const posterScript = $('script:contains(poster)').html();
+    const posterLink = posterScript.slice(posterScript.indexOf('https://'), posterScript.indexOf('.jpg') + 4);
+    const poster = $('meta[property="og:image"]').attr('content') || posterLink;
+
+    const trailerElementSrc = $('#videojs-trailer source').attr('src');
+    const trailer = $('meta[property="og:video"]').attr('content') || trailerElementSrc;

     const likes = Number($('.content-desc #social-actions #likes').text());

@@ -102,13 +106,13 @@ async function scrapeScene(html, url, site) {
     const [tags, photos, channelSite] = await Promise.all([
         matchTags(rawTags),
-        getPhotos(shootId, site),
+        getPhotos(entryId, site),
         getChannelSite($, site),
     ]);

     const scene = {
         url,
-        shootId,
+        entryId,
         title,
         date,
         actors,
@@ -23,7 +23,7 @@ function scrapeLatest(html, site) {
     const { videos: scenes } = JSON.parse(stateScript.slice(stateScript.indexOf('{'), stateScript.indexOf('};') + 1));

     return scenes.map((scene) => {
-        const shootId = String(scene.newId);
+        const entryId = String(scene.newId);

         const {
             title,
@@ -40,7 +40,7 @@ function scrapeLatest(html, site) {
     return {
         url,
-        shootId,
+        entryId,
         title,
         actors,
         date,
@@ -65,8 +65,8 @@ async function scrapeScene(html, url, site) {
     const stateObject = $('script:contains("INITIAL_STATE")');
     const data = JSON.parse(stateObject.html().trim().slice(27, -1));

-    const shootId = data.page.data[`${pathname}${search}`].data.video;
-    const scene = data.videos.find(video => video.newId === shootId);
+    const entryId = data.page.data[`${pathname}${search}`].data.video;
+    const scene = data.videos.find(video => video.newId === entryId);

     const [poster, ...photos] = scene.rotatingThumbsUrlSizes.map(photo => photo['1040w']);
     const trailer = scene.previews.listing.find(preview => preview.height === 353) || null;
@@ -86,7 +86,7 @@ async function scrapeScene(html, url, site) {
     return {
         url,
-        shootId,
+        entryId,
         title,
         description,
         actors,
@@ -3,9 +3,9 @@
 const Promise = require('bluebird');
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
-const knex = require('knex');
 const moment = require('moment');

+const knex = require('../knex');
 const { matchTags } = require('../tags');

 const defaultTags = {
87 src/sites.js
@@ -1,9 +1,12 @@
 'use strict';

+const config = require('config');
+
+const argv = require('./argv');
 const knex = require('./knex');
 const whereOr = require('./utils/where-or');

-async function curateSite(site) {
+async function curateSite(site, includeParameters = false) {
     const parameters = JSON.parse(site.parameters);

     return {
@@ -13,12 +16,14 @@ async function curateSite(site) {
         description: site.description,
         slug: site.slug,
         independent: !!parameters && parameters.independent,
+        parameters: includeParameters ? JSON.parse(site.parameters) : null,
         network: {
             id: site.network_id,
             name: site.network_name,
             description: site.network_description,
             slug: site.network_slug,
             url: site.network_url,
+            parameters: includeParameters ? JSON.parse(site.network_parameters) : null,
         },
     };
 }
@@ -27,12 +32,85 @@ function curateSites(sites) {
     return Promise.all(sites.map(async site => curateSite(site)));
 }

+function destructConfigNetworks(networks) {
+    return networks.reduce((acc, network) => {
+        if (Array.isArray(network)) {
+            // network specifies sites
+            return {
+                ...acc,
+                sites: [...acc.sites, ...network[1]],
+            };
+        }
+
+        return {
+            ...acc,
+            networks: [...acc.networks, network],
+        };
+    }, {
+        networks: [],
+        sites: [],
+    });
+}
+
+async function findSiteByUrl(url) {
+    const { hostname } = new URL(url);
+    const domain = hostname.replace(/^www./, '');
+
+    const site = await knex('sites')
+        .leftJoin('networks', 'sites.network_id', 'networks.id')
+        .select(
+            'sites.*',
+            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
+        )
+        .where('sites.url', 'like', `%${domain}`)
+        .first();
+
+    if (site) {
+        return curateSite(site, true);
+    }
+
+    return null;
+}
+
+async function fetchSitesFromArgv() {
+    const rawSites = await knex('sites')
+        .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
+        .whereIn('sites.slug', argv.sites || [])
+        .orWhereIn('networks.slug', argv.networks || [])
+        .leftJoin('networks', 'sites.network_id', 'networks.id');
+
+    return curateSites(rawSites, true);
+}
+
+async function fetchSitesFromConfig() {
+    const included = destructConfigNetworks(config.include);
+
+    const networks = await knex('networks').select('id').whereIn('slug', included.networks || []);
+    const networkIds = networks.map(network => network.id);
+
+    const rawSites = await knex('sites')
+        .select('sites.*', 'networks.name as network_name')
+        .whereIn('sites.slug', included.sites || [])
+        .orWhereIn('network_id', networkIds)
+        .leftJoin('networks', 'sites.network_id', 'networks.id');
+
+    return curateSites(rawSites, true);
+}
+
+async function fetchIncludedSites() {
+    if (argv.networks || argv.sites) {
+        return fetchSitesFromArgv();
+    }
+
+    return fetchSitesFromConfig();
+}
+
 async function fetchSites(queryObject) {
     const sites = await knex('sites')
         .where(builder => whereOr(queryObject, 'sites', builder))
         .select(
             'sites.*',
-            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as networks_description',
+            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
         )
         .leftJoin('networks', 'sites.network_id', 'networks.id')
         .limit(100);
@@ -51,6 +129,11 @@ async function fetchSitesFromReleases() {
 }

 module.exports = {
+    curateSites,
+    fetchIncludedSites,
     fetchSites,
+    fetchSitesFromConfig,
+    fetchSitesFromArgv,
     fetchSitesFromReleases,
+    findSiteByUrl,
 };
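The new findSiteByUrl strips the www. prefix from the scene URL's hostname and matches the remaining domain against stored site URLs with a leading LIKE wildcard. A sketch of the matching step (the example URL and stored value are hypothetical):

const { hostname } = new URL('https://www.example-site.com/scene/1234');
const domain = hostname.replace(/^www./, ''); // 'example-site.com'

// knex('sites').where('sites.url', 'like', `%${domain}`) would then match
// a stored site URL such as 'https://www.example-site.com'.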
src/tags.js
@@ -25,7 +25,12 @@ function curateTags(tags) {
 }

 async function storeTags(release, releaseEntry) {
-    return knex('tags_associated').insert(release.tags.map(tagId => ({
+    if (!release.tags || release.tags.length === 0) {
+        console.warn(`No tags available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+        return;
+    }
+
+    await knex('tags_associated').insert(release.tags.map(tagId => ({
         tag_id: tagId,
         release_id: releaseEntry.id,
     })));
src/tui/formatters.js (deleted)
@@ -1,29 +0,0 @@
'use strict';

const moment = require('moment');

const formatters = {
    site: site => site.name,
    network: network => network.name,
    date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'),
    actors: actors => actors.join(', '),
    rating: (rating) => {
        if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
            return '\x1b[90mUnrated\x1b[0m';
        }

        if (rating.likes !== undefined && rating.dislikes === undefined) {
            return `\x1b[93m★\x1b[0m N/A \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)}`;
        }

        if (rating.stars) {
            return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
        }

        const stars = rating.likes || rating.dislikes ? Math.floor(((rating.likes * 5 + rating.dislikes) / (rating.likes + rating.dislikes)) * 100) / 100 : null;

        return `\x1b[93m★\x1b[0m ${stars.toFixed(2)} \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)} \x1b[31m▼\x1b[0m ${String(rating.dislikes).padEnd(3)}`;
    },
};

module.exports = formatters;
src/tui/render.js (deleted)
@@ -1,111 +0,0 @@
'use strict';

const config = require('config');
const blessed = require('neo-blessed');
const opn = require('opn');
const formatters = require('./formatters');

function renderReleases(scenes, screen) {
    screen.realloc();

    const tableTop = blessed.Text({
        content: config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┬' : '┐\x1b[0m'}`, '\x1b[30m┌'),
    });

    const items = scenes.map((scene, sceneIndex) => {
        const row = config.columns.reduce((acc, column) => {
            const value = (scene[column.value] && (formatters[column.value]
                ? formatters[column.value](scene[column.value], column)
                : scene[column.value])
                .toString()) || '\x1b[90mNot available\x1b[0m';

            const realLength = value.replace(/\x1b\[\d+m/g, '').length; // eslint-disable-line no-control-regex
            const entityLength = value.length - realLength;

            const truncatedValue = realLength > column.width - 2 ? `${value.slice(0, column.width - 2 - 3)}...` : value;
            const paddedValue = truncatedValue.padEnd(column.width + entityLength - 1).padStart(column.width + entityLength);
            const coloredValue = scene.upcoming ? `\x1b[92m${paddedValue}\x1b[0m` : `\x1b[97m${paddedValue}\x1b[0m`;

            return `${acc}${coloredValue}\x1b[90m│\x1b[0m`;
        }, '\x1b[90m│\x1b[0m');

        if (sceneIndex < scenes.length - 1) {
            const line = config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┼' : '┤\x1b[0m'}`, '\n\x1b[30m├');

            return `${row}${line}`;
        }

        return `${row}${sceneIndex}`;
    });

    const menu = blessed.List({
        style: {
            selected: {
                bold: true,
            },
        },
        top: 1,
        height: screen.rows - 3,
        // width: 161,
        width: config.columns.reduce((acc, column) => acc + column.width, 0),
        keys: true,
        vi: true,
        mouse: true,
        scrollbar: {
            style: {
                bg: 'red',
            },
            track: {
                bg: 'magenta',
            },
        },
        items,
    });

    menu.search = (cb) => {
        const searchbox = blessed.Textbox({
            inputOnFocus: true,
        });

        screen.append(searchbox);
        searchbox.focus();

        screen.render();

        searchbox.on('submit', () => {
            menu.focus();
            cb(null, searchbox.value);

            screen.append(menu);
            screen.render();
        });
    };

    const tableBottom = blessed.Text({
        content: config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┴' : '┘\x1b[0m\n'}`, '\x1b[30m└'),
        top: screen.rows - 2,
    });

    screen.append(tableTop);
    screen.append(menu);
    screen.append(tableBottom);

    menu.focus();

    menu.on('select', (child) => {
        const scene = scenes[menu.getItemIndex(child)];

        opn(scene.url);
    });

    screen.render();
}

function renderScene(scene, _screen) {
    console.log(scene);
}

module.exports = {
    renderReleases,
    renderScene,
};
src/web/server.js
@@ -63,8 +63,10 @@ function initServer() {
     app.use(router);

-    app.listen(config.web.port, () => {
-        console.log(`Web server listening on port ${config.web.port}`);
+    const server = app.listen(config.web.port, config.web.host, () => {
+        const { address, port } = server.address();
+
+        console.log(`Web server listening on ${address}:${port}`);
     });
 }
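Reading the bound address back from the server handle only works once the 'listening' callback fires, which is why the new code captures the return value of app.listen. A standalone equivalent (host and port are placeholders):

const express = require('express');

const app = express();

// server.address() is safe to call inside the callback: by then the
// socket is bound and the actual address and port are known.
const server = app.listen(5000, '0.0.0.0', () => {
    const { address, port } = server.address();

    console.log(`Web server listening on ${address}:${port}`);
});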