Experimenting using GraphQL in favor of REST.
This commit is contained in:
@@ -12,13 +12,13 @@ const { scrapeActors, scrapeBasicActors } = require('./actors');
|
||||
|
||||
async function init() {
|
||||
if (argv.scene) {
|
||||
await Promise.map(argv.scene, async url => scrapeRelease(url, null, false, false), {
|
||||
await Promise.map(argv.scene, async url => scrapeRelease(url, null, false, 'scene'), {
|
||||
concurrency: 5,
|
||||
});
|
||||
}
|
||||
|
||||
if (argv.movie) {
|
||||
await Promise.map(argv.movie, async url => scrapeRelease(url, null, false, true), {
|
||||
await Promise.map(argv.movie, async url => scrapeRelease(url, null, false, 'movie'), {
|
||||
concurrency: 5,
|
||||
});
|
||||
}
|
||||
|
||||
262
src/releases.js
262
src/releases.js
@@ -15,8 +15,45 @@ const {
|
||||
} = require('./media');
|
||||
const { fetchSites, findSiteByUrl } = require('./sites');
|
||||
|
||||
async function curateRelease(release) {
|
||||
const [actors, tags, media] = await Promise.all([
|
||||
/**
 * Knex `modify` helper that decorates a query on the `releases` table with the
 * joins, selected columns, tag exclusion, date window, ordering and limit
 * shared by the release queries.
 *
 * @param {object} queryBuilder - knex query builder; callers in this file pass `knex('releases')`
 * @param {object} options
 * @param {string|string[]} [options.filter=[]] - tag slug(s); releases carrying any of them are excluded
 * @param {Date} [options.after] - exclusive lower bound for `releases.date`
 * @param {Date} [options.before] - inclusive upper bound for `releases.date`
 * @param {number} [options.limit=100] - maximum number of rows
 */
function commonQuery(queryBuilder, {
  filter = [],
  after = new Date(0), // January 1970
  before = new Date(2 ** 44), // far-future upper bound (2 ** 44 ms falls in the year 2527)
  limit = 100,
}) {
  const finalFilter = [].concat(filter); // ensure filter is array

  queryBuilder
    .leftJoin('sites', 'releases.site_id', 'sites.id')
    .leftJoin('studios', 'releases.studio_id', 'studios.id')
    .leftJoin('networks', 'sites.network_id', 'networks.id')
    .select(
      'releases.*',
      'sites.name as site_name', 'sites.slug as site_slug', 'sites.url as site_url', 'sites.network_id', 'sites.parameters as site_parameters',
      // the studio slug was previously a duplicated 'sites.slug as site_slug',
      // leaving studio_slug undefined on every row
      'studios.name as studio_name', 'studios.slug as studio_slug', 'studios.url as studio_url',
      'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description',
    )
    .whereNotExists((builder) => {
      // apply tag filters: drop releases that have any of the filtered tags
      builder
        .select('*')
        .from('tags_associated')
        .leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
        .whereIn('tags.slug', finalFilter)
        .where('tags_associated.domain', 'releases')
        .whereRaw('tags_associated.target_id = releases.id');
    })
    .andWhere('releases.date', '>', after)
    .andWhere('releases.date', '<=', before)
    .orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }])
    .limit(limit);
}
|
||||
|
||||
async function curateMovie(release, deep = true) {
|
||||
const [scenes, actors, tags, media] = await Promise.all([
|
||||
knex('releases')
|
||||
.where('releases.parent_id', release.id)
|
||||
.modify(commonQuery, {}),
|
||||
knex('actors_associated')
|
||||
.select(
|
||||
'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate',
|
||||
@@ -49,8 +86,9 @@ async function curateRelease(release) {
|
||||
.orderBy(['role', 'index']),
|
||||
]);
|
||||
|
||||
return {
|
||||
const curatedRelease = {
|
||||
id: release.id,
|
||||
type: release.type,
|
||||
title: release.title,
|
||||
date: release.date,
|
||||
dateAdded: release.created_at,
|
||||
@@ -108,6 +146,126 @@ async function curateRelease(release) {
|
||||
url: release.network_url,
|
||||
},
|
||||
};
|
||||
|
||||
if (scenes && scenes.length > 0 && deep) {
|
||||
curatedRelease.scenes = await Promise.map(scenes, scene => curateRelease(scene, false));
|
||||
}
|
||||
|
||||
return curatedRelease;
|
||||
}
|
||||
|
||||
/**
 * Turns a scene row (as selected through commonQuery) into the nested release
 * object, loading its parent movie, actors, tags and media alongside.
 *
 * @param {object} release - release row including the joined site_*, studio_* and network_* columns
 * @param {boolean} [deep=true] - when true and a parent movie row is found, attach it as `movie`
 * @returns {Promise<object>} the curated release
 */
async function curateScene(release, deep = true) {
  // parent release, looked up through the scene's parent_id
  const movieQuery = knex('releases')
    .where('releases.id', release.parent_id)
    .modify(commonQuery, {})
    .first();

  // actors with their birth country and the thumbnail of their index-0 media row
  const actorsQuery = knex('actors_associated')
    .select(
      'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate',
      'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
      'media.thumbnail as avatar',
    )
    .where({ release_id: release.id })
    .leftJoin('actors', 'actors.id', 'actors_associated.actor_id')
    .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
    .leftJoin('media', (joinClause) => {
      joinClause
        .on('media.target_id', 'actors.id')
        .andOnVal('media.domain', 'actors')
        .andOnVal('media.index', '0');
    })
    .orderBy('actors.gender');

  const tagsQuery = knex('tags_associated')
    .select('tags.name', 'tags.slug')
    .where({
      domain: 'releases',
      target_id: release.id,
    })
    .leftJoin('tags', 'tags.id', 'tags_associated.tag_id')
    .orderBy('tags.priority', 'desc');

  const mediaQuery = knex('media')
    .where({
      target_id: release.id,
      domain: 'releases',
    })
    .orderBy(['role', 'index']);

  const [movie, actors, tags, media] = await Promise.all([
    movieQuery,
    actorsQuery,
    tagsQuery,
    mediaQuery,
  ]);

  const mediaWithRole = role => media.filter(item => item.role === role);

  const curateActor = (actor) => {
    const origin = actor.birth_country_alpha2
      ? {
        country: {
          name: actor.birth_country_alias,
          alpha2: actor.birth_country_alpha2,
        },
      }
      : null;

    return {
      id: actor.id,
      slug: actor.slug,
      name: actor.name,
      gender: actor.gender,
      birthdate: actor.birthdate,
      // age today versus age on the release date
      age: moment().diff(actor.birthdate, 'years'),
      ageThen: moment(release.date).diff(actor.birthdate, 'years'),
      avatar: actor.avatar,
      origin,
    };
  };

  // site parameters are parsed from their JSON string only when present
  const siteParameters = release.site_parameters
    ? JSON.parse(release.site_parameters)
    : {};

  const studio = release.studio_id
    ? {
      id: release.studio_id,
      name: release.studio_name,
      slug: release.studio_slug,
      url: release.studio_url,
    }
    : null;

  const curatedRelease = {
    id: release.id,
    type: release.type,
    title: release.title,
    date: release.date,
    dateAdded: release.created_at,
    description: release.description,
    url: release.url,
    shootId: release.shoot_id,
    entryId: release.entry_id,
    actors: actors.map(curateActor),
    director: release.director,
    tags,
    duration: release.duration,
    photos: mediaWithRole('photo'),
    poster: mediaWithRole('poster')[0],
    covers: mediaWithRole('cover'),
    trailer: mediaWithRole('trailer')[0],
    site: {
      id: release.site_id,
      name: release.site_name,
      independent: siteParameters.independent || false,
      slug: release.site_slug,
      url: release.site_url,
    },
    studio,
    network: {
      id: release.network_id,
      name: release.network_name,
      description: release.network_description,
      slug: release.network_slug,
      url: release.network_url,
    },
  };

  if (deep && movie) {
    // the movie is curated with deep = false
    curatedRelease.movie = await curateMovie(movie, false);
  }

  return curatedRelease;
}
|
||||
|
||||
/**
 * Dispatches a release row to the curator matching its type.
 *
 * @param {object} release - release row; `type === 'movie'` selects curateMovie, anything else curateScene
 * @param {boolean} [deep=true] - forwarded to the curator. curateMovie calls
 *   `curateRelease(scene, false)` for its scenes; without forwarding, that flag
 *   was silently dropped and every nested scene was curated deeply again.
 * @returns {Promise<object>} the curated release
 */
async function curateRelease(release, deep = true) {
  if (release.type === 'movie') {
    return curateMovie(release, deep);
  }

  return curateScene(release, deep);
}
|
||||
|
||||
function curateReleases(releases) {
|
||||
@@ -129,12 +287,14 @@ async function getChannelSite(release) {
|
||||
}
|
||||
}
|
||||
|
||||
async function curateScrapedRelease(release) {
|
||||
async function curateReleaseEntry(release) {
|
||||
const curatedRelease = {
|
||||
site_id: release.site.id,
|
||||
studio_id: release.studio ? release.studio.id : null,
|
||||
shoot_id: release.shootId || null,
|
||||
entry_id: release.entryId || null,
|
||||
parent_id: release.parentId,
|
||||
type: release.type,
|
||||
url: release.url,
|
||||
title: release.title,
|
||||
date: release.date,
|
||||
@@ -159,40 +319,6 @@ async function curateScrapedRelease(release) {
|
||||
return curatedRelease;
|
||||
}
|
||||
|
||||
/**
 * Knex `modify` helper that decorates a query on the `releases` table with the
 * joins, selected columns, tag exclusion, date window, ordering and limit
 * shared by the release queries.
 *
 * @param {object} queryBuilder - knex query builder; callers in this file pass `knex('releases')`
 * @param {object} options
 * @param {string|string[]} [options.filter=[]] - tag slug(s); releases carrying any of them are excluded
 * @param {Date} [options.after] - exclusive lower bound for `releases.date`
 * @param {Date} [options.before] - inclusive upper bound for `releases.date`
 * @param {number} [options.limit=100] - maximum number of rows
 */
function commonQuery(queryBuilder, {
  filter = [],
  after = new Date(0), // January 1970
  before = new Date(2 ** 44), // far-future upper bound (2 ** 44 ms falls in the year 2527)
  limit = 100,
}) {
  const finalFilter = [].concat(filter); // ensure filter is array

  queryBuilder
    .leftJoin('sites', 'releases.site_id', 'sites.id')
    .leftJoin('studios', 'releases.studio_id', 'studios.id')
    .leftJoin('networks', 'sites.network_id', 'networks.id')
    .select(
      'releases.*',
      'sites.name as site_name', 'sites.slug as site_slug', 'sites.url as site_url', 'sites.network_id', 'sites.parameters as site_parameters',
      // the studio slug was previously a duplicated 'sites.slug as site_slug',
      // leaving studio_slug undefined on every row
      'studios.name as studio_name', 'studios.slug as studio_slug', 'studios.url as studio_url',
      'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description',
    )
    .whereNotExists((builder) => {
      // apply tag filters: drop releases that have any of the filtered tags
      builder
        .select('*')
        .from('tags_associated')
        .leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
        .whereIn('tags.slug', finalFilter)
        .where('tags_associated.domain', 'releases')
        .whereRaw('tags_associated.target_id = releases.id');
    })
    // qualified with the table name so the filter stays unambiguous across the joins
    .andWhere('releases.date', '>', after)
    .andWhere('releases.date', '<=', before)
    .orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }])
    .limit(limit);
}
|
||||
|
||||
async function fetchReleases(queryObject = {}, options = {}) {
|
||||
const releases = await knex('releases')
|
||||
.modify(commonQuery, options)
|
||||
@@ -244,6 +370,40 @@ async function fetchTagReleases(queryObject, options = {}) {
|
||||
return curateReleases(releases);
|
||||
}
|
||||
|
||||
/**
 * Groups release ids by trimmed actor name.
 *
 * @param {object[]} releases - releases with an `id` and an optional `actors` array of name strings
 * @returns {object} map of trimmed actor name to the ids of the releases listing that actor
 */
function accumulateActors(releases) {
  const releaseIdsByActor = {};

  releases
    .filter(release => release.actors) // releases without actors contribute nothing
    .forEach((release) => {
      release.actors.forEach((actorName) => {
        const key = actorName.trim();
        const knownIds = releaseIdsByActor[key];

        releaseIdsByActor[key] = knownIds
          ? knownIds.concat(release.id)
          : [release.id];
      });
    });

  return releaseIdsByActor;
}
|
||||
|
||||
/**
 * Groups release ids by the value of each release's `movie` property.
 *
 * @param {object[]} releases - releases with an `id` and an optional `movie` value used as the key
 * @returns {object} map of movie key to the ids of the releases referring to it
 */
function accumulateMovies(releases) {
  const releaseIdsByMovie = {};

  releases
    .filter(release => release.movie) // releases without a movie contribute nothing
    .forEach((release) => {
      const knownIds = releaseIdsByMovie[release.movie];

      releaseIdsByMovie[release.movie] = knownIds
        ? knownIds.concat(release.id)
        : [release.id];
    });

  return releaseIdsByMovie;
}
|
||||
|
||||
async function storeReleaseAssets(release, releaseId) {
|
||||
const subpath = `${release.site.network.slug}/${release.site.slug}/${release.id}/`;
|
||||
const identifier = `"${release.title}" (${releaseId})`;
|
||||
@@ -279,7 +439,7 @@ async function storeReleaseAssets(release, releaseId) {
|
||||
|
||||
async function storeRelease(release) {
|
||||
const existingRelease = await knex('releases').where('entry_id', release.entryId).first();
|
||||
const curatedRelease = await curateScrapedRelease(release);
|
||||
const curatedRelease = await curateReleaseEntry(release);
|
||||
|
||||
if (existingRelease && !argv.redownload) {
|
||||
return existingRelease.id;
|
||||
@@ -339,22 +499,8 @@ async function storeReleases(releases) {
|
||||
concurrency: 10,
|
||||
}).filter(release => release);
|
||||
|
||||
const actors = storedReleases.reduce((acc, release) => {
|
||||
if (!release.actors) return acc;
|
||||
|
||||
release.actors.forEach((actor) => {
|
||||
const trimmedActor = actor.trim();
|
||||
|
||||
if (acc[trimmedActor]) {
|
||||
acc[trimmedActor] = acc[trimmedActor].concat(release.id);
|
||||
return;
|
||||
}
|
||||
|
||||
acc[trimmedActor] = [release.id];
|
||||
});
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
const actors = accumulateActors(storedReleases);
|
||||
const movies = accumulateMovies(storedReleases);
|
||||
|
||||
await Promise.all([
|
||||
associateActors(actors, storedReleases),
|
||||
@@ -363,7 +509,11 @@ async function storeReleases(releases) {
|
||||
}),
|
||||
]);
|
||||
|
||||
return storedReleases;
|
||||
return {
|
||||
releases: storedReleases,
|
||||
actors,
|
||||
movies,
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -28,7 +28,7 @@ async function findSite(url, release) {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function scrapeRelease(url, release, deep = false, isMovie = false) {
|
||||
async function scrapeRelease(url, release, deep = true, type = 'scene') {
|
||||
const site = await findSite(url, release);
|
||||
|
||||
if (!site) {
|
||||
@@ -41,21 +41,32 @@ async function scrapeRelease(url, release, deep = false, isMovie = false) {
|
||||
throw new Error('Could not find scraper for URL');
|
||||
}
|
||||
|
||||
if (!isMovie && !scraper.fetchScene) {
|
||||
if (type === 'scene' && !scraper.fetchScene) {
|
||||
throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
|
||||
}
|
||||
|
||||
if (isMovie && !scraper.fetchMovie) {
|
||||
if (type === 'movie' && !scraper.fetchMovie) {
|
||||
throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`);
|
||||
}
|
||||
|
||||
const scrapedRelease = isMovie
|
||||
? await scraper.fetchMovie(url, site, release)
|
||||
: await scraper.fetchScene(url, site, release);
|
||||
const scrapedRelease = type === 'scene'
|
||||
? await scraper.fetchScene(url, site, release)
|
||||
: await scraper.fetchMovie(url, site, release);
|
||||
|
||||
const curatedRelease = { ...scrapedRelease, type };
|
||||
|
||||
if (!deep && argv.save) {
|
||||
// don't store release when called by site scraper
|
||||
const [storedRelease] = await storeReleases([scrapedRelease]);
|
||||
const movie = scrapedRelease.movie
|
||||
? await scrapeRelease(scrapedRelease.movie, null, false, 'movie')
|
||||
: null;
|
||||
|
||||
if (movie) {
|
||||
const { releases: [storedMovie] } = await storeReleases([movie]);
|
||||
curatedRelease.parentId = storedMovie.id;
|
||||
}
|
||||
|
||||
const { releases: [storedRelease] } = await storeReleases([curatedRelease]);
|
||||
|
||||
if (storedRelease) {
|
||||
console.log(`http://${config.web.host}:${config.web.port}/scene/${storedRelease.id}`);
|
||||
|
||||
@@ -70,7 +70,7 @@ async function deepFetchReleases(baseReleases) {
|
||||
return Promise.map(baseReleases, async (release) => {
|
||||
if (release.url) {
|
||||
try {
|
||||
const fullRelease = await scrapeRelease(release.url, release, true);
|
||||
const fullRelease = await scrapeRelease(release.url, release, true, 'scene');
|
||||
|
||||
return {
|
||||
...release,
|
||||
@@ -114,7 +114,7 @@ async function scrapeSiteReleases(scraper, site) {
|
||||
async function scrapeReleases() {
|
||||
const networks = await fetchIncludedSites();
|
||||
|
||||
const scrapedReleases = await Promise.map(networks, async network => Promise.map(network.sites, async (site) => {
|
||||
const scrapedNetworks = await Promise.map(networks, async network => Promise.map(network.sites, async (site) => {
|
||||
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
||||
|
||||
if (!scraper) {
|
||||
@@ -143,7 +143,8 @@ async function scrapeReleases() {
|
||||
});
|
||||
|
||||
if (argv.save) {
|
||||
await storeReleases(scrapedReleases.flat(2));
|
||||
const { movies } = await storeReleases(scrapedNetworks.flat(2));
|
||||
console.log(movies);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ async function getPhotos(albumUrl) {
|
||||
const lastPhotoPage = Array.from(document.querySelectorAll('.preview-image-container a')).slice(-1)[0].href;
|
||||
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
|
||||
|
||||
const photoUrls = await Promise.map(Array.from({ length: lastPhotoIndex }), async (index) => {
|
||||
const photoUrls = await Promise.map(Array.from({ length: lastPhotoIndex }), async (value, index) => {
|
||||
const pageUrl = `https://blacksonblondes.com${lastPhotoPage.replace(/\d+.jpg/, `${index.toString().padStart(3, '0')}.jpg`)}`;
|
||||
|
||||
return getPhoto(pageUrl);
|
||||
|
||||
@@ -3,9 +3,13 @@
|
||||
const path = require('path');
|
||||
const config = require('config');
|
||||
const express = require('express');
|
||||
const { postgraphile } = require('postgraphile');
|
||||
const Router = require('express-promise-router');
|
||||
const bodyParser = require('body-parser');
|
||||
|
||||
const ConnectionFilterPlugin = require('postgraphile-plugin-connection-filter');
|
||||
const PgSimplifyInflectorPlugin = require('@graphile-contrib/pg-simplify-inflector');
|
||||
|
||||
const {
|
||||
fetchReleases,
|
||||
fetchReleaseById,
|
||||
@@ -28,6 +32,25 @@ function initServer() {
|
||||
const app = express();
|
||||
const router = Router();
|
||||
|
||||
const connectionString = `postgres://${config.database.user}:${config.database.password}@${config.database.host}:5432/${config.database.database}`;
|
||||
|
||||
app.use(postgraphile(
|
||||
connectionString,
|
||||
'public',
|
||||
{
|
||||
// watchPg: true,
|
||||
dynamicJson: true,
|
||||
graphiql: true,
|
||||
enhanceGraphiql: true,
|
||||
allowExplain: () => true,
|
||||
simpleCollections: 'only',
|
||||
graphileBuildOptions: {
|
||||
pgOmitListSuffix: true,
|
||||
},
|
||||
appendPlugins: [PgSimplifyInflectorPlugin, ConnectionFilterPlugin],
|
||||
},
|
||||
));
|
||||
|
||||
router.use('/media', express.static(config.media.path));
|
||||
router.use(express.static('public'));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user