Moved networks to GraphQL.

2020-01-03 00:59:02 +01:00
parent e77dbca954
commit 70e27a6cd9
14 changed files with 169 additions and 76 deletions

View File

@@ -370,7 +370,7 @@ async function scrapeActors(actorNames) {
   await createMediaDirectory('actors', `${newActorEntry.slug}/`);

   await storePhotos(profile.avatars, {
-    domain: 'actors',
+    domain: 'actor',
     role: 'photo',
     primaryRole: 'avatar',
     targetId: newActorEntry.id,
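Note: the surrounding context still calls createMediaDirectory('actors', ...) with the plural, while storePhotos now receives the singular 'actor'. Assuming the domain string is used directly as a path segment (the storePhotos internals are not part of this commit), the two calls would point at different directories, as in this hypothetical sketch:

  const path = require('path');

  // Hypothetical: domain becomes a path segment under the media root.
  function mediaPath(mediaRoot, domain, subpath, filename) {
    // 'actors' vs 'actor' would yield media/actors/... vs media/actor/...
    return path.join(mediaRoot, domain, subpath, filename);
  }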

View File

@@ -1,26 +1,20 @@
'use strict';
const Promise = require('bluebird');
const argv = require('./argv');
const knex = require('./knex');
const initServer = require('./web/server');
const scrapeSites = require('./scrape-sites');
const scrapeRelease = require('./scrape-release');
const { scrapeReleases } = require('./scrape-releases');
const { scrapeActors, scrapeBasicActors } = require('./actors');
async function init() {
if (argv.scene) {
await Promise.map(argv.scene, async url => scrapeRelease(url, null, false, 'scene'), {
concurrency: 5,
});
await scrapeReleases(argv.scene, null, 'scene');
}
if (argv.movie) {
await Promise.map(argv.movie, async url => scrapeRelease(url, null, false, 'movie'), {
concurrency: 5,
});
await scrapeReleases(argv.movie, null, 'movie');
}
if (argv.scrape || argv.networks || argv.sites) {
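Note: the inlined bluebird Promise.map calls move into the new scrapeReleases helper, so the entry point no longer needs bluebird itself. A minimal sketch of the equivalent behaviour (helper name and the concurrency value of 5 are taken from the diff; everything else is an assumption):

  const Promise = require('bluebird');

  // Scrape several release URLs in parallel, at most five at a time,
  // mirroring what the new scrapeReleases helper does internally.
  async function scrapeConcurrently(urls, scrapeOne) {
    return Promise.map(urls, async url => scrapeOne(url), { concurrency: 5 });
  }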

View File

@@ -48,6 +48,7 @@ async function createThumbnail(buffer) {
 }

 async function createMediaDirectory(domain, subpath) {
+  console.log(domain, subpath);
   const filepath = path.join(config.media.path, domain, subpath);

   await fs.mkdir(filepath, { recursive: true });
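Note: fs.mkdir with { recursive: true } (Node 10.12+) creates any missing parent directories and does not throw if the directory already exists, which keeps the helper idempotent. A standalone sketch under those assumptions:

  const path = require('path');
  const fs = require('fs').promises;

  // Create e.g. <mediaRoot>/actor/<slug>/, including any missing parents.
  async function createMediaDirectory(mediaRoot, domain, subpath) {
    const filepath = path.join(mediaRoot, domain, subpath);
    await fs.mkdir(filepath, { recursive: true });
    return filepath;
  }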
@@ -246,8 +247,6 @@ async function storeTrailer(trailers, {
   const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label);

   const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => {
-    console.log('trailer x', trailerX, trailerX.src);
-
     const { pathname } = new URL(trailerX.src);
     const mimetype = trailerX.type || mime.getType(pathname);
@@ -256,6 +255,7 @@ async function storeTrailer(trailers, {
     const filepath = path.join(domain, subpath, `trailer${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`);

     return {
+      trailer: res.body,
       path: filepath,
       mime: mimetype,
       source: trailerX.src,
@@ -264,25 +264,28 @@ async function storeTrailer(trailers, {
     };
   });

-  const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', null, label);
-  console.log('hash dup', hashDuplicates, hashOriginals);
+  const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label);

   const newTrailers = await knex('media')
-    .insert(hashOriginals)
+    .insert(hashOriginals.map(trailerX => ({
+      path: trailerX.path,
+      mime: trailerX.mime,
+      source: trailerX.source,
+      quality: trailerX.quality,
+      hash: trailerX.hash,
+    })))
     .returning('*');

-  console.log(newTrailers);
+  await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer)));

-  await Promise.all([
-    // fs.writeFile(path.join(config.media.path, filepath), res.body),
-    /*
-    knex('releases_trailers').insert({
-      release_id: targetId,
-      media_id: mediaEntries[0].id,
-    }),
-    */
-  ]);
+  const trailerEntries = Array.isArray(newTrailers)
+    ? [...sourceDuplicates, ...hashDuplicates, ...newTrailers]
+    : [...sourceDuplicates, ...hashDuplicates];

+  await upsert('releases_trailers', trailerEntries.map(trailerEntry => ({
+    release_id: targetId,
+    media_id: trailerEntry.id,
+  })), ['release_id', 'media_id']);
 }

 module.exports = {
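Note: storeTrailer now deduplicates twice, first by source URL and then by content hash, inserts only the original files into media, writes them to disk, and links every trailer (new or already known) to its release through releases_trailers. The upsert helper is project-internal and not shown in this commit; a rough Postgres-flavoured equivalent using a raw ON CONFLICT clause (table and column names come from the diff, the rest is an assumption):

  // Rough sketch of upsert('releases_trailers', rows, ['release_id', 'media_id']):
  // insert the join rows, silently skipping pairs that already exist.
  async function upsertIgnore(knex, table, rows, conflictColumns) {
    if (rows.length === 0) return [];

    const insert = knex(table).insert(rows).toString();
    const conflict = conflictColumns.join(', ');

    const { rows: inserted } = await knex.raw(`${insert} ON CONFLICT (${conflict}) DO NOTHING RETURNING *`);
    return inserted;
  }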

View File

@@ -1,6 +1,7 @@
 'use strict';

 const config = require('config');
+const Promise = require('bluebird');

 const argv = require('./argv');
 const scrapers = require('./scrapers/scrapers');
@@ -28,7 +29,7 @@ async function findSite(url, release) {
   return null;
 }

-async function scrapeRelease(url, release, deep = true, type = 'scene') {
+async function scrapeRelease(url, release, type = 'scene') {
   const site = await findSite(url, release);

   if (!site) {
@@ -53,10 +54,17 @@ async function scrapeRelease(url, release, deep = true, type = 'scene') {
     ? await scraper.fetchScene(url, site, release)
     : await scraper.fetchMovie(url, site, release);

-  const curatedRelease = { ...scrapedRelease, type };
+  return scrapedRelease;
+}

-  if (!deep && argv.save) {
-    // don't store release when called by site scraper
+async function scrapeReleases(urls, release, type = 'scene') {
+  const scrapedReleases = await Promise.map(urls, async url => scrapeRelease(url, release, type), {
+    concurrency: 5,
+  });
+
+  const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type }));
+
+  if (argv.save) {
     /*
     const movie = scrapedRelease.movie
       ? await scrapeRelease(scrapedRelease.movie, null, false, 'movie')
@@ -68,14 +76,15 @@ async function scrapeRelease(url, release, deep = true, type = 'scene') {
     }
     */

-    const { releases: [storedRelease] } = await storeReleases([curatedRelease]);
+    const { releases: storedReleases } = await storeReleases(curatedReleases);

-    if (storedRelease) {
-      console.log(`http://${config.web.host}:${config.web.port}/scene/${storedRelease.id}`);
+    if (storedReleases) {
+      console.log(storedReleases.map(storedRelease => `http://${config.web.host}:${config.web.port}/scene/${storedRelease.id}`).join('\n'));
     }
   }

-  return scrapedRelease;
 }

-module.exports = scrapeRelease;
+module.exports = {
+  scrapeRelease,
+  scrapeReleases,
+};
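Note: scrapeRelease now only fetches and returns a release, while the new batch-oriented scrapeReleases handles curation, storage, and logging. A usage sketch based on the new exports (the URLs and the calling context are assumptions):

  const { scrapeRelease, scrapeReleases } = require('./scrape-releases');

  async function example(urls) {
    // Single URL: scrape and return the release without storing it.
    const release = await scrapeRelease(urls[0], null, 'scene');

    // Batch: scrape up to five URLs at a time; stored only when --save is set.
    await scrapeReleases(urls, null, 'scene');

    return release;
  }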

View File

@@ -7,7 +7,7 @@ const argv = require('./argv');
 const knex = require('./knex');
 const { fetchIncludedSites } = require('./sites');
 const scrapers = require('./scrapers/scrapers');
-const scrapeRelease = require('./scrape-release');
+const { scrapeRelease } = require('./scrape-releases');
 const { storeReleases } = require('./releases');

 function getAfterDate() {
@@ -70,7 +70,7 @@ async function deepFetchReleases(baseReleases) {
   return Promise.map(baseReleases, async (release) => {
     if (release.url) {
       try {
-        const fullRelease = await scrapeRelease(release.url, release, true, 'scene');
+        const fullRelease = await scrapeRelease(release.url, release, 'scene');

         return {
           ...release,
@@ -111,7 +111,7 @@ async function scrapeSiteReleases(scraper, site) {
   return baseReleases;
 }

-async function scrapeReleases() {
+async function scrapeSites() {
   const networks = await fetchIncludedSites();

   const scrapedNetworks = await Promise.map(networks, async network => Promise.map(network.sites, async (site) => {
@@ -147,4 +147,4 @@ async function scrapeReleases() {
   }
 }

-module.exports = scrapeReleases;
+module.exports = scrapeSites;
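Note: the rename reflects what the function actually iterates over: networks and their sites, not individual releases, which frees the scrapeReleases name for the new per-URL batch helper. A reduced sketch of the nested fan-out visible in the hunk (the inner concurrency limit is not shown in this excerpt and is an assumption):

  const Promise = require('bluebird');

  // Fan out over every network, and within each network over its sites.
  async function scrapeAll(networks, scrapeSiteReleases) {
    return Promise.map(networks, async network => Promise.map(
      network.sites,
      async site => scrapeSiteReleases(site),
      { concurrency: 2 }, // per-network site concurrency, assumed
    ));
  }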

View File

@@ -50,7 +50,9 @@ function scrapeProfile(html, actorName) {
   if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);

   profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);
-  profile.avatar = document.querySelector('.profile-image-large img').src;
+
+  const avatar = document.querySelector('.profile-image-large img').src;
+  if (!avatar.match('placeholder')) profile.avatar = document.querySelector('.profile-image-large img').src;

   return profile;
 }
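Note: the guard keeps stock placeholder images out of the profile. The committed version re-queries the selector and relies on String#match, which converts its string argument to a regex; a behaviour-equivalent sketch that reuses the already-extracted value and uses includes instead (a stylistic variant, not the committed code):

  // Skip stock placeholder images so they are never stored as avatars.
  const avatar = document.querySelector('.profile-image-large img').src;

  if (!avatar.includes('placeholder')) {
    profile.avatar = avatar;
  }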