From 5a6bf2b42f65a150c13b5bb08ec59392090e2fcc Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Tue, 31 Dec 2019 03:12:52 +0100 Subject: [PATCH] Further refactoring. Fixed actor pages and more. --- assets/components/actors/actor.vue | 9 +- assets/js/actors/actions.js | 67 +++++++-- assets/js/curate.js | 8 +- migrations/20190325001339_releases.js | 14 ++ src/actors.js | 16 +-- src/media.js | 189 +++++++++++++------------- src/releases.js | 2 + src/scrape-release.js | 2 + src/scrapers/scrapers.js | 2 +- src/utils/upsert.js | 32 +++-- src/web/plugins/actors.js | 2 - 11 files changed, 206 insertions(+), 137 deletions(-) diff --git a/assets/components/actors/actor.vue b/assets/components/actors/actor.vue index 962c97d7..d215c258 100644 --- a/assets/components/actors/actor.vue +++ b/assets/components/actors/actor.vue @@ -232,7 +232,7 @@ /> - + @@ -252,10 +252,9 @@ function scrollPhotos(event) { } async function mounted() { - [this.actor] = await Promise.all([ - this.$store.dispatch('fetchActors', { actorSlug: this.$route.params.actorSlug }), - this.fetchReleases(), - ]); + this.actor = await this.$store.dispatch('fetchActors', { actorSlug: this.$route.params.actorSlug }); + + console.log(this.actor.releases[0]); if (this.actor) { this.pageTitle = this.actor.name; diff --git a/assets/js/actors/actions.js b/assets/js/actors/actions.js index 2e8d04a3..d2d7ca11 100644 --- a/assets/js/actors/actions.js +++ b/assets/js/actors/actions.js @@ -1,9 +1,15 @@ import { graphql, get } from '../api'; +import { + releasePosterFragment, + releaseActorsFragment, + releaseTagsFragment, +} from '../fragments'; +import { curateRelease } from '../curate'; function curateActor(actor) { const curatedActor = { ...actor, - avatar: actor.avatar[0], + avatar: actor.avatar.media, height: actor.heightMetric && { metric: actor.heightMetric, imperial: actor.heightImperial, @@ -24,6 +30,14 @@ function curateActor(actor) { }, }; + if (actor.releases) { + curatedActor.releases = actor.releases.map(release => curateRelease(release.release)); + } + + if (actor.photos) { + curatedActor.photos = actor.photos.map(photo => photo.media); + } + return curatedActor; } @@ -50,15 +64,19 @@ function initActorActions(store, _router) { hasPiercings tattoos piercings - avatar: actorsMediasByTargetId(condition: { role:"avatar" }) { - thumbnail - path + avatar: actorsAvatarByActorId { + media { + thumbnail + path + } } - photos: actorsMediasByTargetId(condition: { role:"photo" }) { - id - thumbnail - path - index + photos: actorsPhotos { + media { + id + thumbnail + path + index + } } birthCity birthState @@ -74,7 +92,7 @@ function initActorActions(store, _router) { name alias } - social: actorsSocialsByTargetId { + social: actorsSocials { id url platform @@ -84,6 +102,29 @@ function initActorActions(store, _router) { name slug } + releases: releasesActors { + release { + id + url + title + date + ${releaseActorsFragment} + ${releaseTagsFragment} + ${releasePosterFragment} + site { + id + name + slug + url + network { + id + name + slug + url + } + } + } + } } } `, { @@ -104,8 +145,10 @@ function initActorActions(store, _router) { id name slug - avatar: actorsMediasByTargetId(condition: { role:"avatar" }) { - thumbnail + avatar: actorsAvatarByActorId { + media { + thumbnail + } } birthCountry: countryByBirthCountryAlpha2 { alpha2 diff --git a/assets/js/curate.js b/assets/js/curate.js index f20b0596..3398168e 100644 --- a/assets/js/curate.js +++ b/assets/js/curate.js @@ -12,16 +12,18 @@ function curateActor(actor) { } function curateRelease(release) { + console.log(release); + const curatedRelease = { ...release, - actors: release.actors.map(({ actor }) => curateActor(actor)), + actors: release.actors ? release.actors.map(({ actor }) => curateActor(actor)) : [], poster: release.poster && release.poster.media, - tags: release.tags.map(({ tag }) => tag), + tags: release.tags ? release.tags.map(({ tag }) => tag) : [], network: release.site.network, }; if (release.photos) curatedRelease.photos = release.photos.map(({ media }) => media); - if (release.trailer) [curatedRelease.trailer] = release.trailer.media; + if (release.trailer) curatedRelease.trailer = release.trailer.media; return curatedRelease; } diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index a842425c..f6f2fcef 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -378,6 +378,19 @@ exports.up = knex => Promise.resolve() table.unique('release_id'); })) + .then(() => knex.schema.createTable('releases_covers', (table) => { + table.integer('release_id', 16) + .notNullable() + .references('id') + .inTable('releases'); + + table.integer('media_id', 16) + .notNullable() + .references('id') + .inTable('media'); + + table.unique(['release_id', 'media_id']); + })) .then(() => knex.schema.createTable('releases_trailers', (table) => { table.integer('release_id', 16) .notNullable() @@ -445,6 +458,7 @@ exports.down = knex => knex.raw(` DROP TABLE IF EXISTS releases_directors CASCADE; DROP TABLE IF EXISTS releases_posters CASCADE; DROP TABLE IF EXISTS releases_photos CASCADE; + DROP TABLE IF EXISTS releases_covers CASCADE; DROP TABLE IF EXISTS releases_trailers CASCADE; DROP TABLE IF EXISTS releases_tags CASCADE; DROP TABLE IF EXISTS actors_avatars CASCADE; diff --git a/src/actors.js b/src/actors.js index 973f769d..d2aa6dbf 100644 --- a/src/actors.js +++ b/src/actors.js @@ -17,8 +17,8 @@ async function curateActor(actor) { knex('media') .where({ domain: 'actors', target_id: actor.id }) .orderBy('index'), - knex('social') - .where({ domain: 'actors', target_id: actor.id }) + knex('actors_social') + .where('actor_id', actor.id) .orderBy('platform', 'desc'), ]); @@ -197,8 +197,7 @@ function curateSocialEntry(url, actorId) { return { url: match.url, platform: match.platform, - domain: 'actors', - target_id: actorId, + actor_id: actorId, }; } @@ -207,10 +206,7 @@ async function curateSocialEntries(urls, actorId) { return []; } - const existingSocialLinks = await knex('social').where({ - domain: 'actors', - target_id: actorId, - }); + const existingSocialLinks = await knex('actors_social').where('actor_id', actorId); return urls.reduce((acc, url) => { const socialEntry = curateSocialEntry(url, actorId); @@ -243,7 +239,7 @@ async function fetchActors(queryObject, limit = 100) { async function storeSocialLinks(urls, actorId) { const curatedSocialEntries = await curateSocialEntries(urls, actorId); - await knex('social').insert(curatedSocialEntries); + await knex('actors_social').insert(curatedSocialEntries); } async function storeActor(actor, scraped = false, scrapeSuccess = false) { @@ -358,7 +354,7 @@ async function scrapeActors(actorNames) { updateActor(profile, true, true), // storeAvatars(profile, actorEntry), storePhotos(profile.avatars, { - domain: 'actors', + domain: 'actor', role: 'photo', primaryRole: 'avatar', targetId: actorEntry.id, diff --git a/src/media.js b/src/media.js index c4ab36e7..f1cd271c 100644 --- a/src/media.js +++ b/src/media.js @@ -10,6 +10,7 @@ const sharp = require('sharp'); const blake2 = require('blake2'); const knex = require('./knex'); +const upsert = require('./utils/upsert'); function getHash(buffer) { const hash = blake2.createHash('blake2b', { digestLength: 24 }); @@ -41,7 +42,7 @@ async function createThumbnail(buffer) { withoutEnlargement: true, }) .jpeg({ - quality: 50, + quality: 75, }) .toBuffer(); } @@ -53,7 +54,7 @@ async function createMediaDirectory(domain, subpath) { return filepath; } -function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId) { +function curatePhotoEntries(files) { return files.map((file, index) => ({ path: file.filepath, thumbnail: file.thumbpath, @@ -61,51 +62,33 @@ function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId hash: file.hash, source: file.source, index, - domain, - target_id: targetId, - role: file.role || role, })); } -// before fetching -async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) { - const photoSourceEntries = await knex('media') - .whereIn('source', photos.flat()) - .whereIn('domain', domains) - .whereIn('role', roles); // accept string argument +async function findDuplicates(photos, identifier, prop = null, label) { + const duplicates = await knex('media') + .whereIn(identifier, photos.flat().map(photo => (prop ? photo[prop] : photo))); - const photoSources = new Set(photoSourceEntries.map(photo => photo.source)); - const newPhotos = photos.filter(source => (Array.isArray(source) // fallbacks provided? - ? !source.some(sourceX => photoSources.has(sourceX)) // ensure none of the sources match - : !photoSources.has(source))); + const duplicateLookup = new Set(duplicates.map(photo => photo[prop || identifier])); + const originals = photos.filter(source => (Array.isArray(source) // fallbacks provided? + ? !source.some(sourceX => duplicateLookup.has(prop ? sourceX[prop] : sourceX)) // ensure none of the sources match + : !duplicateLookup.has(prop ? source[prop] : source))); - if (photoSourceEntries.length > 0) { - console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`); + if (duplicates.length > 0) { + console.log(`${duplicates.length} media items already present by ${identifier} for ${label}`); } - return newPhotos; -} - -// after fetching -async function filterHashDuplicates(files, domains = ['releases'], roles = ['photo'], identifier) { - const photoHashEntries = await knex('media') - .whereIn('hash', files.map(file => file.hash)) - .whereIn('domain', [].concat(domains)) - .whereIn('role', [].concat(roles)); // accept string argument - - const photoHashes = new Set(photoHashEntries.map(entry => entry.hash)); - - if (photoHashEntries.length > 0) { - console.log(`Ignoring ${photoHashEntries.length} ${roles} items already present by hash for ${identifier}`); + if (originals.length > 0) { + console.log(`Fetching ${originals.length} new media items for ${label}`); } - return files.filter(file => file && !photoHashes.has(file.hash)); + return [duplicates, originals]; } -async function fetchPhoto(photoUrl, index, identifier, attempt = 1) { +async function fetchPhoto(photoUrl, index, label, attempt = 1) { if (Array.isArray(photoUrl)) { return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => { - const photo = await fetchPhoto(url, index, identifier); + const photo = await fetchPhoto(url, index, label); if (photo) { return photo; @@ -136,11 +119,11 @@ async function fetchPhoto(photoUrl, index, identifier, attempt = 1) { throw new Error(`Response ${res.statusCode} not OK`); } catch (error) { - console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${identifier} (${photoUrl}): ${error}`); + console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`); if (attempt < 3) { await Promise.delay(1000); - return fetchPhoto(photoUrl, index, identifier, attempt + 1); + return fetchPhoto(photoUrl, index, label, attempt + 1); } return null; @@ -148,7 +131,7 @@ async function fetchPhoto(photoUrl, index, identifier, attempt = 1) { } async function savePhotos(files, { - domain = 'releases', + domain = 'release', subpath, role = 'photo', naming = 'index', @@ -158,11 +141,11 @@ async function savePhotos(files, { const thumbnail = await createThumbnail(file.photo); const filename = naming === 'index' - ? `${file.role || role}-${index + 1}` + ? `${file.role || role}${index + 1}` : `${timestamp + index}`; - const filepath = path.join(domain, subpath, `${filename}.${file.extension}`); - const thumbpath = path.join(domain, subpath, `${filename}_thumb.${file.extension}`); + const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`); + const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`); await Promise.all([ fs.writeFile(path.join(config.media.path, filepath), file.photo), @@ -179,49 +162,28 @@ async function savePhotos(files, { } async function storePhotos(photos, { - domain = 'releases', + domain = 'release', role = 'photo', naming = 'index', targetId, subpath, primaryRole, // role to assign to first photo if not already in database, used mainly for avatars -}, identifier) { +}, label) { if (!photos || photos.length === 0) { - console.warn(`No ${role}s available for ${identifier}`); + console.warn(`No ${role}s available for ${label}`); return; } const pluckedPhotos = pluckPhotos(photos); - const roles = primaryRole ? [role, primaryRole] : [role]; + const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label); - const newPhotos = await filterSourceDuplicates(pluckedPhotos, [domain], roles, identifier); - - if (newPhotos.length === 0) return; - - console.log(`Fetching ${newPhotos.length} ${role}s for ${identifier}`); - - const metaFiles = await Promise.map(newPhotos, async (photoUrl, index) => fetchPhoto(photoUrl, index, identifier), { + const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), { concurrency: 10, }).filter(photo => photo); - const [uniquePhotos, primaryPhoto] = await Promise.all([ - filterHashDuplicates(metaFiles, [domain], roles, identifier), - primaryRole - ? await knex('media') - .where('domain', domain) - .where('target_id', targetId) - .where('role', primaryRole) - .first() - : null, - ]); + const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label); - if (primaryRole && !primaryPhoto) { - console.log(`Setting first photo as ${primaryRole} for ${identifier}`); - - uniquePhotos[0].role = primaryRole; - } - - const savedPhotos = await savePhotos(uniquePhotos, { + const savedPhotos = await savePhotos(hashOriginals, { domain, role, targetId, @@ -231,59 +193,96 @@ async function storePhotos(photos, { const curatedPhotoEntries = curatePhotoEntries(savedPhotos, domain, role, targetId); - await knex('media').insert(curatedPhotoEntries); + const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*'); + const photoEntries = Array.isArray(newPhotos) + ? [...sourceDuplicates, ...hashDuplicates, ...newPhotos] + : [...sourceDuplicates, ...hashDuplicates]; - console.log(`Stored ${newPhotos.length} ${role}s for ${identifier}`); + const photoAssociations = photoEntries + .map(photoEntry => ({ + [`${domain}_id`]: targetId, + media_id: photoEntry.id, + })); + + if (primaryRole) { + // store one photo as a 'primary' photo, such as an avatar or cover + const primaryPhoto = await knex(`${domain}s_${primaryRole}s`) + .where(`${domain}_id`, targetId) + .first(); + + if (primaryPhoto) { + await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']); + return; + } + + await Promise.all([ + upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']), + upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']), + ]); + } + + await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']); } async function storeTrailer(trailers, { domain = 'releases', - role = 'trailer', targetId, subpath, -}, identifier) { +}, label) { // support scrapers supplying multiple qualities const trailer = Array.isArray(trailers) ? trailers.find(trailerX => [1080, 720].includes(trailerX.quality)) || trailers[0] : trailers; if (!trailer || !trailer.src) { - console.warn(`No trailer available for ${identifier}`); + console.warn(`No trailer available for ${label}`); return; } - console.log(`Storing trailer for ${identifier}`); + const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label); - const { pathname } = new URL(trailer.src); - const mimetype = trailer.type || mime.getType(pathname); + const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => { + console.log('trailer x', trailerX, trailerX.src); - const res = await bhttp.get(trailer.src); - const filepath = path.join('releases', subpath, `trailer${trailer.quality ? `_${trailer.quality}` : ''}.${mime.getExtension(mimetype)}`); + const { pathname } = new URL(trailerX.src); + const mimetype = trailerX.type || mime.getType(pathname); - await Promise.all([ - fs.writeFile(path.join(config.media.path, filepath), res.body), - knex('media').insert({ + const res = await bhttp.get(trailerX.src); + const hash = getHash(res.body); + const filepath = path.join(domain, subpath, `trailer${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`); + + return { path: filepath, mime: mimetype, - source: trailer.src, - domain, - target_id: targetId, - role, - quality: trailer.quality || null, - }), - ]); -} + source: trailerX.src, + quality: trailerX.quality || null, + hash, + }; + }); -async function findAvatar(actorId, domain = 'actors') { - return knex('media') - .where('domain', domain) - .where('target_id', actorId) - .where('role', 'avatar'); + const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', null, label); + + console.log('hash dup', hashDuplicates, hashOriginals); + + const newTrailers = await knex('media') + .insert(hashOriginals) + .returning('*'); + + console.log(newTrailers); + + await Promise.all([ + // fs.writeFile(path.join(config.media.path, filepath), res.body), + /* + knex('releases_trailers').insert({ + release_id: targetId, + media_id: mediaEntries[0].id, + }), + */ + ]); } module.exports = { createMediaDirectory, - findAvatar, storePhotos, storeTrailer, }; diff --git a/src/releases.js b/src/releases.js index 1c96f338..c5fcf6d6 100644 --- a/src/releases.js +++ b/src/releases.js @@ -306,6 +306,7 @@ async function storeReleaseAssets(release, releaseId) { targetId: releaseId, subpath, }, identifier), + /* storePhotos(release.covers, { role: 'cover', targetId: releaseId, @@ -315,6 +316,7 @@ async function storeReleaseAssets(release, releaseId) { targetId: releaseId, subpath, }, identifier), + */ ]); } catch (error) { console.log(release.url, error); diff --git a/src/scrape-release.js b/src/scrape-release.js index c18a25b1..d7d0a00d 100644 --- a/src/scrape-release.js +++ b/src/scrape-release.js @@ -57,6 +57,7 @@ async function scrapeRelease(url, release, deep = true, type = 'scene') { if (!deep && argv.save) { // don't store release when called by site scraper + /* const movie = scrapedRelease.movie ? await scrapeRelease(scrapedRelease.movie, null, false, 'movie') : null; @@ -65,6 +66,7 @@ async function scrapeRelease(url, release, deep = true, type = 'scene') { const { releases: [storedMovie] } = await storeReleases([movie]); curatedRelease.parentId = storedMovie.id; } + */ const { releases: [storedRelease] } = await storeReleases([curatedRelease]); diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 26d44810..27f31e38 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -54,13 +54,13 @@ module.exports = { actors: { // ordered by data priority xempire, + julesjordan, brazzers, legalporno, pornhub, freeones, freeonesLegacy, kellymadison, - julesjordan, ddfnetwork, }, }; diff --git a/src/utils/upsert.js b/src/utils/upsert.js index ea139174..e8230697 100644 --- a/src/utils/upsert.js +++ b/src/utils/upsert.js @@ -1,11 +1,21 @@ 'use strict'; -async function upsert(table, items, identifier = 'id', knex) { - const duplicates = await knex(table).whereIn(identifier, items.map(item => item[identifier])); - const duplicatesByIdentifier = duplicates.reduce((acc, item) => ({ ...acc, [item[identifier]]: item }), {}); +const knex = require('../knex'); + +async function upsert(table, items, identifier = ['id'], _knex) { + const identifiers = Array.isArray(identifier) ? identifier : [identifier]; + + const duplicates = await knex(table).whereIn(identifiers, items.map(item => identifiers.map(identifierX => item[identifierX]))); + const duplicatesByIdentifiers = duplicates.reduce((acc, duplicate) => { + const duplicateIdentifier = identifiers.map(identifierX => duplicate[identifierX]).toString(); + + return { ...acc, [duplicateIdentifier]: duplicate }; + }, {}); const { insert, update } = items.reduce((acc, item) => { - if (duplicatesByIdentifier[item[identifier]]) { + const itemIdentifier = identifiers.map(identifierX => item[identifierX]).toString(); + + if (duplicatesByIdentifiers[itemIdentifier]) { acc.update.push(item); return acc; } @@ -23,11 +33,15 @@ async function upsert(table, items, identifier = 'id', knex) { const [inserted, updated] = await Promise.all([ knex(table).returning('*').insert(insert), - knex.transaction(async trx => Promise.all(update.map(item => trx - .where({ [identifier]: item[identifier] }) - .update(item) - .into(table) - .returning('*')))), + knex.transaction(async trx => Promise.all(update.map((item) => { + const clause = identifiers.reduce((acc, identifierX) => ({ ...acc, [identifierX]: item[identifierX] }), {}); + + return trx + .where(clause) + .update(item) + .into(table) + .returning('*'); + }))), ]); return { diff --git a/src/web/plugins/actors.js b/src/web/plugins/actors.js index 4e073cdd..9899dce5 100644 --- a/src/web/plugins/actors.js +++ b/src/web/plugins/actors.js @@ -20,8 +20,6 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({ resolvers: { Actor: { age(parent, _args, _context, _info) { - console.log(parent); - if (!parent.birthdate) return null; return moment().diff(parent.birthdate, 'years');