2020-03-16 03:10:52 +00:00
'use strict' ;
const config = require ( 'config' ) ;
2020-03-21 01:48:24 +00:00
const logger = require ( './logger' ) ( _ _filename ) ;
2020-03-16 03:10:52 +00:00
const knex = require ( './knex' ) ;
const slugify = require ( './utils/slugify' ) ;
2020-03-24 02:48:24 +00:00
const { associateActors } = require ( './actors' ) ;
2020-03-29 02:00:46 +00:00
const { associateReleaseTags } = require ( './tags' ) ;
2020-03-30 01:01:08 +00:00
const { curateSite } = require ( './sites' ) ;
2020-03-29 02:00:46 +00:00
const { associateReleaseMedia } = require ( './media' ) ;
2020-03-16 03:10:52 +00:00
/**
 * Map a scraped release onto the column layout used for the `releases` table.
 *
 * @param {object} release - Scraped release; `release.site.id` must be present.
 * @param {*} batchId - ID of the batch the current run belongs to.
 * @param {object} [existingRelease] - Truthy when the release is already stored.
 * @returns {object} Row object keyed by column name.
 */
function curateReleaseEntry(release, batchId, existingRelease) {
  // Slug source: the title, else the actor names joined by dashes, else null.
  const slugSource = release.title || release.actors?.join('-') || null;
  const slug = slugify(slugSource, '-', {
    encode: true,
    limit: config.titleSlugLength,
  });

  // Director and rating columns (likes, dislikes, stars) are currently not populated.
  const entry = {
    title: release.title,
    entry_id: release.entryId || null,
    site_id: release.site.id,
    shoot_id: release.shootId || null,
    studio_id: release.studio?.id || null,
    url: release.url,
    date: release.date,
    slug,
    description: release.description,
    duration: release.duration,
    type: release.type,
    // Anything other than a real boolean `true` is stored as false.
    deep: release.deep === true,
    deep_url: release.deepUrl,
    updated_batch_id: batchId,
  };

  // Only releases that are neither known nor already carrying an ID get a creation batch.
  const alreadyKnown = Boolean(existingRelease) || Boolean(release.id);

  return alreadyKnown
    ? entry
    : { ...entry, created_batch_id: batchId };
}
2020-03-16 23:58:03 +00:00
/**
 * Resolve the site for releases that only name a channel slug.
 *
 * Releases that already have a non-fallback site pass through untouched.
 * Releases whose channel slug matches a row in `sites` get that row, run
 * through `curateSite`, as their `site`. Everything else is logged and dropped.
 *
 * @param {object[]} releases - Scraped releases.
 * @returns {Promise<object[]>} Releases that ended up with a usable site.
 */
async function attachChannelSites(releases) {
  const hasOwnSite = release => Boolean(release.site && !release.site.isFallback);

  const channelSlugs = releases
    .filter(release => release.channel && !hasOwnSite(release))
    .map(release => release.channel);

  const siteRows = await knex('sites')
    .leftJoin('networks', 'networks.id', 'sites.network_id')
    .select(
      'sites.*',
      'networks.name as network_name',
      'networks.slug as network_slug',
      'networks.url as network_url',
      'networks.parameters as network_parameters',
      'networks.description as network_description',
    )
    .whereIn('sites.slug', channelSlugs);

  // Index the fetched rows by slug; a later row with the same slug wins.
  const siteRowsBySlug = {};

  for (const siteRow of siteRows) {
    siteRowsBySlug[siteRow.slug] = siteRow;
  }

  const resolveSite = async (release) => {
    if (hasOwnSite(release)) {
      return release;
    }

    const siteRow = release.channel ? siteRowsBySlug[release.channel] : null;

    if (!siteRow) {
      logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);
      return null;
    }

    const site = await curateSite(siteRow);

    return { ...release, site };
  };

  const resolvedReleases = await Promise.all(releases.map(release => resolveSite(release)));

  // Unmatched releases were mapped to null above; strip them out.
  return resolvedReleases.filter(Boolean);
}
/**
 * Replace each release's studio slug with the matching row from the `studios` table.
 *
 * The original implementation wrote the slug back onto the release even when a
 * row was found, so consumers reading `release.studio.id` never saw an ID.
 * A matched release now carries the full studio row.
 *
 * @param {object[]} releases - Releases whose optional `studio` property is a studio slug.
 * @returns {Promise<object[]>} Releases in their original order. Matched ones
 *   are shallow copies with `studio` set to the studio row. Releases without a
 *   studio, or with an unknown slug, are returned as-is (a warning is logged
 *   for unknown slugs).
 */
async function attachStudios(releases) {
  // De-duplicate so each slug is only sent to the database once.
  const studioSlugs = [...new Set(releases.map(release => release.studio).filter(Boolean))];

  const studios = await knex('studios').whereIn('slug', studioSlugs);

  // A Map avoids accidental hits on inherited object keys such as 'constructor'.
  const studioBySlug = new Map(studios.map(studio => [studio.slug, studio]));

  return releases.map((release) => {
    if (!release.studio) {
      return release;
    }

    const studio = studioBySlug.get(release.studio);

    if (studio) {
      return {
        ...release,
        studio,
      };
    }

    logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);

    return release;
  });
}
2020-03-22 02:50:24 +00:00
/**
 * Copy database IDs onto scraped releases, matching on site ID + entry ID.
 *
 * @param {object[]} releases - Scraped releases with `site.id` and `entryId`.
 * @param {object[]} storedReleases - Database rows with `id`, `site_id` and `entry_id`.
 * @returns {object[]} Shallow copies of `releases` with an `id` property. The
 *   `id` is `undefined` when no stored row matches, rather than throwing when
 *   the release's site has no stored rows at all.
 */
function attachReleaseIds(releases, storedReleases) {
  // Null-prototype lookups so keys like 'constructor' never resolve to inherited members.
  const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
    if (!acc[release.site_id]) {
      acc[release.site_id] = Object.create(null);
    }

    acc[release.site_id][release.entry_id] = release.id;

    return acc;
  }, Object.create(null));

  return releases.map(release => ({
    ...release,
    // Optional chaining: a site without any stored rows yields undefined instead of a TypeError.
    id: storedReleaseIdsBySiteIdAndEntryId[release.site.id]?.[release.entryId],
  }));
}
2020-03-28 03:37:04 +00:00
/**
 * Drop releases that occur more than once within the same batch.
 *
 * Two releases count as the same when both their site ID and their entry ID
 * match; the last occurrence in the input is the one that is kept.
 *
 * @param {object[]} releases - Releases with `site.id` and `entryId`.
 * @returns {object[]} De-duplicated releases, grouped per site.
 */
function filterInternalDuplicateReleases(releases) {
  const releasesPerSite = {};

  for (const release of releases) {
    const siteId = release.site.id;
    let siteReleases = releasesPerSite[siteId];

    if (!siteReleases) {
      siteReleases = {};
      releasesPerSite[siteId] = siteReleases;
    }

    // A later release with the same entry ID overwrites the earlier one.
    siteReleases[release.entryId] = release;
  }

  return Object.values(releasesPerSite).flatMap(siteReleases => Object.values(siteReleases));
}
/**
 * Split releases into ones that are new and ones already present in `releases`.
 *
 * In-batch duplicates are removed first. The remainder is checked against the
 * database on the (entry_id, site_id) pair.
 *
 * @param {object[]} releases - Releases with `site.id` and `entryId`.
 * @returns {Promise<{uniqueReleases: object[], duplicateReleases: object[], duplicateReleaseEntries: object[]}>}
 *   New releases, releases that are already stored, and the stored rows that matched.
 */
async function filterDuplicateReleases(releases) {
  const candidates = filterInternalDuplicateReleases(releases);
  const lookupPairs = candidates.map(release => [release.entryId, release.site.id]);

  const duplicateReleaseEntries = await knex('releases')
    .whereIn(['entry_id', 'site_id'], lookupPairs);

  // site_id -> entry_id -> true for every row found in the database.
  const storedBySiteIdAndEntryId = {};

  for (const entry of duplicateReleaseEntries) {
    if (!storedBySiteIdAndEntryId[entry.site_id]) {
      storedBySiteIdAndEntryId[entry.site_id] = {};
    }

    storedBySiteIdAndEntryId[entry.site_id][entry.entry_id] = true;
  }

  const uniqueReleases = [];
  const duplicateReleases = [];

  // Single pass that keeps the candidate order within each bucket.
  for (const release of candidates) {
    if (storedBySiteIdAndEntryId[release.site.id]?.[release.entryId]) {
      duplicateReleases.push(release);
    } else {
      uniqueReleases.push(release);
    }
  }

  return {
    uniqueReleases,
    duplicateReleases,
    duplicateReleaseEntries,
  };
}
2020-03-29 02:00:46 +00:00
/**
 * Rebuild the full-text search documents stored in `releases_search`.
 *
 * A raw SQL query produces one row per release with a TO_TSVECTOR document
 * (first argument 'traxxx') made from the release title, network name/slug/url,
 * site name/slug/url/alias, shoot ID, the release date rendered in several
 * formats, the names of associated actors, the names of associated tags with
 * priority >= 7, and the names of tags flagged `secondary` that alias an
 * associated tag. The resulting rows are then upserted into `releases_search`.
 *
 * @param {Array} [releaseIds] - Release IDs to refresh. When falsy, no WHERE
 *   clause is added and the query covers every release.
 * @returns {Promise<void>}
 */
async function updateReleasesSearch ( releaseIds ) {
logger . info ( ` Updating search documents for ${ releaseIds ? releaseIds . length : 'all' } releases ` ) ;
// The ID list is passed as a single bound array for the ANY(?) placeholder;
// without releaseIds no bindings are supplied at all.
const documents = await knex . raw ( `
SELECT
releases . id AS release _id ,
TO _TSVECTOR (
'traxxx' ,
2020-03-29 21:42:41 +00:00
COALESCE ( releases . title , '' ) || ' ' ||
2020-03-29 02:00:46 +00:00
networks . name || ' ' ||
networks . slug || ' ' ||
networks . url || ' ' ||
sites . name || ' ' ||
sites . slug || ' ' ||
COALESCE ( sites . url , '' ) || ' ' ||
COALESCE ( sites . alias , '' ) || ' ' ||
COALESCE ( releases . shoot _id , '' ) || ' ' ||
COALESCE ( TO _CHAR ( releases . date , 'YYYY YY MM FMMM FMmonth mon DD FMDD' ) , '' ) || ' ' ||
STRING _AGG ( COALESCE ( actors . name , '' ) , ' ' ) || ' ' ||
STRING _AGG ( COALESCE ( tags . name , '' ) , ' ' ) || ' ' ||
STRING _AGG ( COALESCE ( tags _aliases . name , '' ) , ' ' )
) as document
FROM releases
LEFT JOIN sites ON releases . site _id = sites . id
LEFT JOIN networks ON sites . network _id = networks . id
LEFT JOIN releases _actors AS local _actors ON local _actors . release _id = releases . id
LEFT JOIN releases _tags AS local _tags ON local _tags . release _id = releases . id
LEFT JOIN actors ON local _actors . actor _id = actors . id
LEFT JOIN tags ON local _tags . tag _id = tags . id AND tags . priority >= 7
LEFT JOIN tags as tags _aliases ON local _tags . tag _id = tags _aliases . alias _for AND tags _aliases . secondary = true
$ { releaseIds ? 'WHERE releases.id = ANY(?)' : '' }
GROUP BY releases . id , sites . name , sites . slug , sites . alias , sites . url , networks . name , networks . slug , networks . url ;
` , releaseIds && [releaseIds]);
// Nothing is written when the select produced no rows.
if ( documents . rows ? . length > 0 ) {
// Render the builder's INSERT to a SQL string so an ON CONFLICT clause can be
// appended, turning the insert into an upsert keyed on release_id.
const query = knex ( 'releases_search' ) . insert ( documents . rows ) . toString ( ) ;
await knex . raw ( ` ${ query } ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document ` ) ;
}
}
2020-03-16 03:10:52 +00:00
/**
 * Persist a batch of scraped releases together with their associations.
 *
 * Steps: create a batch row, resolve channel sites and studios, separate new
 * releases from ones already stored, insert the new ones, attach database IDs
 * to both groups, associate actors, tags and media, and finally refresh the
 * search documents for all affected releases.
 *
 * @param {object[]} releases - Scraped releases.
 * @returns {Promise<object[]>} New and previously stored releases, each with an `id`.
 */
async function storeReleases(releases) {
  const batchIds = await knex('batches').insert({ comment: null }).returning('id');
  const batchId = batchIds[0];

  const withSites = await attachChannelSites(releases);
  const withStudios = await attachStudios(withSites);

  // Uniqueness is site ID + entry ID, so duplicates can only be filtered once sites are attached.
  const {
    uniqueReleases,
    duplicateReleases,
    duplicateReleaseEntries,
  } = await filterDuplicateReleases(withStudios);

  const newEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
  const insertResult = await knex('releases').insert(newEntries).returning('*');
  // TODO: update duplicate releases

  // Fall back to an empty list when the insert did not return an array of rows.
  const storedReleaseEntries = Array.isArray(insertResult) ? insertResult : [];

  const allReleases = uniqueReleases.concat(duplicateReleases);
  const allEntries = storedReleaseEntries.concat(duplicateReleaseEntries);
  const releasesWithId = attachReleaseIds(allReleases, allEntries);

  // The three association steps are started together and awaited as a group.
  await Promise.all([
    associateActors(releasesWithId),
    associateReleaseTags(releasesWithId),
    associateReleaseMedia(releasesWithId),
  ]);

  logger.info(`Stored ${storedReleaseEntries.length} releases`);

  const releaseIds = releasesWithId.map(release => release.id);
  await updateReleasesSearch(releaseIds);

  return releasesWithId;
}
// Public API: batch storage of releases and search document refresh.
module.exports = { storeReleases, updateReleasesSearch };