2019-11-10 03:20:22 +00:00
'use strict' ;
2020-02-01 03:14:08 +00:00
const config = require ( 'config' ) ;
2019-11-17 02:56:45 +00:00
const Promise = require ( 'bluebird' ) ;
2019-11-30 04:55:32 +00:00
const UrlPattern = require ( 'url-pattern' ) ;
2019-12-10 21:35:00 +00:00
const moment = require ( 'moment' ) ;
2019-11-20 03:53:36 +00:00
2020-02-08 01:49:39 +00:00
const logger = require ( './logger' ) ( _ _filename ) ;
2019-11-10 03:20:22 +00:00
const knex = require ( './knex' ) ;
2019-11-17 02:56:45 +00:00
const argv = require ( './argv' ) ;
const scrapers = require ( './scrapers/scrapers' ) ;
2019-11-13 02:14:24 +00:00
const whereOr = require ( './utils/where-or' ) ;
2019-11-29 04:46:06 +00:00
const resolvePlace = require ( './utils/resolve-place' ) ;
2020-01-07 03:23:28 +00:00
const slugify = require ( './utils/slugify' ) ;
2019-12-13 02:28:52 +00:00
const { createMediaDirectory , storePhotos } = require ( './media' ) ;
2019-11-10 03:20:22 +00:00
/**
 * Build the place object for one location of an actor row.
 *
 * @param {Object} actor - Actor row including `<prefix>_city`, `<prefix>_state` and
 *   `<prefix>_country_alpha2|name|alias` columns.
 * @param {string} prefix - Column prefix, `'birth'` or `'residence'`.
 * @returns {Object|null} Object holding only the known parts (`city`, `state`, `country`),
 *   or null when the row has no city, state or country for this place.
 */
function curateActorPlace(actor, prefix) {
  const city = actor[`${prefix}_city`];
  const state = actor[`${prefix}_state`];
  const alpha2 = actor[`${prefix}_country_alpha2`];

  if (!city && !state && !alpha2) {
    return null;
  }

  const place = {};

  if (city) place.city = city;
  if (state) place.state = state;

  if (alpha2) {
    place.country = {
      alpha2,
      name: actor[`${prefix}_country_name`],
      alias: actor[`${prefix}_country_alias`],
    };
  }

  return place;
}

/**
 * Turn an actor database row into the camelCased actor object exposed by this module,
 * loading its aliases, avatar, photos and social links along the way.
 *
 * @param {Object} actor - Row from the `actors` table, joined with country names.
 * @returns {Promise<Object>} The curated actor; `age` is only set when a birthdate is known.
 */
async function curateActor(actor) {
  const [aliases, avatar, photos, social] = await Promise.all([
    knex('actors').where({ alias_for: actor.id }),
    knex('actors_avatars')
      .where('actor_id', actor.id)
      .join('media', 'media.id', 'actors_avatars.media_id')
      .first(),
    knex('actors_photos')
      .where('actor_id', actor.id)
      .join('media', 'media.id', 'actors_photos.media_id')
      .orderBy('index'),
    knex('actors_social')
      .where('actor_id', actor.id)
      .orderBy('platform', 'desc'),
  ]);

  const curatedActor = {
    id: actor.id,
    gender: actor.gender,
    name: actor.name,
    description: actor.description,
    birthdate: actor.birthdate && new Date(actor.birthdate),
    country: actor.country_alpha2,
    origin: curateActorPlace(actor, 'birth'),
    residence: curateActorPlace(actor, 'residence'),
    ethnicity: actor.ethnicity,
    height: actor.height,
    weight: actor.weight,
    bust: actor.bust,
    waist: actor.waist,
    hip: actor.hip,
    naturalBoobs: actor.natural_boobs,
    aliases: aliases.map(({ name }) => name),
    slug: actor.slug,
    avatar,
    photos,
    hasTattoos: actor.has_tattoos,
    hasPiercings: actor.has_piercings,
    tattoos: actor.tattoos,
    piercings: actor.piercings,
    social,
    scrapedAt: actor.scraped_at,
  };

  if (curatedActor.birthdate) {
    // whole years between the birthdate and now
    curatedActor.age = moment().diff(curatedActor.birthdate, 'years');
  }

  return curatedActor;
}
/**
 * Curate a list of actor rows concurrently.
 *
 * @param {Object[]} releases - Actor rows (the parameter name is historical).
 * @returns {Promise<Object[]>} Curated actors, in the same order as the input rows.
 */
function curateActors(releases) {
  const pendingActors = releases.map(async (actorRow) => {
    const curatedActor = await curateActor(actorRow);

    return curatedActor;
  });

  return Promise.all(pendingActors);
}
2019-11-19 03:36:15 +00:00
/**
 * Convert a scraped or merged actor profile into a row for the `actors` table.
 *
 * @param {Object} actor - Profile with camelCased properties; `name` is required.
 * @param {boolean} [scraped] - When truthy, stamp the row with `scraped_at` and `scrape_success`.
 * @param {boolean} [scrapeSuccess] - Stored as `scrape_success` when `scraped` is truthy.
 * @returns {Object} Snake-cased row. `id`, the place columns and the scrape columns are only
 *   present when the corresponding input was supplied.
 */
function curateActorEntry(actor, scraped, scrapeSuccess) {
  // upper-case the first character of every space-separated name segment
  const capitalizedName = actor.name
    .split(' ')
    .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
    .join(' ');

  const curatedActor = {
    name: capitalizedName,
    slug: slugify(actor.name),
    birthdate: actor.birthdate,
    description: actor.description,
    gender: actor.gender,
    ethnicity: actor.ethnicity,
    bust: actor.bust,
    waist: actor.waist,
    hip: actor.hip,
    natural_boobs: actor.naturalBoobs,
    height: actor.height,
    weight: actor.weight,
    hair: actor.hair,
    eyes: actor.eyes,
    has_tattoos: actor.hasTattoos,
    has_piercings: actor.hasPiercings,
    tattoos: actor.tattoos,
    piercings: actor.piercings,
  };

  if (actor.id) {
    curatedActor.id = actor.id;
  }

  if (actor.birthPlace) {
    curatedActor.birth_city = actor.birthPlace.city;
    curatedActor.birth_state = actor.birthPlace.state;
    curatedActor.birth_country_alpha2 = actor.birthPlace.country;
  }

  if (actor.residencePlace) {
    curatedActor.residence_city = actor.residencePlace.city;
    curatedActor.residence_state = actor.residencePlace.state;
    curatedActor.residence_country_alpha2 = actor.residencePlace.country;
  }

  if (scraped) {
    curatedActor.scraped_at = new Date();
    curatedActor.scrape_success = scrapeSuccess;
  }

  return curatedActor;
}
2019-11-28 04:36:22 +00:00
// Known social platforms: the url-pattern used to recognise a profile link and the canonical
// URL format it is normalised to. The leading (*) wildcard is needed because links supplied
// by PH often look like domain.com/domain.com/username.
const SOCIAL_PLATFORMS = [
  {
    label: 'twitter',
    pattern: 'http(s)\\://(*)twitter.com/:username(/)(?*)',
    format: username => `https://www.twitter.com/${username}`,
  },
  {
    label: 'youtube',
    pattern: 'http(s)\\://(*)youtube.com/channel/:username(?*)',
    format: username => `https://www.youtube.com/channel/${username}`,
  },
  {
    label: 'instagram',
    pattern: 'http(s)\\://(*)instagram.com/:username(/)(?*)',
    format: username => `https://www.instagram.com/${username}`,
  },
  {
    label: 'snapchat',
    pattern: 'http(s)\\://(*)snapchat.com/add/:username(/)(?*)',
    format: username => `https://www.snapchat.com/add/${username}`,
  },
  {
    label: 'tumblr',
    pattern: 'http(s)\\://:username.tumblr.com(*)',
    format: username => `https://${username}.tumblr.com`,
  },
  {
    label: 'onlyfans',
    pattern: 'http(s)\\://(*)onlyfans.com/:username(/)(?*)',
    format: username => `https://www.onlyfans.com/${username}`,
  },
  {
    label: 'fancentro',
    pattern: 'http(s)\\://(*)fancentro.com/:username(/)(?*)',
    format: username => `https://www.fancentro.com/${username}`,
  },
  {
    label: 'modelhub',
    pattern: 'http(s)\\://(*)modelhub.com/:username(/)(?*)',
    format: username => `https://www.modelhub.com/${username}`,
  },
];

/**
 * Build an `actors_social` row for a social media link, normalising the URL when it belongs
 * to a known platform.
 *
 * @param {string} url - Link as found on the actor's profile.
 * @param {number} actorId - ID of the actor the link belongs to.
 * @returns {{ url: string, platform: (string|undefined), actor_id: number }} Row to insert;
 *   `platform` is undefined and `url` is left untouched when no platform pattern matches.
 */
function curateSocialEntry(url, actorId) {
  // the first platform whose pattern matches wins
  for (const platform of SOCIAL_PLATFORMS) {
    const patternMatch = new UrlPattern(platform.pattern).match(url);

    if (patternMatch) {
      return {
        url: platform.format ? platform.format(patternMatch.username) : url,
        platform: platform.label,
        actor_id: actorId,
      };
    }
  }

  return {
    url,
    platform: undefined,
    actor_id: actorId,
  };
}
2019-11-28 04:36:22 +00:00
/**
 * Curate a list of social links for an actor, dropping links that are duplicated within the
 * list or already stored for that actor. URLs are compared case-insensitively after
 * normalisation by curateSocialEntry.
 *
 * @param {string[]} [urls] - Raw social links; a missing list yields an empty result.
 * @param {number} actorId - ID of the actor the links belong to.
 * @returns {Promise<Object[]>} New `actors_social` rows, in input order.
 */
async function curateSocialEntries(urls, actorId) {
  if (!urls) {
    return [];
  }

  const existingSocialLinks = await knex('actors_social').where('actor_id', actorId);
  const knownUrls = new Set(existingSocialLinks.map(entry => entry.url.toLowerCase()));

  return urls.reduce((acc, url) => {
    const socialEntry = curateSocialEntry(url, actorId);
    const urlKey = socialEntry.url.toLowerCase();

    if (knownUrls.has(urlKey)) {
      // prevent duplicates
      return acc;
    }

    knownUrls.add(urlKey);
    acc.push(socialEntry);

    return acc;
  }, []);
}
2019-12-01 04:32:47 +00:00
/**
 * Fetch and curate actors matching a query.
 *
 * @param {Object} queryObject - Filter passed to the whereOr helper, scoped to the `actors` table.
 * @param {number} [limit=100] - Maximum number of actors to return.
 * @returns {Promise<Object[]>} Curated actors ordered by name, then gender.
 */
async function fetchActors(queryObject, limit = 100) {
  const actorRows = await knex('actors')
    .select(
      'actors.*',
      'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
      'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name', 'residence_countries.alias as residence_country_alias',
    )
    // the countries table is joined twice, once per place, to resolve names and aliases
    .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
    .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
    .orderBy(['actors.name', 'actors.gender'])
    .where(builder => whereOr(queryObject, 'actors', builder))
    .limit(limit);

  return curateActors(actorRows);
}
2019-11-28 04:36:22 +00:00
/**
 * Store the social links of an actor that are not in the database yet.
 *
 * @param {string[]} [urls] - Raw social links scraped for the actor.
 * @param {number} actorId - ID of the actor the links belong to.
 * @returns {Promise<void>}
 */
async function storeSocialLinks(urls, actorId) {
  const curatedSocialEntries = await curateSocialEntries(urls, actorId);

  if (curatedSocialEntries.length === 0) {
    // nothing new to store, don't bother the database with an empty insert
    return;
  }

  await knex('actors_social').insert(curatedSocialEntries);
}
2019-11-19 03:36:15 +00:00
/**
 * Insert a new actor, along with its social links and, when available, its avatars.
 *
 * @param {Object} actor - Actor profile; `name` is required, `social` and `avatars` are optional.
 * @param {boolean} [scraped=false] - Whether the profile is the result of a scrape.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape was successful.
 * @returns {Promise<Object>} The inserted `actors` row.
 */
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
  const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

  const [actorEntry] = await knex('actors')
    .insert(curatedActor)
    .returning('*');

  await storeSocialLinks(actor.social, actorEntry.id);

  if (actor.avatars) {
    const subpath = `${actorEntry.slug}/`;

    await createMediaDirectory('actors', subpath);
    await storePhotos(actor.avatars, {
      domain: 'actor',
      role: 'photo',
      primaryRole: 'avatar',
      targetId: actorEntry.id,
      subpath,
      naming: 'timestamp',
    }, actorEntry.name);
  }

  logger.info(`Added new entry for actor '${actor.name}'`);

  return actorEntry;
}
2019-11-20 03:53:36 +00:00
/**
 * Update an existing actor with a freshly curated profile and store any new social links.
 *
 * @param {Object} actor - Actor profile; `id` selects the row to update.
 * @param {boolean} [scraped=false] - Whether the profile is the result of a scrape.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape was successful.
 * @returns {Promise<Object|undefined>} The updated `actors` row, or undefined when no row matched.
 */
async function updateActor(actor, scraped = false, scrapeSuccess = false) {
  const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

  const [actorEntry] = await knex('actors')
    .where({ id: actor.id })
    .update(curatedActor)
    .returning('*');

  await storeSocialLinks(actor.social, actor.id);

  logger.info(`Updated entry for actor '${actor.name}'`);

  return actorEntry;
}
2019-11-29 04:46:06 +00:00
/**
 * Tell whether a birthdate from an earlier profile should be kept. Missing values and values
 * that do not convert to a number (such as an invalid Date) are not usable.
 */
function hasUsableBirthdate(birthdate) {
  return birthdate !== undefined && birthdate !== null && !Number.isNaN(Number(birthdate));
}

/**
 * Merge the profiles scraped from several sources into a single profile. For every property
 * the first source providing a value wins; social links, avatars and releases are accumulated
 * across all sources.
 *
 * @param {Array<Object|null>} profiles - Scraped profiles in order of priority; missing
 *   profiles (null or undefined) are skipped.
 * @param {Object} [actor] - Existing actor entry; when given, its `id` and `name` are used.
 * @returns {Promise<Object|null>} The merged profile with resolved places, or null when no
 *   profile is available.
 */
async function mergeProfiles(profiles, actor) {
  const availableProfiles = profiles.filter(Boolean);

  if (availableProfiles.length === 0) {
    return null;
  }

  const mergedProfile = availableProfiles.reduce((prevProfile, profile) => ({
    id: actor ? actor.id : null,
    name: actor ? actor.name : (prevProfile.name || profile.name),
    description: prevProfile.description || profile.description,
    gender: prevProfile.gender || profile.gender,
    birthdate: hasUsableBirthdate(prevProfile.birthdate) ? prevProfile.birthdate : profile.birthdate,
    birthPlace: prevProfile.birthPlace || profile.birthPlace,
    residencePlace: prevProfile.residencePlace || profile.residencePlace,
    nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
    ethnicity: prevProfile.ethnicity || profile.ethnicity,
    bust: prevProfile.bust || profile.bust,
    waist: prevProfile.waist || profile.waist,
    hip: prevProfile.hip || profile.hip,
    // booleans: an explicit false from an earlier source must not be overridden
    naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs,
    height: prevProfile.height || profile.height,
    weight: prevProfile.weight || profile.weight,
    hair: prevProfile.hair || profile.hair,
    eyes: prevProfile.eyes || profile.eyes,
    hasPiercings: prevProfile.hasPiercings === undefined ? profile.hasPiercings : prevProfile.hasPiercings,
    hasTattoos: prevProfile.hasTattoos === undefined ? profile.hasTattoos : prevProfile.hasTattoos,
    piercings: prevProfile.piercings || profile.piercings,
    tattoos: prevProfile.tattoos || profile.tattoos,
    social: prevProfile.social.concat(profile.social || []),
    avatars: prevProfile.avatars.concat(profile.avatar ? [profile.avatar] : []), // don't flatten fallbacks
    releases: prevProfile.releases.concat(profile.releases ? profile.releases : []), // don't flatten fallbacks
  }), {
    social: [],
    avatars: [],
    releases: [],
  });

  const [birthPlace, residencePlace] = await Promise.all([
    resolvePlace(mergedProfile.birthPlace),
    resolvePlace(mergedProfile.residencePlace),
  ]);

  mergedProfile.birthPlace = birthPlace;
  mergedProfile.residencePlace = residencePlace;

  if (!mergedProfile.birthPlace && mergedProfile.nationality) {
    const country = await knex('countries')
      .where('nationality', 'ilike', `%${mergedProfile.nationality}%`)
      .orderBy('priority', 'desc')
      .first();

    // an unrecognised nationality leaves the birth place unknown instead of failing the merge
    if (country) {
      mergedProfile.birthPlace = {
        country: country.alpha2,
      };
    }
  }

  return mergedProfile;
}
2019-11-17 02:56:45 +00:00
/**
 * Fetch an actor profile from a single scraper.
 *
 * @returns {Promise<Object>} The scraped profile, tagged with the actor name and scraper slug.
 * @throws {Error} With `warn: false` when the scraper is unknown or has no profile for the
 *   actor; both cases are already logged here. Errors thrown by the scraper itself propagate
 *   unchanged.
 */
async function fetchScraperProfile(scraperSlug, actorName, actorEntry) {
  const scraper = scrapers.actors[scraperSlug];

  if (!scraper) {
    logger.warn(`No profile scraper available for ${scraperSlug}`);
    throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`), { warn: false });
  }

  logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);

  const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug);

  if (!profile) {
    logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
    throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
  }

  logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);

  return {
    ...profile,
    name: actorName,
    scraper: scraperSlug,
  };
}

/**
 * Scrape a profile from a source, which is either a single scraper slug or a list of slugs
 * that act as fallbacks for each other: they are tried in order until one yields a profile.
 *
 * @returns {Promise<Object|null>} The first profile found, or null when every scraper failed.
 */
async function scrapeProfileFromSource(source, actorName, actorEntry) {
  try {
    // start from a rejection so each scraper only runs when all previous ones have failed
    return await [].concat(source).reduce(
      (outcome, scraperSlug) => outcome.catch(() => fetchScraperProfile(scraperSlug, actorName, actorEntry)),
      Promise.reject(new Error()),
    );
  } catch (error) {
    // only the failure of the last scraper in the chain surfaces here
    if (error.warn !== false) {
      logger.warn(`Error in scraper ${source}: ${error.message}`);
    }
  }

  return null;
}

/**
 * Scrape, merge and optionally store (`argv.save`) the profiles of a list of actors, handling
 * up to three actors at a time.
 *
 * @param {string[]} [actorNames] - Names to scrape; defaults to `argv.actors`.
 * @returns {Promise<Array<Object|null>>} The merged profile per actor, or null for actors
 *   without a profile or whose scrape failed.
 */
async function scrapeActors(actorNames) {
  return Promise.map(actorNames || argv.actors, async (actorName) => {
    try {
      const actorSlug = slugify(actorName);
      const actorEntry = await knex('actors').where({ slug: actorSlug }).first();

      const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
      // ignore fallback grouping when scenes are requested, so every scraper is queried
      const finalSources = argv.withReleases ? sources.flat() : sources;

      const profiles = await Promise.map(finalSources, source => scrapeProfileFromSource(source, actorName, actorEntry));
      const profile = await mergeProfiles(profiles, actorEntry);

      if (argv.inspect) {
        console.log(profile);

        if (profile) {
          logger.info(`Found ${profile.releases.length} releases for ${actorName}`);
        }
      }

      if (profile === null) {
        logger.warn(`Could not find profile for actor '${actorName}'`);

        if (argv.save && !actorEntry) {
          // keep a basic, unscraped entry so the actor can still be associated and retried
          await storeActor({ name: actorName }, false, false);
        }

        return null;
      }

      if (argv.save) {
        if (actorEntry) {
          await createMediaDirectory('actors', `${actorEntry.slug}/`);

          await Promise.all([
            updateActor(profile, true, true),
            storePhotos(profile.avatars, {
              domain: 'actor',
              role: 'photo',
              primaryRole: 'avatar',
              targetId: actorEntry.id,
              subpath: `${actorEntry.slug}/`,
              naming: 'timestamp',
            }, actorEntry.name),
          ]);

          return profile;
        }

        await storeActor(profile, true, true);
      }

      return profile;
    } catch (error) {
      console.log(error);
      logger.warn(`${actorName}: ${error}`);

      return null;
    }
  }, {
    concurrency: 3,
  });
}
2019-11-19 03:36:15 +00:00
/**
 * Scrape profiles for every actor that has not been scraped before, i.e. whose
 * `scraped_at` column is still empty.
 *
 * @returns {Promise<Array<Object|null>>} The result of scrapeActors for those actors.
 */
async function scrapeBasicActors() {
  const unscrapedActors = await knex('actors').where('scraped_at', null);
  const actorNames = unscrapedActors.map(({ name }) => name);

  return scrapeActors(actorNames);
}
2019-12-04 20:58:08 +00:00
/**
 * Associate actors with releases, storing a basic entry for every actor that is not in the
 * database yet. Associations that already exist are left alone.
 *
 * Basic actor scraping is failure prone, so it is deliberately not run as part of the
 * association.
 *
 * @param {Object<string, Object>} mappedActors - Actors keyed by slug; each has a `name` and
 *   an iterable `releaseIds`.
 * @param {Object[]} releases - Stored releases the actors appear in; only `id` is used.
 * @returns {Promise<void>}
 */
async function associateActors(mappedActors, releases) {
  const [existingActorEntries, existingAssociationEntries] = await Promise.all([
    knex('actors')
      .whereIn('name', Object.values(mappedActors).map(actor => actor.name))
      .orWhereIn('slug', Object.keys(mappedActors)),
    knex('releases_actors').whereIn('release_id', releases.map(release => release.id)),
  ]);

  const associations = await Promise.map(Object.entries(mappedActors), async ([actorSlug, actor]) => {
    try {
      const actorEntry = existingActorEntries.find(existingActor => existingActor.slug === actorSlug)
        || await storeActor(actor);

      return Array.from(actor.releaseIds)
        .map(releaseId => ({
          release_id: releaseId,
          actor_id: actorEntry.id,
        }))
        // remove associations already in database
        .filter(association => !existingAssociationEntries
          .some(associationEntry => associationEntry.actor_id === association.actor_id
            && associationEntry.release_id === association.release_id));
    } catch (error) {
      // a failing actor must not prevent the other actors from being associated
      logger.error(`Failed to associate actor '${actor.name}': ${error.message}`);

      return null;
    }
  });

  const newAssociations = associations.filter(Boolean).flat();

  if (newAssociations.length > 0) {
    await knex('releases_actors').insert(newAssociations);
  }
}
2019-11-10 03:20:22 +00:00
module . exports = {
2019-11-19 03:36:15 +00:00
associateActors ,
2019-11-10 03:20:22 +00:00
fetchActors ,
2019-11-17 02:56:45 +00:00
scrapeActors ,
2019-11-19 03:36:15 +00:00
scrapeBasicActors ,
2019-11-10 03:20:22 +00:00
} ;