2019-11-10 03:20:22 +00:00
'use strict' ;
2019-11-17 02:56:45 +00:00
const Promise = require ( 'bluebird' ) ;
2019-11-30 04:55:32 +00:00
const UrlPattern = require ( 'url-pattern' ) ;
2019-11-20 03:53:36 +00:00
2019-11-10 03:20:22 +00:00
const knex = require ( './knex' ) ;
2019-11-17 02:56:45 +00:00
const argv = require ( './argv' ) ;
const scrapers = require ( './scrapers/scrapers' ) ;
2019-11-13 02:14:24 +00:00
const whereOr = require ( './utils/where-or' ) ;
2019-11-29 04:46:06 +00:00
const resolvePlace = require ( './utils/resolve-place' ) ;
2019-11-20 03:53:36 +00:00
const { createActorMediaDirectory , storeAvatars } = require ( './media' ) ;
2019-11-10 03:20:22 +00:00
/**
 * Build the place object for one of an actor row's location column groups.
 *
 * @param {Object} actor - Actor row, including the joined country columns.
 * @param {string} prefix - Column prefix: 'birth' or 'residence'.
 * @returns {?Object} Object holding whichever of `city`, `state` and
 *   `country` are set, or null when none of them is.
 */
function curateActorPlace(actor, prefix) {
    const city = actor[`${prefix}_city`];
    const state = actor[`${prefix}_state`];
    const alpha2 = actor[`${prefix}_country_alpha2`];

    if (!city && !state && !alpha2) {
        return null;
    }

    const place = {};

    if (city) place.city = city;
    if (state) place.state = state;

    if (alpha2) {
        place.country = {
            alpha2,
            name: actor[`${prefix}_country_name`],
            alias: actor[`${prefix}_country_alias`],
        };
    }

    return place;
}

/**
 * Turn a raw `actors` row into the camelCased actor object handed to callers.
 *
 * Aliases (rows whose `alias_for` is this actor), media and social links are
 * loaded concurrently. Media rows with role 'avatar' / 'photo' populate
 * `avatar` / `photos`.
 *
 * @param {Object} actor - Row from `actors`, optionally with joined country columns.
 * @returns {Promise<Object>} The curated actor.
 */
async function curateActor(actor) {
    const aliasQuery = knex('actors').where({ alias_for: actor.id });
    const mediaQuery = knex('media')
        .where({ domain: 'actors', target_id: actor.id })
        .orderBy('index');
    const socialQuery = knex('social')
        .where({ domain: 'actors', target_id: actor.id })
        .orderBy('platform', 'desc');

    const [aliasRows, mediaRows, social] = await Promise.all([aliasQuery, mediaQuery, socialQuery]);

    const hasRole = role => medium => medium.role === role;

    return {
        id: actor.id,
        gender: actor.gender,
        name: actor.name,
        description: actor.description,
        birthdate: actor.birthdate && new Date(actor.birthdate),
        country: actor.country_alpha2,
        origin: curateActorPlace(actor, 'birth'),
        residence: curateActorPlace(actor, 'residence'),
        ethnicity: actor.ethnicity,
        height: actor.height,
        weight: actor.weight,
        bust: actor.bust,
        waist: actor.waist,
        hip: actor.hip,
        naturalBoobs: actor.natural_boobs,
        aliases: aliasRows.map(alias => alias.name),
        slug: actor.slug,
        avatar: mediaRows.find(hasRole('avatar')),
        photos: mediaRows.filter(hasRole('photo')),
        hasTattoos: actor.has_tattoos,
        hasPiercings: actor.has_piercings,
        tattoos: actor.tattoos,
        piercings: actor.piercings,
        social,
        scrapedAt: actor.scraped_at,
    };
}
/**
 * Curate a list of raw actor rows concurrently.
 *
 * @param {Object[]} releases - Rows from the `actors` table (despite the name).
 * @returns {Promise<Object[]>} Curated actors, in the same order as the input.
 */
function curateActors(releases) {
    const pendingActors = releases.map(actorRow => curateActor(actorRow));

    return Promise.all(pendingActors);
}
2019-11-19 03:36:15 +00:00
/**
 * Convert an actor profile (camelCase) into a row for the `actors` table
 * (snake_case).
 *
 * The stored name has the first character of every space-separated segment
 * upper-cased; the slug is the lower-cased name with each run of whitespace
 * replaced by a dash.
 *
 * @param {Object} actor - Profile; only `name` is required.
 * @param {boolean} scraped - When truthy, stamp `scraped_at` and `scrape_success`.
 * @param {boolean} scrapeSuccess - Value stored in `scrape_success`.
 * @returns {Object} Row ready for insert/update.
 */
function curateActorEntry(actor, scraped, scrapeSuccess) {
    const capitalize = segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`;
    const { birthPlace, residencePlace } = actor;

    const entry = {
        name: actor.name.split(' ').map(segment => capitalize(segment)).join(' '),
        slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
        birthdate: actor.birthdate,
        description: actor.description,
        gender: actor.gender,
        ethnicity: actor.ethnicity,
        bust: actor.bust,
        waist: actor.waist,
        hip: actor.hip,
        natural_boobs: actor.naturalBoobs,
        height: actor.height,
        weight: actor.weight,
        hair: actor.hair,
        eyes: actor.eyes,
        has_tattoos: actor.hasTattoos,
        has_piercings: actor.hasPiercings,
        tattoos: actor.tattoos,
        piercings: actor.piercings,
    };

    // The id is only carried over when present, so inserts leave it to the database.
    if (actor.id) {
        entry.id = actor.id;
    }

    if (birthPlace) {
        Object.assign(entry, {
            birth_city: birthPlace.city,
            birth_state: birthPlace.state,
            birth_country_alpha2: birthPlace.country,
        });
    }

    if (residencePlace) {
        Object.assign(entry, {
            residence_city: residencePlace.city,
            residence_state: residencePlace.state,
            residence_country_alpha2: residencePlace.country,
        });
    }

    if (scraped) {
        Object.assign(entry, {
            scraped_at: new Date(),
            scrape_success: scrapeSuccess,
        });
    }

    return entry;
}
2019-11-28 04:36:22 +00:00
/**
 * Build a `social` table row for one profile URL.
 *
 * The URL is tried against each known platform pattern in order; the first
 * match supplies the platform label and a canonical URL rebuilt from the
 * captured username. URLs that match nothing are stored unchanged, with no
 * platform.
 *
 * @param {string} url - Link as supplied by a scraper.
 * @param {number} actorId - Id of the actor the link belongs to.
 * @returns {Object} Row with `url`, `platform`, `domain` and `target_id`.
 */
function curateSocialEntry(url, actorId) {
    const knownPlatforms = [
        // links supplied by PH often look like domain.com/domain.com/username
        {
            label: 'twitter',
            pattern: 'http(s)\\://(*)twitter.com/:username(/)(?*)',
            format: username => `https://www.twitter.com/${username}`,
        },
        {
            label: 'youtube',
            pattern: 'http(s)\\://(*)youtube.com/channel/:username(?*)',
            format: username => `https://www.youtube.com/channel/${username}`,
        },
        {
            label: 'instagram',
            pattern: 'http(s)\\://(*)instagram.com/:username(/)(?*)',
            format: username => `https://www.instagram.com/${username}`,
        },
        {
            label: 'snapchat',
            pattern: 'http(s)\\://(*)snapchat.com/add/:username(/)(?*)',
            format: username => `https://www.snapchat.com/add/${username}`,
        },
        {
            label: 'tumblr',
            pattern: 'http(s)\\://:username.tumblr.com(*)',
            format: username => `https://${username}.tumblr.com`,
        },
        {
            label: 'onlyfans',
            pattern: 'http(s)\\://(*)onlyfans.com/:username(/)(?*)',
            format: username => `https://www.onlyfans.com/${username}`,
        },
        {
            label: 'fancentro',
            pattern: 'http(s)\\://(*)fancentro.com/:username(/)(?*)',
            format: username => `https://www.fancentro.com/${username}`,
        },
        {
            label: 'modelhub',
            pattern: 'http(s)\\://(*)modelhub.com/:username(/)(?*)',
            format: username => `https://www.modelhub.com/${username}`,
        },
    ];

    // Defaults describe an unrecognised link: original URL, no platform.
    let curatedUrl = url;
    let platformLabel;

    for (const candidate of knownPlatforms) {
        const captured = new UrlPattern(candidate.pattern).match(url);

        if (captured) {
            platformLabel = candidate.label;
            curatedUrl = candidate.format ? candidate.format(captured.username) : url;
            break;
        }
    }

    return {
        url: curatedUrl,
        platform: platformLabel,
        domain: 'actors',
        target_id: actorId,
    };
}
2019-11-28 04:36:22 +00:00
/**
 * Curate a list of social URLs for an actor, leaving out links that are
 * already stored for that actor or that repeat within the list.
 *
 * URLs are compared case-insensitively after curation, so different
 * spellings that curate to the same URL collapse into one entry.
 * Blank entries in `urls` are skipped.
 *
 * @param {?string[]} urls - Links supplied by the scrapers; may be missing.
 * @param {number} actorId - Id of the actor the links belong to.
 * @returns {Promise<Object[]>} New `social` rows, in input order.
 */
async function curateSocialEntries(urls, actorId) {
    if (!urls || urls.length === 0) {
        return [];
    }

    const existingSocialLinks = await knex('social').where({
        domain: 'actors',
        target_id: actorId,
    });

    // One set covers both stored links and links accepted earlier in this call,
    // replacing a linear scan of both lists for every URL.
    const seenUrls = new Set(existingSocialLinks.map(entry => entry.url.toLowerCase()));
    const socialEntries = [];

    for (const url of urls) {
        if (url) {
            const socialEntry = curateSocialEntry(url, actorId);
            const urlKey = socialEntry.url.toLowerCase();

            if (!seenUrls.has(urlKey)) {
                seenUrls.add(urlKey);
                socialEntries.push(socialEntry);
            }
        }
    }

    return socialEntries;
}
2019-12-01 04:32:47 +00:00
/**
 * Fetch actors matching a query object and return them curated.
 *
 * Each row is joined (left join) with the `countries` table twice, once for
 * the birth country and once for the residence country. Filtering is
 * delegated to the `whereOr` helper; results are ordered by name, then gender.
 *
 * @param {Object} queryObject - Filter passed to `whereOr` for the `actors` table.
 * @param {number} [limit=100] - Maximum number of rows to fetch.
 * @returns {Promise<Object[]>} Curated actors.
 */
async function fetchActors(queryObject, limit = 100) {
    const columns = [
        'actors.*',
        'birth_countries.alpha2 as birth_country_alpha2',
        'birth_countries.name as birth_country_name',
        'birth_countries.alias as birth_country_alias',
        'residence_countries.alpha2 as residence_country_alpha2',
        'residence_countries.name as residence_country_name',
        'residence_countries.alias as residence_country_alias',
    ];

    const applyFilter = builder => whereOr(queryObject, 'actors', builder);

    const actorRows = await knex('actors')
        .select(...columns)
        .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
        .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
        .orderBy(['actors.name', 'actors.gender'])
        .where(applyFilter)
        .limit(limit);

    return curateActors(actorRows);
}
2019-11-28 04:36:22 +00:00
/**
 * Store the social links of an actor that are not yet in the `social` table.
 *
 * @param {?string[]} urls - Links supplied by the scrapers; may be missing.
 * @param {number} actorId - Id of the actor the links belong to.
 * @returns {Promise<void>}
 */
async function storeSocialLinks(urls, actorId) {
    const curatedSocialEntries = await curateSocialEntries(urls, actorId);

    // Nothing new to store: skip the insert instead of sending an empty one.
    if (curatedSocialEntries.length === 0) {
        return;
    }

    await knex('social').insert(curatedSocialEntries);
}
2019-11-19 03:36:15 +00:00
/**
 * Insert a new actor, then store its social links under the new id.
 *
 * @param {Object} actor - Profile to store; `name` is required, `social` is optional.
 * @param {boolean} [scraped=false] - Whether the profile comes from a scrape.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape found a profile.
 * @returns {Promise<Object>} The inserted `actors` row.
 */
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
    const newRow = curateActorEntry(actor, scraped, scrapeSuccess);
    const insertQuery = knex('actors').insert(newRow).returning('*');

    const [actorEntry] = await insertQuery;

    // Social links reference the actor by id, so they can only follow the insert.
    await storeSocialLinks(actor.social, actorEntry.id);

    console.log(`Added new entry for actor '${actor.name}'`);

    return actorEntry;
}
2019-11-20 03:53:36 +00:00
/**
 * Update the stored row of an existing actor, then store any new social links.
 *
 * @param {Object} actor - Profile with the `id` of the row to update.
 * @param {boolean} [scraped=false] - Whether the profile comes from a scrape.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape found a profile.
 * @returns {Promise<Object>} The updated `actors` row.
 */
async function updateActor(actor, scraped = false, scrapeSuccess = false) {
    const changes = curateActorEntry(actor, scraped, scrapeSuccess);
    const updateQuery = knex('actors')
        .where({ id: actor.id })
        .update(changes)
        .returning('*');

    const [actorEntry] = await updateQuery;

    await storeSocialLinks(actor.social, actor.id);

    console.log(`Updated entry for actor '${actor.name}'`);

    return actorEntry;
}
2019-11-29 04:46:06 +00:00
/**
 * Merge the profiles returned by several scrapers into one profile.
 *
 * For every field the first profile (in array order) that provides a value
 * wins; `social` links and avatars are collected from all profiles. When an
 * existing actor is given, its id and name take precedence. Birth and
 * residence places are passed through `resolvePlace` after merging.
 *
 * @param {Array<?Object>} profiles - Scraped profiles; missing ones are skipped.
 * @param {?Object} actor - Existing actor row, if there is one.
 * @returns {Promise<Object>} The merged profile.
 */
async function mergeProfiles(profiles, actor) {
    const isUnset = value => value === undefined || value === null;

    // For tri-state booleans `false` is a real answer, so only a missing value falls through.
    const firstKnown = (previous, next) => (isUnset(previous) ? next : previous);

    // Number(null) is 0, so null must be rejected explicitly or it would pass
    // as a date and block every later profile's birthdate.
    const isUsableDate = value => !isUnset(value) && !Number.isNaN(Number(value));

    const mergedProfile = profiles.reduce((prevProfile, profile) => {
        if (!profile) {
            return prevProfile;
        }

        return {
            id: actor ? actor.id : null,
            name: actor ? actor.name : (prevProfile.name || profile.name),
            description: prevProfile.description || profile.description,
            gender: prevProfile.gender || profile.gender,
            birthdate: isUsableDate(prevProfile.birthdate) ? prevProfile.birthdate : profile.birthdate,
            birthPlace: prevProfile.birthPlace || profile.birthPlace,
            residencePlace: prevProfile.residencePlace || profile.residencePlace,
            ethnicity: prevProfile.ethnicity || profile.ethnicity,
            bust: prevProfile.bust || profile.bust,
            waist: prevProfile.waist || profile.waist,
            hip: prevProfile.hip || profile.hip,
            naturalBoobs: firstKnown(prevProfile.naturalBoobs, profile.naturalBoobs),
            height: prevProfile.height || profile.height,
            weight: prevProfile.weight || profile.weight,
            hair: prevProfile.hair || profile.hair,
            eyes: prevProfile.eyes || profile.eyes,
            hasPiercings: firstKnown(prevProfile.hasPiercings, profile.hasPiercings),
            hasTattoos: firstKnown(prevProfile.hasTattoos, profile.hasTattoos),
            piercings: prevProfile.piercings || profile.piercings,
            tattoos: prevProfile.tattoos || profile.tattoos,
            social: prevProfile.social.concat(profile.social || []),
            avatars: prevProfile.avatars.concat(profile.avatar || []),
        };
    }, {
        social: [],
        avatars: [],
    });

    const [birthPlace, residencePlace] = await Promise.all([
        resolvePlace(mergedProfile.birthPlace),
        resolvePlace(mergedProfile.residencePlace),
    ]);

    mergedProfile.birthPlace = birthPlace;
    mergedProfile.residencePlace = residencePlace;

    return mergedProfile;
}
2019-11-17 02:56:45 +00:00
/**
 * Scrape profiles for the given actors from every configured source and,
 * when `argv.save` is set, store the merged result.
 *
 * Up to three actors are processed concurrently. A failure for one actor is
 * logged as a warning and does not stop the others.
 *
 * @param {string[]} [actorNames] - Names to scrape; defaults to `argv.actors`.
 * @returns {Promise<void>}
 */
async function scrapeActors(actorNames) {
    await Promise.map(actorNames || argv.actors, async (actorName) => {
        try {
            const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
            const actorEntry = await knex('actors').where({ slug: actorSlug }).first();

            const sources = argv.sources
                ? argv.sources.map(source => [source, scrapers.actors[source]])
                : Object.entries(scrapers.actors);

            const profiles = await Promise.map(sources, async ([scraperSlug, scraper]) => {
                const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName);

                // A failed lookup must stay falsy; tagging it with the scraper
                // name would make it look like a (blank) profile.
                return profile ? { scraper: scraperSlug, ...profile } : null;
            });

            const foundProfiles = profiles.filter(Boolean);

            if (foundProfiles.length === 0) {
                console.log(`Could not find profile for actor '${actorName}'`);

                // Record the failed attempt on the existing entry; without an
                // entry there is no row to update.
                if (argv.save && actorEntry) {
                    await updateActor({ id: actorEntry.id, name: actorEntry.name }, true, false);
                }

                return;
            }

            const profile = await mergeProfiles(foundProfiles, actorEntry);

            if (!argv.save) {
                return;
            }

            if (actorEntry) {
                await createActorMediaDirectory(profile, actorEntry);

                await Promise.all([
                    updateActor(profile, true, true),
                    storeAvatars(profile, actorEntry),
                ]);

                return;
            }

            const newActorEntry = await storeActor(profile, true, true);

            await createActorMediaDirectory(profile, newActorEntry);
            await storeAvatars(profile, newActorEntry);
        } catch (error) {
            console.warn(actorName, error);
        }
    }, {
        concurrency: 3,
    });
}
2019-11-19 03:36:15 +00:00
/**
 * Scrape every stored actor whose `scraped_at` column is still null.
 *
 * @returns {Promise<void>} Whatever `scrapeActors` resolves with.
 */
async function scrapeBasicActors() {
    const unscrapedActors = await knex('actors').where('scraped_at', null);
    const unscrapedNames = unscrapedActors.map(({ name }) => name);

    return scrapeActors(unscrapedNames);
}
/**
 * Link the actors named in a release to that release, creating basic actor
 * entries for names that are not stored yet.
 *
 * Names are trimmed and de-duplicated before anything is looked up, so the
 * lookup and the "is this actor new?" check work on the same strings and a
 * name listed twice creates a single actor.
 *
 * @param {Object} release - Release whose `actors` is a list of actor names.
 * @param {number} releaseId - Id of the stored release.
 * @returns {Promise<void>}
 */
async function associateActors(release, releaseId) {
    const trimmedNames = (release.actors || [])
        .map(actorName => actorName.trim())
        .filter(Boolean);
    const actorNames = [...new Set(trimmedNames)];

    if (actorNames.length === 0) {
        return;
    }

    const [actorEntries, associatedActors] = await Promise.all([
        knex('actors').whereIn('name', actorNames),
        knex('actors_associated').where('release_id', releaseId),
    ]);

    const storedNames = new Set(actorEntries.map(actor => actor.name));
    const newActors = actorNames.filter(actorName => !storedNames.has(actorName));

    const newActorEntries = await Promise.all(newActors.map(actorName => storeActor({ name: actorName })));

    // Association rows are written below with an `actor_id` column, so that is
    // the column to compare against, not the association row's own `id`.
    const associatedActorIds = new Set(associatedActors.map(association => association.actor_id));

    const newlyAssociatedActors = actorEntries
        .concat(newActorEntries)
        .filter(actorEntry => !associatedActorIds.has(actorEntry.id))
        .map(actorEntry => ({
            release_id: releaseId,
            actor_id: actorEntry.id,
        }));

    if (newlyAssociatedActors.length === 0) {
        return;
    }

    await knex('actors_associated').insert(newlyAssociatedActors);
}
2019-11-10 03:20:22 +00:00
module . exports = {
2019-11-19 03:36:15 +00:00
associateActors ,
2019-11-10 03:20:22 +00:00
fetchActors ,
2019-11-17 02:56:45 +00:00
scrapeActors ,
2019-11-19 03:36:15 +00:00
scrapeBasicActors ,
2019-11-10 03:20:22 +00:00
} ;