'use strict';

// Project-local helpers: `qu` fetches and queries pages, `slugify` normalises
// matched series names for lookup.
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');

/**
 * Escape every character that has a special meaning in a regular expression,
 * so the text can be embedded in a dynamically built pattern as a literal.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text that is safe to interpolate into `new RegExp()`.
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Find the series (sub-channel) mentioned in a release title.
 *
 * The candidate series are the channel's own `children`, or otherwise its
 * parent's `children`. Both the `name` and the `slug` of every series are
 * searched for in the title, case-insensitively.
 *
 * @param {{ title: ?string }} release - Release whose title is inspected.
 * @param {object} channel - Channel with `children`, or with `parent.children`.
 * @returns {?{ channel: string, title: string }} The matched series' slug and
 *   the title with the series prefix (plus any separators following it)
 *   removed, or `null` when no series is recognised.
 */
function matchChannel(release, channel) {
  const series = channel.children || channel.parent?.children || [];

  if (!release.title) {
    return null;
  }

  // A Map avoids accidental hits on Object.prototype keys such as `constructor`.
  const seriesByKey = new Map();

  series.forEach((serie) => {
    [serie.name, serie.slug]
      .filter(Boolean)
      .forEach((key) => seriesByKey.set(key, serie));
  });

  if (seriesByKey.size === 0) {
    return null;
  }

  // Names are escaped so that characters like `+` or `(` match literally.
  const pattern = [...seriesByKey.keys()].map(escapeRegExp).join('|');
  const serieName = release.title.match(new RegExp(pattern, 'i'))?.[0];

  // The matched text may differ in case and spacing from the stored keys, so
  // it is normalised with slugify (empty delimiter) before the lookup.
  const serie = serieName && seriesByKey.get(slugify(serieName, ''));

  if (!serie) {
    return null;
  }

  return {
    channel: serie.slug,
    // Strip the series name together with trailing whitespace, colons and dashes.
    title: release.title.replace(new RegExp(`${escapeRegExp(serieName)}[\\s:–-]*`), ''),
  };
}
/**
 * Build a release for every scene tile of an overview page.
 *
 * @param {Array<{ query: object, el: object }>} scenes - Tiles, each with its
 *   element and a query helper bound to it.
 * @param {object} channel - Channel being scraped, used to match the series.
 * @returns {object[]} One release per tile. When a series is recognised in the
 *   title, `channel` is added and `title` is replaced by the stripped title.
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query, el }) => {
    const release = {};

    release.url = query.url('a');
    // The tile's id attribute is expected to look like `post-1234`.
    release.entryId = query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1];
    release.title = query.cnt('.meta h3');
    release.date = query.date('.meta .post-meta', 'MMMM D, YYYY');
    release.poster = query.img('img');

    return {
      ...release,
      // Spreading `null` (no series matched) adds nothing.
      ...matchChannel(release, channel),
    };
  });
}
/**
 * Collect the photo links from a gallery page.
 *
 * @param {?string} url - Gallery page URL; falsy values short-circuit.
 * @returns {Promise<?Array<{ src: string, referer: string }>>} One entry per
 *   link inside `.et_post_gallery`, or `null` when there is no URL or the
 *   request did not succeed.
 */
async function fetchPhotos(url) {
  if (!url) {
    return null;
  }

  const galleryRes = await qu.get(url, '.et_post_gallery');

  if (!galleryRes.ok) {
    return null;
  }

  const photoUrls = galleryRes.item.query.urls('a');

  // The gallery page is passed along as referer for each photo.
  return photoUrls.map((photoUrl) => ({
    src: photoUrl,
    referer: url,
  }));
}
/**
 * Parse the page's schema JSON without throwing.
 *
 * @param {?string} text - Raw content of the schema script tag.
 * @returns {?object} Parsed data, or `null` when the text is empty or invalid.
 */
function parseSchemaGraph(text) {
  if (!text) {
    return null;
  }

  try {
    return JSON.parse(text);
  } catch (error) {
    // Deliberate best-effort: the schema is only one of several poster
    // sources, so a malformed script must not abort the whole scrape.
    return null;
  }
}

/**
 * Build a release from a scene page.
 *
 * @param {{ query: object }} item - Page item; only its query helper is used.
 * @param {string} url - Scene URL, passed along as referer for the poster.
 * @param {object} channel - Channel being scraped, used to match the series.
 * @param {?{ photos: boolean }} include - Optional extras; the photo gallery
 *   is only fetched when `include.photos` is truthy.
 * @returns {Promise<object>} The release. When a series is recognised in the
 *   title, `channel` is added and `title` is replaced by the stripped title.
 */
async function scrapeScene({ query }, url, channel, include) {
  const release = {};
  const data = parseSchemaGraph(query.cnt('script.yoast-schema-graph'));

  release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];

  release.title = query.cnt('.vid_title');
  release.description = query.cnt('.vid_desc p');

  release.date = query.date('.vid_date', 'MMMM D, YYYY');
  release.duration = query.dur('.vid_length');

  release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null),
  }));

  release.tags = query.cnts('.vid_infos a[rel="tag"]');

  // The schema script may be missing or lack a graph; fall back to meta tags.
  const graph = Array.isArray(data?.['@graph']) ? data['@graph'] : [];
  const posterData = graph.find((item) => item?.['@type'] === 'ImageObject');

  const poster = posterData?.url
    || query.q('meta[property="og:image"]', 'content')
    || query.q('meta[name="twitter:image"]', 'content');

  release.poster = {
    src: poster,
    referer: url,
  };

  // rating out of 5, yet sometimes 5.07? Cap at 5; a missing rating yields null.
  release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null;

  if (include?.photos) {
    release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
  }

  return {
    ...release,
    // Spreading `null` (no series matched) adds nothing.
    ...matchChannel(release, channel),
  };
}
/**
 * Fetch a channel's overview page and scrape every release on it.
 *
 * @param {{ url: string }} channel - Channel whose page is fetched.
 * @returns {Promise<object[]|*>} The scraped releases, or the response's
 *   `status` when the request did not succeed.
 */
async function fetchLatest(channel) {
  // no apparent pagination, all updates on one page
  // using channels in part because main overview contains indistinguishable photo albums
  const res = await qu.getAll(channel.url, '.project');

  if (res.ok) {
    return scrapeAll(res.items, channel);
  }

  return res.status;
}
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - Scene URL.
 * @param {object} channel - Channel the scene belongs to.
 * @param {object} baseRelease - Not used by this scraper; kept so the
 *   signature stays unchanged for callers.
 * @param {object} include - Optional extras, forwarded to the scene scraper.
 * @returns {Promise<object|*>} The scraped release, or the response's
 *   `status` when the request did not succeed.
 */
async function fetchScene(url, channel, baseRelease, include) {
  const res = await qu.get(url);

  if (res.ok) {
    return scrapeScene(res.item, url, channel, include);
  }

  return res.status;
}
// Public interface of this scraper: overview scraping and single scene scraping.
module.exports = { fetchLatest, fetchScene };