2020-02-12 03:39:57 +00:00
'use strict' ;
2020-11-01 01:58:21 +00:00
const qu = require ( '../utils/qu' ) ;
const http = require ( '../utils/http' ) ;
const slugify = require ( '../utils/slugify' ) ;
2020-02-12 03:39:57 +00:00
2020-11-01 01:58:21 +00:00
/**
 * Build release objects from the entries of the "brief" tour listing.
 *
 * @param {Array<{ query: object }>} scenes - one query context per listing entry; the
 *   context must expose the `q`, `date`, `url`, `dur` and `img` helpers used below
 * @param {{ url: string }} site - channel being scraped; `site.url` is prefixed to scene paths
 * @returns {object[]} one release per entry, in listing order
 */
function scrapeLatest(scenes, site) {
  return scenes.map(({ query }) => {
    const release = {};

    // the title element reads "Title | Actor | Actor ..."
    const titleEl = query.q('.galleryTitleText, .articleTitleText');
    const [title, ...actors] = titleEl.textContent.split('|');
    const date = query.date('.articlePostDateText td', 'MMM D, YYYY');

    // the second-to-last path segment is used as the entry ID
    const url = query.url(titleEl, 'a');
    [release.entryId] = url.split('/').slice(-2);
    release.url = `${site.url}${url}`;

    if (date) {
      release.title = title.trim();
      release.date = date;
    } else {
      // title should contain date instead ("MMM D, YYYY: Title"), not applicable in brief mode
      const separatorIndex = title.indexOf(':');

      release.title = title.slice(separatorIndex + 1).trim();
      release.date = qu.ed(title.slice(0, separatorIndex), 'MMM D, YYYY');
    }

    release.actors = actors.map((actor) => actor.trim());

    const description = query.q('.articleCopyText', true);

    if (description) {
      // cut the trailing parenthesised suffix; without one, keep the text whole
      // (slicing at lastIndexOf's -1 would silently drop the final character)
      const suffixIndex = description.lastIndexOf('(');

      release.description = suffixIndex === -1
        ? description
        : description.slice(0, suffixIndex);
    }

    const duration = query.dur('.articleCopyText a:nth-child(2)');
    if (duration) release.duration = duration;

    // only store a like count when the cell actually holds a number
    const likes = Number.parseInt(query.q('.articlePostDateText td:nth-child(3)', true), 10);
    if (!Number.isNaN(likes)) release.likes = likes;

    const cover = query.img('a img');

    if (cover) {
      // try the source without the thumbnail suffix first, the thumbnail itself as fallback
      release.covers = [[
        cover.replace('_thumbnail', ''),
        cover,
      ]];
    }

    return release;
  });
}
2020-11-01 01:58:21 +00:00
/**
 * Build release objects from the listing of a channel using the 'alt' scraper layout.
 *
 * @param {Array<{ query: object }>} scenes - one query context per listing column; the
 *   context must expose the `url`, `cnt`, `cnts`, `date` and `img` helpers used below
 * @param {{ parameters: { latest: string } }} site - channel being scraped;
 *   `site.parameters.latest` is passed as the origin for scene URLs
 * @returns {object[]} one release per listing column, in listing order
 */
function scrapeLatestAlt(scenes, site) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('figure a', 'href', { origin: site.parameters.latest });
    release.title = query.cnt('.has-text-weight-bold');
    release.date = query.date('span.tag', 'YYYY-MM-DD');
    release.actors = query.cnts('a.tag');

    const cover = query.img('.image img');

    // a column without an image must not abort the whole listing
    if (cover) {
      release.poster = cover.replace('poster_noplay', 'trailer_noplay');
      release.covers = [cover];
    }

    // no ID is read from the markup here; one is composed from the date and the title slug
    release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

    return release;
  });
}
/**
 * Build a release object from a scene page in the default layout.
 *
 * @param {{ query: object }} context - query context for the scene page; must expose the
 *   `q`, `url`, `date`, `dur`, `imgs`, `poster` and `trailer` helpers used below
 * @param {{ url: string }} site - channel being scraped; `site.url` is prefixed to the scene path
 * @returns {object} the scraped release
 */
function scrapeScene({ query }, site) {
  const release = {};

  // the title element reads "Title | Actor | Actor ..."
  const titleEl = query.q('.articleTitleText');
  const [title, ...actors] = titleEl.textContent.split('|');

  // the second-to-last path segment is used as the entry ID
  const url = query.url(titleEl, 'a');
  [release.entryId] = url.split('/').slice(-2);
  release.url = `${site.url}${url}`;

  release.title = title.trim();
  release.description = query.q('.articleCopyText', true);

  release.actors = actors.map((actor) => actor.trim());
  release.date = query.date('.articlePostDateText', 'MMMM D, YYYY');
  release.duration = query.dur('.articlePostDateText a:nth-child(2)');

  // first matching image is the cover, the remainder are photos
  const [cover, ...photos] = query.imgs('img[src*="images"]') ?? [];

  // avoid emitting `[undefined]` when the page has no matching image
  if (cover) release.covers = [cover];
  release.photos = photos;

  release.poster = query.poster();

  const trailer = query.trailer();
  if (trailer) release.trailer = { src: trailer };

  return release;
}
2020-11-01 01:58:21 +00:00
/**
 * Build a release object from a scene page of a channel using the 'alt' scraper layout.
 *
 * @param {{ query: object }} context - query context for the scene page; must expose the
 *   `cnt`, `cnts`, `date`, `img`, `imgs` and `video` helpers used below
 * @param {string} url - scene URL; not read by this function, kept for the caller's signature
 * @param {{ url: string }} channel - channel being scraped; `channel.url` hosts the play API
 * @param {object} session - HTTP session forwarded to the play API request
 * @returns {Promise<object>} the scraped release
 */
async function scrapeSceneAlt({ query }, url, channel, session) {
  const release = {};

  release.title = query.cnt('.columns div.is-size-5');
  release.description = query.cnt('.has-background-black-ter > div:nth-child(4)');
  release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');

  release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]');
  release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span');

  release.poster = query.img('#videoPlayer, #iodvideo', 'poster');
  release.photos = query.imgs('body > div:nth-child(6) img');

  // no ID is read from the markup here; one is composed from the date and the title slug
  release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

  release.trailer = query.video();

  if (!release.trailer) {
    // no video found in the page itself: ask the play API, reusing the page's session
    const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });

    if (trailerRes.ok) {
      release.trailer = trailerRes.body;
    }
  }

  return release;
}
2020-02-12 03:39:57 +00:00
/**
 * Fetch one page of the latest releases for a channel.
 *
 * @param {object} site - channel to scrape; `site.parameters.scraper === 'alt'` selects the
 *   alternative layout served from `site.parameters.latest`, otherwise `site.url` is used
 * @param {number} [page=1] - listing page to request
 * @returns {Promise<object[]|*>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
  if (site.parameters?.scraper === 'alt') {
    const altRes = await qu.getAll(`${site.parameters.latest}/home.php?o=latest&p=${page}`, 'body > .columns .column');

    return altRes.ok
      ? scrapeLatestAlt(altRes.items, site)
      : altRes.status;
  }

  // NOTE: a paintoy-specific variant (`${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`,
  // parsed with '#articleTable table[cellspacing="2"]' and fed to scrapeLatest) is currently disabled;
  // the original remark was "paintoy's site is (was?) partially broken, use front page".

  // JSON containing html as a property
  const res = await qu.get(`${site.url}/scripts/switch_tour.php?type=brief&page=${page}`);

  if (!res.ok) {
    return res.status;
  }

  return scrapeLatest(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site);
}
/**
 * Fetch a scene page and scrape it with the scraper matching the channel's layout.
 *
 * @param {string} url - scene page URL
 * @param {object} site - channel the scene belongs to; `site.parameters.scraper === 'alt'`
 *   selects the alternative scraper
 * @returns {Promise<object|*>} scraped release, or the response status when the request failed
 */
async function fetchScene(url, site) {
  // the session is created up front so the alt scraper can reuse it for its follow-up request
  const session = http.session();
  const res = await qu.get(url, null, null, { session });

  if (!res.ok) {
    return res.status;
  }

  if (site.parameters?.scraper === 'alt') {
    return scrapeSceneAlt(res.item, url, site, session);
  }

  return scrapeScene(res.item, site);
}
module . exports = {
2020-05-14 02:26:05 +00:00
fetchLatest ,
fetchScene ,
2020-02-12 03:39:57 +00:00
} ;