'use strict'; const bhttp = require('bhttp'); const cheerio = require('cheerio'); const moment = require('moment'); function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const shootId = $('li').attr('id'); const sceneLinkElement = $('#scene_title_border a'); const url = `${site.url}/${sceneLinkElement.attr('href')}`; const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate(); const stars = $('img[src*="/star.png"]') .toArray() .map(element => $(element).attr('src')) .length || 0; return { url, shootId, title, actors, date, rating: { stars, }, site, }; } async function fetchLatest(site, page = 1) { const res = page === 1 ? await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`) : await bhttp.get(`${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`); const elements = JSON.parse(res.body.toString()); const latest = Object.values(elements.total_arr).map(html => scrape(html, site)); // total_arr is a key-value object for final_load_latestupdate_grid_view.php return latest; } module.exports = { fetchLatest, };