Completed Perv City scraper. Outputting results as list.

This commit is contained in:
2019-03-04 04:19:03 +01:00
parent 5e4e12fbf8
commit af9565b296
7 changed files with 77 additions and 18 deletions

View File

@@ -2,16 +2,46 @@
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
/**
 * Turn one scene's HTML fragment into a release record.
 *
 * @param {string} html - Markup for a single scene, loaded into cheerio.
 * @param {{ url: string, id: * }} site - Site the scene belongs to; `url` is
 *   used as the prefix for the scene link and `id` is copied to `siteId`.
 * @returns {{ url: string, title: string, actors: string[], date: Date,
 *   rating: { likes: null, dislikes: null, stars: (number|null) }, siteId: * }}
 */
function scrape(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  // The title link carries both the scene path and its title.
  const titleLink = $('#scene_title_border a');
  const scenePath = titleLink.attr('href');

  // Drop the first comma-plus-space (regular, no-break or narrow no-break
  // space) separator from each actor link's text.
  const actors = [];
  for (const actorLink of $('.home_model_name a').toArray()) {
    actors.push($(actorLink).text().replace(/,[\u0020\u00A0\u202F]/, ''));
  }

  const dateText = $('.add_date').text();

  // One star image per rating point; no star images is reported as null.
  const starCount = $('img[src*="/star.png"]').toArray().length;

  return {
    url: `${site.url}/${scenePath}`,
    title: titleLink.attr('title'),
    actors,
    date: moment.utc(dateText, 'DD-MM-YYYY').toDate(),
    rating: {
      likes: null,
      dislikes: null,
      stars: starCount || null,
    },
    siteId: site.id,
  };
}
/**
 * Fetch the latest releases for a site and scrape each returned entry.
 *
 * Requests `final_latestupdateview.php` under `site.url` for entries 0
 * through 9, parses the response body as JSON and passes every item of its
 * `total_arr` property to `scrape`.
 *
 * @param {{ url: string, parameters: { tourId: * } }} site - Site to query;
 *   `parameters.tourId` is sent as the `tourId` query parameter.
 * @returns {Promise<{ latest: Object[] }>} The scraped releases.
 * @throws {SyntaxError} When the response body is not valid JSON.
 */
async function fetchReleases(site) {
  // The query string must be introduced with `?`; URLSearchParams also
  // percent-encodes the tour ID should it contain reserved characters.
  const query = new URLSearchParams({
    limitstart: '0',
    limitend: '9',
    websiteid: '0',
    deviceview: 'browser',
    tourId: String(site.parameters.tourId),
  });

  const res = await bhttp.get(`${site.url}/final_latestupdateview.php?${query}`);
  const elements = JSON.parse(res.body.toString());

  // NOTE(review): each `total_arr` item is presumably one scene's HTML
  // fragment, since it is handed to `scrape` as markup; confirm against the API.
  const latest = elements.total_arr.map((html) => scrape(html, site));

  return {
    latest,
  };
}
module.exports = fetchReleases;

View File

@@ -2,7 +2,7 @@
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { parse, startOfDay } = require('date-fns');
const moment = require('moment');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
@@ -22,7 +22,7 @@ function scrape(html, site) {
.map((actorIndex, actorElement) => $(actorElement).attr('title'))
.toArray();
const date = startOfDay(parse($(element).find('.sceneDate').text(), 'MM-DD-YYYY'));
const date = moment.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY').toDate();
return {
url,
@@ -34,6 +34,7 @@ function scrape(html, site) {
dislikes,
stars,
},
siteId: site.id,
};
});
}