Experimentally scraping data from HardX.
This commit is contained in:
45
src/app.js
Normal file
45
src/app.js
Normal file
@@ -0,0 +1,45 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const { parse, format } = require('date-fns');
|
||||
|
||||
/**
 * Convert like/dislike counts into a star score truncated to two decimals.
 * Each like is weighted as five stars and each dislike as one star.
 *
 * @param {number} likes - Number of likes scraped for the scene.
 * @param {number} dislikes - Number of dislikes scraped for the scene.
 * @returns {number|null} The star score, or null when a count is missing or
 *   non-numeric, or when there are no votes (the average would be 0 / 0).
 */
function calculateStars(likes, dislikes) {
    const votes = likes + dislikes;

    if (!Number.isFinite(votes) || votes <= 0) {
        return null;
    }

    return Math.floor(((likes * 5 + dislikes) / votes) * 100) / 100;
}

/**
 * Parse a scene date written as MM-DD-YYYY into a Date at local midnight.
 *
 * NOTE(review): parsed by hand because date-fns `parse` appears to treat a
 * second 'MM-DD-YYYY' argument differently across major versions — confirm
 * against the installed version before switching back to the library call.
 *
 * @param {string} text - Raw text of the scene's date element.
 * @returns {Date|null} The parsed date, or null when no valid date is found.
 */
function parseSceneDate(text) {
    const match = /\b(\d{1,2})-(\d{1,2})-(\d{4})\b/.exec(String(text));

    if (match === null) {
        return null;
    }

    const month = Number(match[1]);
    const day = Number(match[2]);
    const year = Number(match[3]);
    const date = new Date(year, month - 1, day);

    // Date silently rolls over out-of-range parts (e.g. 02-31 becomes March),
    // so reject anything that does not round-trip.
    if (date.getFullYear() !== year || date.getMonth() !== month - 1 || date.getDate() !== day) {
        return null;
    }

    return date;
}

/**
 * Fetch the video listing page, extract url, title, actors, date and rating
 * for every `.sceneInfo` element, and print the resulting array.
 *
 * @returns {Promise<void>} Resolves once the scenes have been logged.
 * @throws {Error} When the listing page responds with a non-2xx status.
 */
async function init() {
    const baseUrl = 'https://www.hardx.com';
    const listUrl = `${baseUrl}/en/videos`;
    const res = await bhttp.get(listUrl, {});

    // Fail early rather than parse an error page as if it were the listing.
    if (res.statusCode < 200 || res.statusCode >= 300) {
        throw new Error(`Unexpected HTTP status ${res.statusCode} from ${listUrl}`);
    }

    const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });
    const scenesElements = $('.sceneInfo').toArray();

    const scenes = scenesElements.map((element) => {
        const sceneLinkElement = $(element).find('.sceneTitle a');
        const href = sceneLinkElement.attr('href');
        // Without this guard a missing href would yield "<baseUrl>undefined".
        const url = href ? `${baseUrl}${href}` : null;
        const title = sceneLinkElement.attr('title');

        // The first two `.value` elements are read as likes and dislikes.
        const [likes, dislikes] = $(element).find('.value')
            .toArray()
            .map(value => Number($(value).text()));
        const stars = calculateStars(likes, dislikes);

        const actors = $(element).find('.sceneActors a')
            .map((actorIndex, actorElement) => $(actorElement).attr('title'))
            .toArray();

        const date = parseSceneDate($(element).find('.sceneDate').text());

        return {
            url,
            title,
            actors,
            date,
            rating: {
                likes,
                dislikes,
                stars,
            },
        };
    });

    console.log(scenes);
}
|
||||
|
||||
// Report a failed scrape and signal it through the exit code instead of
// leaving the promise rejection unhandled.
init().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user