Experimentally scraping data from HardX.

This commit is contained in:
ThePendulum 2019-03-03 04:18:33 +01:00
parent cf8f299061
commit 71aa31dda6
3 changed files with 1771 additions and 67 deletions

1726
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,30 +1,41 @@
{ {
"name": "traxxx", "name": "traxxx",
"version": "1.0.0", "version": "1.0.0",
"description": "All the latest porn releases in one place", "description": "All the latest porn releases in one place",
"main": "src/app.js", "main": "src/app.js",
"scripts": { "scripts": {
"test": "echo \"Error: no test specified\" && exit 1" "start": "node src/app.js",
}, "eslint": "eslint src/",
"repository": { "eslint-watch": "esw --watch src/"
"type": "git", },
"url": "https://gitea.unknown.name/niels/traxxx.git" "repository": {
}, "type": "git",
"keywords": [ "url": "https://gitea.unknown.name/niels/traxxx.git"
"porn", },
"releases", "keywords": [
"updates", "porn",
"nsfw" "releases",
], "updates",
"author": "Niels Simenon", "nsfw"
"license": "ISC", ],
"devDependencies": { "author": "Niels Simenon",
"@babel/cli": "^7.2.3", "license": "ISC",
"@babel/core": "^7.3.4", "devDependencies": {
"@babel/preset-env": "^7.3.4", "@babel/cli": "^7.2.3",
"babel-preset-airbnb": "^3.2.0" "@babel/core": "^7.3.4",
}, "@babel/preset-env": "^7.3.4",
"dependencies": { "babel-eslint": "^10.0.1",
"config": "^3.0.1" "babel-preset-airbnb": "^3.2.0",
} "eslint": "^5.15.0",
"eslint-config-airbnb-base": "^13.1.0",
"eslint-plugin-import": "^2.16.0",
"eslint-watch": "^4.0.2"
},
"dependencies": {
"bhttp": "^1.2.4",
"cheerio": "^1.0.0-rc.2",
"config": "^3.0.1",
"date-fns": "^1.30.1",
"terminal-kit": "^1.27.0"
}
} }

45
src/app.js Normal file
View File

@ -0,0 +1,45 @@
'use strict';
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { parse, format } = require('date-fns');
// Scene dates are expected as month-day-year, e.g. "03-02-2019".
const SCENE_DATE_PATTERN = /^(\d{1,2})-(\d{1,2})-(\d{4})$/;

/**
 * Convert like/dislike counts into a star score.
 *
 * Each like counts as 5 stars and each dislike as 1 star; the mean is
 * truncated (not rounded) to two decimals.
 *
 * @param {number} likes - Number of likes.
 * @param {number} dislikes - Number of dislikes.
 * @returns {?number} The score, or null when there are no votes or either
 *   count is missing or non-numeric (the plain formula would yield NaN).
 */
function computeStars(likes, dislikes) {
  const votes = likes + dislikes;

  if (!Number.isFinite(votes) || votes <= 0) {
    return null;
  }

  return Math.floor(((likes * 5 + dislikes) / votes) * 100) / 100;
}

/**
 * Parse a scene date written as MM-DD-YYYY.
 *
 * date-fns 1.x `parse(argument, [options])` has no format-string parameter,
 * so a format passed as second argument is ignored and non-ISO text ends up
 * in implementation-dependent `Date` parsing. The expected format is
 * therefore decoded explicitly here.
 *
 * @param {string} text - Raw text of the date element.
 * @returns {Date} Local-midnight date for MM-DD-YYYY input; an Invalid Date
 *   for impossible calendar dates (e.g. 02-31-2019); otherwise whatever
 *   date-fns `parse` makes of the text.
 */
function parseSceneDate(text) {
  const trimmed = text.trim();
  const match = SCENE_DATE_PATTERN.exec(trimmed);

  if (!match) {
    // Unexpected format: let date-fns try (it understands ISO 8601).
    return parse(trimmed);
  }

  const [, month, day, year] = match.map(Number);
  const date = new Date(year, month - 1, day); // Date months are 0-indexed

  // new Date() silently rolls over out-of-range values; reject those.
  const isRealDate = date.getMonth() === month - 1 && date.getDate() === day;

  return isRealDate ? date : new Date(NaN);
}

/**
 * Fetch the HardX video listing page, extract the scenes found on it and
 * log them to the console.
 *
 * Each scene is an object with `url`, `title`, `actors`, `date` and a
 * `rating` of `{ likes, dislikes, stars }`.
 *
 * @returns {Promise<void>} Resolves once the scenes have been logged;
 *   rejects if the request or the parsing throws.
 */
async function init() {
  const baseUrl = 'https://www.hardx.com';
  const res = await bhttp.get(`${baseUrl}/en/videos`, {});
  const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });

  const scenes = $('.sceneInfo').toArray().map((element) => {
    const sceneElement = $(element);
    const sceneLinkElement = sceneElement.find('.sceneTitle a');

    // Resolve against the base URL so absolute hrefs stay intact, and avoid
    // producing "...comundefined" when the link has no href.
    const href = sceneLinkElement.attr('href');
    const url = href ? new URL(href, baseUrl).href : null;
    const title = sceneLinkElement.attr('title');

    // The first two `.value` elements are read as likes and dislikes.
    const [likes, dislikes] = sceneElement.find('.value')
      .toArray()
      .map(value => Number($(value).text()));
    const stars = computeStars(likes, dislikes);

    const actors = sceneElement.find('.sceneActors a')
      .map((actorIndex, actorElement) => $(actorElement).attr('title'))
      .toArray();

    const date = parseSceneDate(sceneElement.find('.sceneDate').text());

    return {
      url,
      title,
      actors,
      date,
      rating: {
        likes,
        dislikes,
        stars,
      },
    };
  });

  console.log(scenes);
}
// Run the scraper. Report failures and exit non-zero instead of leaving the
// returned promise's rejection unhandled.
init().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});