Experimentally scraping data from HardX.
This commit is contained in:
parent
cf8f299061
commit
71aa31dda6
File diff suppressed because it is too large
Load Diff
67
package.json
67
package.json
|
@ -1,30 +1,41 @@
|
|||
{
|
||||
"name": "traxxx",
|
||||
"version": "1.0.0",
|
||||
"description": "All the latest porn releases in one place",
|
||||
"main": "src/app.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://gitea.unknown.name/niels/traxxx.git"
|
||||
},
|
||||
"keywords": [
|
||||
"porn",
|
||||
"releases",
|
||||
"updates",
|
||||
"nsfw"
|
||||
],
|
||||
"author": "Niels Simenon",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@babel/cli": "^7.2.3",
|
||||
"@babel/core": "^7.3.4",
|
||||
"@babel/preset-env": "^7.3.4",
|
||||
"babel-preset-airbnb": "^3.2.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"config": "^3.0.1"
|
||||
}
|
||||
"name": "traxxx",
|
||||
"version": "1.0.0",
|
||||
"description": "All the latest porn releases in one place",
|
||||
"main": "src/app.js",
|
||||
"scripts": {
|
||||
"start": "node src/app.js",
|
||||
"eslint": "eslint src/",
|
||||
"eslint-watch": "esw --watch src/"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://gitea.unknown.name/niels/traxxx.git"
|
||||
},
|
||||
"keywords": [
|
||||
"porn",
|
||||
"releases",
|
||||
"updates",
|
||||
"nsfw"
|
||||
],
|
||||
"author": "Niels Simenon",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@babel/cli": "^7.2.3",
|
||||
"@babel/core": "^7.3.4",
|
||||
"@babel/preset-env": "^7.3.4",
|
||||
"babel-eslint": "^10.0.1",
|
||||
"babel-preset-airbnb": "^3.2.0",
|
||||
"eslint": "^5.15.0",
|
||||
"eslint-config-airbnb-base": "^13.1.0",
|
||||
"eslint-plugin-import": "^2.16.0",
|
||||
"eslint-watch": "^4.0.2"
|
||||
},
|
||||
"dependencies": {
|
||||
"bhttp": "^1.2.4",
|
||||
"cheerio": "^1.0.0-rc.2",
|
||||
"config": "^3.0.1",
|
||||
"date-fns": "^1.30.1",
|
||||
"terminal-kit": "^1.27.0"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const { parse, format } = require('date-fns');
|
||||
|
||||
/**
 * Convert like/dislike counts into a star score.
 *
 * The score is the weighted average in which a like counts as 5 and a
 * dislike counts as 1, floored to two decimals.
 *
 * @param {number} likes - Number of likes.
 * @param {number} dislikes - Number of dislikes.
 * @returns {number|null} Star score, or null when there are no votes or
 *   either count is not a finite number (avoids emitting NaN).
 */
function computeStars(likes, dislikes) {
    if (!Number.isFinite(likes) || !Number.isFinite(dislikes)) {
        return null;
    }

    const votes = likes + dislikes;

    if (votes <= 0) {
        return null;
    }

    return Math.floor(((likes * 5 + dislikes) / votes) * 100) / 100;
}

/**
 * Parse a scene date written as MM-DD-YYYY into a local-time Date.
 *
 * Parsed by hand because the date-fns 1.x `parse` function expects an
 * options object as its second argument, not a format string, so the
 * format would be ignored and the result left to engine-specific
 * `new Date(string)` behaviour.
 *
 * @param {string} text - Raw date text taken from the page.
 * @returns {Date|null} The parsed date, or null when the text does not
 *   describe a valid MM-DD-YYYY calendar date.
 */
function parseSceneDate(text) {
    const match = /^(\d{2})-(\d{2})-(\d{4})$/.exec(String(text).trim());

    if (!match) {
        return null;
    }

    const month = Number(match[1]);
    const day = Number(match[2]);
    const year = Number(match[3]);
    const date = new Date(year, month - 1, day);

    // Date silently rolls over out-of-range parts (e.g. month 13); reject those.
    if (date.getFullYear() !== year || date.getMonth() !== month - 1 || date.getDate() !== day) {
        return null;
    }

    return date;
}

/**
 * Fetch the HardX video listing, extract every scene on the page and log
 * the resulting array to the console.
 *
 * Each scene is an object with `url`, `title`, `actors`, `date` and a
 * `rating` of `{ likes, dislikes, stars }`.
 *
 * @returns {Promise<void>} Resolves once the scenes have been logged.
 * @throws {Error} When the listing page does not respond with HTTP 200.
 */
async function init() {
    const baseUrl = 'https://www.hardx.com';
    const res = await bhttp.get(`${baseUrl}/en/videos`, {});

    // Do not try to scrape an error page; it would just yield an empty list.
    if (res.statusCode !== 200) {
        throw new Error(`Unexpected response status ${res.statusCode} from ${baseUrl}/en/videos`);
    }

    const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });
    const scenesElements = $('.sceneInfo').toArray();

    const scenes = scenesElements.map((element) => {
        const sceneLinkElement = $(element).find('.sceneTitle a');
        const url = `${baseUrl}${sceneLinkElement.attr('href')}`;
        const title = sceneLinkElement.attr('title');

        // The first two `.value` elements are read as likes and dislikes, in that order.
        const [likes, dislikes] = $(element).find('.value')
            .toArray()
            .map(value => Number($(value).text()));
        const stars = computeStars(likes, dislikes);

        const actors = $(element).find('.sceneActors a')
            .map((actorIndex, actorElement) => $(actorElement).attr('title'))
            .toArray();

        const date = parseSceneDate($(element).find('.sceneDate').text());

        return {
            url,
            title,
            actors,
            date,
            rating: {
                likes,
                dislikes,
                stars,
            },
        };
    });

    console.log(scenes);
}
|
||||
|
||||
// Run the scraper; report any failure instead of leaving the promise
// floating as an unhandled rejection, and signal it through the exit code.
init().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
|
Loading…
Reference in New Issue