Experimentally scraping data from HardX.
This commit is contained in:
parent
cf8f299061
commit
71aa31dda6
File diff suppressed because it is too large
Load Diff
67
package.json
67
package.json
|
@ -1,30 +1,41 @@
|
||||||
{
|
{
|
||||||
"name": "traxxx",
|
"name": "traxxx",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"description": "All the latest porn releases in one place",
|
"description": "All the latest porn releases in one place",
|
||||||
"main": "src/app.js",
|
"main": "src/app.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"start": "node src/app.js",
|
||||||
},
|
"eslint": "eslint src/",
|
||||||
"repository": {
|
"eslint-watch": "esw --watch src/"
|
||||||
"type": "git",
|
},
|
||||||
"url": "https://gitea.unknown.name/niels/traxxx.git"
|
"repository": {
|
||||||
},
|
"type": "git",
|
||||||
"keywords": [
|
"url": "https://gitea.unknown.name/niels/traxxx.git"
|
||||||
"porn",
|
},
|
||||||
"releases",
|
"keywords": [
|
||||||
"updates",
|
"porn",
|
||||||
"nsfw"
|
"releases",
|
||||||
],
|
"updates",
|
||||||
"author": "Niels Simenon",
|
"nsfw"
|
||||||
"license": "ISC",
|
],
|
||||||
"devDependencies": {
|
"author": "Niels Simenon",
|
||||||
"@babel/cli": "^7.2.3",
|
"license": "ISC",
|
||||||
"@babel/core": "^7.3.4",
|
"devDependencies": {
|
||||||
"@babel/preset-env": "^7.3.4",
|
"@babel/cli": "^7.2.3",
|
||||||
"babel-preset-airbnb": "^3.2.0"
|
"@babel/core": "^7.3.4",
|
||||||
},
|
"@babel/preset-env": "^7.3.4",
|
||||||
"dependencies": {
|
"babel-eslint": "^10.0.1",
|
||||||
"config": "^3.0.1"
|
"babel-preset-airbnb": "^3.2.0",
|
||||||
}
|
"eslint": "^5.15.0",
|
||||||
|
"eslint-config-airbnb-base": "^13.1.0",
|
||||||
|
"eslint-plugin-import": "^2.16.0",
|
||||||
|
"eslint-watch": "^4.0.2"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"bhttp": "^1.2.4",
|
||||||
|
"cheerio": "^1.0.0-rc.2",
|
||||||
|
"config": "^3.0.1",
|
||||||
|
"date-fns": "^1.30.1",
|
||||||
|
"terminal-kit": "^1.27.0"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const bhttp = require('bhttp');
|
||||||
|
const cheerio = require('cheerio');
|
||||||
|
const { parse, format } = require('date-fns');
|
||||||
|
|
||||||
|
/**
 * Convert like/dislike counts into a star rating.
 *
 * Each like is weighted as five stars and each dislike as one star; the
 * average is rounded down to two decimals.
 *
 * @param {number} likes - Number of likes.
 * @param {number} dislikes - Number of dislikes.
 * @returns {?number} The star rating, or null when there are no usable votes
 *   (zero votes, or counts that could not be read as numbers), where the
 *   plain division would yield NaN.
 */
function computeStars(likes, dislikes) {
    const votes = likes + dislikes;

    // Also catches NaN/undefined counts, for which `votes > 0` is false.
    if (!(votes > 0)) {
        return null;
    }

    return Math.floor(((likes * 5 + dislikes) / votes) * 100) / 100;
}

/**
 * Parse a scene date written as MM-DD-YYYY into a Date at local midnight.
 *
 * date-fns v1 `parse` does not accept a format string (its second argument
 * is an options object), so the fields are extracted explicitly here instead
 * of relying on engine-specific parsing of a non-ISO date string.
 *
 * @param {string} text - Raw text of the date element.
 * @returns {Date} The parsed date; text that is not MM-DD-YYYY is handed to
 *   date-fns `parse` exactly as before.
 */
function parseSceneDate(text) {
    const match = /^\s*(\d{2})-(\d{2})-(\d{4})\s*$/.exec(text);

    if (!match) {
        return parse(text, 'MM-DD-YYYY');
    }

    const [, month, day, year] = match;

    // The Date constructor takes a zero-based month.
    return new Date(Number(year), Number(month) - 1, Number(day));
}

/**
 * Fetch the HardX video listing page, extract every scene on it and log the
 * resulting array to the console.
 *
 * Each scene is reported as `{ url, title, actors, date, rating }`, where
 * `rating` holds `likes`, `dislikes` and the derived `stars`.
 *
 * @returns {Promise<void>} Resolves once the scenes have been logged; rejects
 *   if the request fails.
 */
async function init() {
    const baseUrl = 'https://www.hardx.com';
    const res = await bhttp.get(`${baseUrl}/en/videos`, {});
    const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });
    const scenesElements = $('.sceneInfo').toArray();

    const scenes = scenesElements.map((element) => {
        const sceneLinkElement = $(element).find('.sceneTitle a');
        const url = `${baseUrl}${sceneLinkElement.attr('href')}`;
        const title = sceneLinkElement.attr('title');

        // The first two `.value` elements are read as likes and dislikes.
        const [likes, dislikes] = $(element).find('.value')
            .toArray()
            .map(value => Number($(value).text()));
        const stars = computeStars(likes, dislikes);

        const actors = $(element).find('.sceneActors a')
            .map((actorIndex, actorElement) => $(actorElement).attr('title'))
            .toArray();

        const date = parseSceneDate($(element).find('.sceneDate').text());

        return {
            url,
            title,
            actors,
            date,
            rating: {
                likes,
                dislikes,
                stars,
            },
        };
    });

    console.log(scenes);
}
|
||||||
|
|
||||||
|
// Entry point: run the scraper and report any failure instead of leaving the
// returned promise unhandled.
init().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
|
Loading…
Reference in New Issue