Added generic ElevatedX scraper. Changed FCUK to ExploitedX network. Testing ElevatedX scraper with ExploitedX network.

DebaucheryLibrarian
2021-02-13 04:49:00 +01:00
parent 4c306effb7
commit f79505f3f6
74 changed files with 158 additions and 44 deletions


@@ -435,6 +435,10 @@ async function storeFile(media, options) {
return storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options);
}
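// posters, photos and covers must be images; any other detected mimetype is rejected here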
if (['posters', 'photos', 'covers'].includes(media.role)) {
throw new Error(`Media for '${media.role}' must be an image, but '${media.meta.mimetype}' was detected`);
}
const [stat] = await Promise.all([
fsPromises.stat(media.file.path),
fsPromises.mkdir(path.join(config.media.path, filedir), { recursive: true }),

src/scrapers/elevatedx.js (new file, 112 lines)

@@ -0,0 +1,112 @@
'use strict';
const format = require('template-format');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
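// derives a stable entry ID from the release date plus either the trailer URL slug or the title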
function deriveEntryId(release) {
if (release.date && release.url) {
const slug = new URL(release.url).pathname.match(/\/trailers\/(.*)\.html/)?.[1];
// fall back to the title-based ID below if the URL does not contain a trailer slug
if (slug) {
return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(slug)}`;
}
}
if (release.date && release.title) {
return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
}
return null;
}
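// scrapes listing items from the 'classic' ElevatedX tour layout (.updateInfo/.updateThumb markup)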
function scrapeAllClassic(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
release.url = query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])');
release.entryId = query.url('.updateThumb img', 'alt');
release.title = query.cnt('.updateInfo h5 a');
release.actors = query.cnts('.tour_update_models a');
release.date = query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY');
release.poster = query.img('.updateThumb img');
const trailer = query.q('.updateInfo h5 a', 'onclick')?.match(/'(.+)'/)?.[1];
if (trailer) {
release.trailer = `${channel.url}${trailer}`;
}
return release;
});
}
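// scrapes listing items from the 'tubular' ElevatedX layout; accNetworkReleases lets aggregator channels skip scenes already picked up from categorized sites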
function scrapeAllTubular(scenes, channel, accNetworkReleases) {
return scenes.map(({ query }) => {
const release = {};
release.title = query.q('h4 a', 'title') || query.q('h4 a', true);
release.url = query.url('h4 a');
release.date = query.date('.more-info-div', 'MMM D, YYYY');
release.duration = query.dur('.more-info-div');
const posterPath = query.q('.img-div img', 'src0_1x') || query.img('img.video_placeholder');
if (posterPath) {
const poster = /^http/.test(posterPath) ? posterPath : `${channel.parameters?.media || channel.url}${posterPath}`;
release.poster = [
poster.replace('-1x', '-3x'),
poster.replace('-1x', '-2x'),
poster,
];
}
release.teaser = query.video();
// release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1];
release.entryId = deriveEntryId(release);
if (channel.parameters?.accFilter && accNetworkReleases?.map(accRelease => accRelease.entryId).includes(release.entryId)) {
// filter out releases that were already scraped from a categorized site; requires sequential site scraping
return null;
}
return release;
});
}
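// generic latest-page fetcher; the layout-specific scrapeAll function is passed in by the wrappers below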
async function fetchLatest(site, page = 1, options, preData, allScraper) {
const url = (site.parameters?.latest && format(site.parameters.latest, { page }))
|| `${site.url}/categories/movies_${page}_d.html`;
const res = await qu.getAll(url, '.modelfeature, .item-video, .bodyArea .updateItem');
if (!res.ok) {
return res.status;
}
return allScraper(res.items, site, preData?.uniqueReleases);
}
async function fetchLatestClassic(channel, page, options, preData) {
return fetchLatest(channel, page, options, preData, scrapeAllClassic);
}
async function fetchLatestTubular(channel, page, options, preData) {
return fetchLatest(channel, page, options, preData, scrapeAllTubular);
}
module.exports = {
classic: {
fetchLatest: fetchLatestClassic,
scrapeAll: scrapeAllClassic,
},
tubular: {
fetchLatest: fetchLatestTubular,
scrapeAll: scrapeAllTubular,
},
};
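For reference, other ElevatedX-based site scrapers can delegate to this module by re-exporting the variant that matches their tour layout. A minimal sketch, assuming a hypothetical channel on the classic layout (the exploitedx scraper below wires up both variants the same way):

'use strict';
// hypothetical channel scraper that simply re-exports the generic ElevatedX classic scrapers
const elevatedx = require('./elevatedx');
module.exports = {
fetchLatest: elevatedx.classic.fetchLatest,
scrapeAll: elevatedx.classic.scrapeAll,
};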


@@ -4,6 +4,8 @@ const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
const elevatedx = require('./elevatedx');
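// generic ElevatedX scrapers; used below for both the classic tour pages and the blog/tubular layout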
function scrapeLatestBlog(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
@@ -134,52 +136,41 @@ function scrapeProfile({ query }, entity) {
}
async function fetchLatestBlog(channel, page) {
/*
const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
const res = await qu.getAll(url, '.videos');
const url = `${channel.parameters?.latest || channel.url}/movies_${page}_d.html`;
const res = await qu.getAll(url, '.item-update');
*/
const scenes = await elevatedx.tubular.fetchLatest(channel, page);
console.log(scenes);
// return res.ok ? scrapeLatestBlog(res.items, channel) : res.status;
}
async function fetchLatest(channel, page = 1) {
if (channel.parameters?.blog) {
return fetchLatestBlog(channel, page);
}
/*
const url = `${channel.url}/categories/Movies_${page}_d.html`;
const res = await qu.getAll(url, '.bodyArea .updateItem');
return res.ok ? scrapeAll(res.items, channel) : res.status;
*/
const scenes = await elevatedx.classic.fetchLatest(channel, page);
return scenes;
}
async function fetchUpcoming(channel) {
if (channel.parameters?.blog) {
return [];
}
const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');
return res.ok ? scrapeAll(res.items, channel) : res.status;
}
async function fetchScene(url, channel) {
const res = await qu.get(url);
if (res.ok) {
if (channel.parameters?.blog) {
return scrapeSceneBlog(res.item, url, channel);
}
return scrapeScene(res.item, url, channel);
}
return res.status;
}
async function fetchProfile(baseActor, entity) {
const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${baseActor.name.slice(0, 1)}`, '.model');
console.log(baseActor);
if (modelsRes.ok) {
const models = modelsRes.items.filter(({ query }) => query.cnt('strong') === baseActor.name);
@@ -199,8 +190,12 @@ async function fetchProfile(baseActor, entity) {
}
module.exports = {
fetchLatest: elevatedx.classic.fetchLatest,
fetchScene,
fetchUpcoming,
fetchProfile,
scrapeScene,
blog: {
fetchLatest: elevatedx.tubular.fetchLatest,
scrapeScene: scrapeSceneBlog,
},
};


@@ -368,4 +368,5 @@ module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
scrapeAllT1,
};


@@ -18,7 +18,7 @@ const dorcel = require('./dorcel');
const elegantangel = require('./elegantangel');
const famedigital = require('./famedigital');
const firstanalquest = require('./firstanalquest');
const fcuk = require('./fcuk');
const exploitedx = require('./exploitedx');
const fullpornnetwork = require('./fullpornnetwork');
const gamma = require('./gamma');
const hitzefrei = require('./hitzefrei');
@@ -87,7 +87,7 @@ const scrapers = {
dorcel,
elegantangel,
famedigital,
fcuk,
exploitedx,
firstanalquest,
forbondage: porndoe,
fullpornnetwork,
@@ -179,7 +179,7 @@ const scrapers = {
dtfsluts: fullpornnetwork,
elegantangel,
evilangel: gamma,
exploitedcollegegirls: fcuk,
exploitedcollegegirls: exploitedx,
eyeontheguy: hush,
fakehub: mindgeek,
firstanalquest,