forked from DebaucheryLibrarian/traxxx
Added partial 'fcuk' (Exploited College Girls) scraper. Added file parameter for actor names and scene URLs.
This commit is contained in:
11
src/app.js
11
src/app.js
@@ -10,6 +10,7 @@ const fetchUpdates = require('./updates');
|
||||
const { fetchScenes, fetchMovies } = require('./deep');
|
||||
const { storeReleases, updateReleasesSearch } = require('./store-releases');
|
||||
const { scrapeActors } = require('./actors');
|
||||
const getFileEntries = require('./utils/file-entries');
|
||||
|
||||
async function init() {
|
||||
if (argv.server) {
|
||||
@@ -21,13 +22,19 @@ async function init() {
|
||||
await updateReleasesSearch();
|
||||
}
|
||||
|
||||
const actors = argv.actors && await scrapeActors(argv.actors);
|
||||
const actorsFromFile = argv.actorsFile && await getFileEntries(argv.actorsFile);
|
||||
const actorNames = (argv.actors || []).concat(actorsFromFile || []);
|
||||
|
||||
const actors = actorNames.length > 0 && await scrapeActors(actorNames);
|
||||
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
|
||||
|
||||
const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates();
|
||||
|
||||
const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile);
|
||||
const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []);
|
||||
|
||||
const deepScenes = argv.deep
|
||||
? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
|
||||
? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
|
||||
: [...(updateBaseScenes || []), ...(actorBaseScenes || [])];
|
||||
|
||||
const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);
|
||||
|
||||
12
src/argv.js
12
src/argv.js
@@ -30,6 +30,10 @@ const { argv } = yargs
|
||||
type: 'array',
|
||||
alias: 'actor',
|
||||
})
|
||||
.option('actors-file', {
|
||||
describe: 'Scrape actors names from file',
|
||||
type: 'string',
|
||||
})
|
||||
.option('actor-scenes', {
|
||||
describe: 'Fetch all scenes for an actor',
|
||||
type: 'boolean',
|
||||
@@ -53,10 +57,14 @@ const { argv } = yargs
|
||||
alias: 'with-profiles',
|
||||
default: false,
|
||||
})
|
||||
.option('scene', {
|
||||
.option('scenes', {
|
||||
describe: 'Scrape scene info from URL',
|
||||
type: 'array',
|
||||
alias: 'scenes',
|
||||
alias: 'scene',
|
||||
})
|
||||
.option('scenes-file', {
|
||||
describe: 'Scrape scene info from URLs in a file',
|
||||
type: 'string',
|
||||
})
|
||||
.option('movie', {
|
||||
describe: 'Scrape movie info from URL',
|
||||
|
||||
@@ -101,7 +101,7 @@ function toBaseSource(rawSource) {
|
||||
if (rawSource.attempts) baseSource.attempts = rawSource.attempts;
|
||||
if (rawSource.queueMethod) baseSource.queueMethod = rawSource.queueMethod;
|
||||
|
||||
if (rawSource.copyright) baseSource.copyright = rawSource.copyright;
|
||||
if (rawSource.credit !== undefined) baseSource.credit = rawSource.credit;
|
||||
if (rawSource.comment) baseSource.comment = rawSource.comment;
|
||||
if (rawSource.group) baseSource.group = rawSource.group;
|
||||
|
||||
@@ -569,7 +569,7 @@ function curateMediaEntry(media, index) {
|
||||
source: media.src,
|
||||
source_page: media.url,
|
||||
scraper: media.scraper,
|
||||
copyright: media.copyright,
|
||||
credit: media.credit,
|
||||
comment: media.comment,
|
||||
};
|
||||
|
||||
@@ -685,7 +685,7 @@ async function associateAvatars(profiles) {
|
||||
? {
|
||||
...profile,
|
||||
avatarBaseMedia: toBaseMedias([profile.avatar], 'avatars', {
|
||||
copyright: profile.network?.name || profile.site?.name || null,
|
||||
credit: (profile.credit !== undefined && (profile.network?.name || profile.site?.name)) || null,
|
||||
scraper: profile.scraper || null,
|
||||
})[0],
|
||||
}
|
||||
|
||||
@@ -69,7 +69,7 @@ function scrapeProfile(html) {
|
||||
|
||||
profile.avatar = {
|
||||
src: `http://www.boobpedia.com${avatarPath}`,
|
||||
copyright: null,
|
||||
credit: null,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
71
src/scrapers/fcuk.js
Normal file
71
src/scrapers/fcuk.js
Normal file
@@ -0,0 +1,71 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
|
||||
/**
 * Turn the scene teasers of an updates page into base releases.
 *
 * Every lookup that relies on a regex match is null-safe: a teaser with an unexpected
 * link, thumbnail or heading yields a release with the affected field left undefined
 * instead of aborting the whole page with a TypeError.
 *
 * @param {Array<{ query: object }>} scenes - one query context per teaser (fetchLatest passes the `.videos` items)
 * @param {{ url: string }} channel - channel being scraped; its URL is passed as origin for links and images
 * @returns {object[]} one release object per teaser
 */
function scrapeLatest(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		// links pointing at the join page are not scene pages, so they are excluded
		release.url = query.url('a.more:not([href*="/join.php"])', 'href', { origin: channel.url });

		if (release.url) {
			// /scene/<a>/<b> becomes "<a>-<b>"; undefined when the path has another shape
			release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)\/(\d+)/)?.slice(1, 3).join('-');
		} else {
			// no scene link: derive the ID from the thumbnail path, if there is a thumbnail
			release.entryId = query.img('.bigthumb')?.match(/\/scenes\/(\w+)/)?.[1];
		}

		// prefer the part after " - " in the heading, fall back to the plain teaser heading
		release.title = query.q('h5 strong', true)?.match(/. - (.+)$/)?.[1] || query.text('.videos h3');
		release.description = query.text('p');
		release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);

		release.poster = query.img('.bigthumb', null, { origin: channel.url });
		release.photos = query.imgs('.smallthumb', null, { origin: channel.url });

		release.tags = query.all('a[href*="/keywords"]', true);

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release.
 *
 * @param {{ query: object }} item - query context of the fetched scene page
 * @param {string} url - absolute URL of the scene page; the entry ID is taken from its path
 * @param {{ url: string }} channel - channel being scraped; its URL is passed as origin for images
 * @returns {object} release with entryId, title, description, actors, tags, poster and photos
 */
function scrapeScene({ query }, url, channel) {
	const release = {};

	// /scene/<a>/<b> becomes "<a>-<b>"; undefined (rather than a TypeError) for any other path
	release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/)?.slice(1, 3).join('-');

	release.title = query.q('h4 strong, .videos h3', true);
	release.description = query.q('#about p, .videos p', true);

	// actor names are read from the "video-<name>" part of the /girl/ links
	const actors = query.urls('a[href*="/girl/"]')
		.map(actorUrl => actorUrl.match(/video-(\w+)/)?.[1])
		.filter(Boolean);

	// Without /girl/ links the title is used as the only actor entry;
	// a missing title must not end up as a null actor.
	release.actors = actors.length > 0
		? actors
		: [release.title].filter(Boolean);

	release.tags = query.all('.info a[href*="/keywords"], .buttons a[href*="/keywords"]', true);

	release.poster = query.img('#info .main-preview, .bigthumb', null, { origin: channel.url });
	release.photos = [query.img('.previewmed img', null, { origin: channel.url })]
		.concat(query.imgs('.hd-clip img, .smallthumb', null, { origin: channel.url }))
		.filter(Boolean);

	return release;
}
|
||||
|
||||
/**
 * Fetch one page of the channel's latest updates.
 *
 * @param {{ url: string }} channel - channel whose updates are requested
 * @param {number} [page=1] - 1-based page number; the URL takes an offset of `(page - 1) * 10`
 *   (presumably 10 scenes per page, confirm against the site)
 * @returns {Promise<object[]|number>} the scraped releases, or the response status when the request failed
 */
async function fetchLatest(channel, page = 1) {
	const offset = (page - 1) * 10;
	const url = `${channel.url}/free/updates/videos/${offset}`;
	const res = await qu.getAll(url, '.videos');

	return res.ok ? scrapeLatest(res.items, channel) : res.status;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - URL of the scene page
 * @param {{ url: string }} channel - channel the scene belongs to, forwarded to scrapeScene
 * @returns {Promise<object|number>} the scraped release, or the response status when the request failed
 */
async function fetchScene(url, channel) {
	const res = await qu.get(url);

	return res.ok ? scrapeScene(res.item, url, channel) : res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
@@ -52,7 +52,7 @@ function scrapeProfile(html, actorName) {
|
||||
profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);
|
||||
|
||||
const avatar = document.querySelector('.profile-image-large img').src;
|
||||
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, copyright: null };
|
||||
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null };
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ const evilangel = require('./evilangel');
|
||||
const fakehub = require('./fakehub');
|
||||
const famedigital = require('./famedigital');
|
||||
const fantasymassage = require('./fantasymassage');
|
||||
const fcuk = require('./fcuk');
|
||||
const fullpornnetwork = require('./fullpornnetwork');
|
||||
const girlsway = require('./girlsway');
|
||||
const hush = require('./hush');
|
||||
@@ -93,6 +94,7 @@ module.exports = {
|
||||
fakehub,
|
||||
famedigital,
|
||||
fantasymassage,
|
||||
fcuk,
|
||||
forbondage: porndoe,
|
||||
fullpornnetwork,
|
||||
girlsway,
|
||||
|
||||
16
src/utils/file-entries.js
Normal file
16
src/utils/file-entries.js
Normal file
@@ -0,0 +1,16 @@
|
||||
'use strict';
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
/**
 * Read a text file and return its entries, one per line.
 *
 * Lines are trimmed and empty lines are dropped. LF, CRLF and CR-only line endings
 * are all treated as separators.
 *
 * @param {string} location - path of the file to read
 * @returns {Promise<string[]>} the non-empty, trimmed lines in file order
 * @throws {Error} when no path is given; read errors from fs are passed on unchanged
 */
async function getFileEntries(location) {
	if (!location) {
		throw new Error('No filepath provided');
	}

	const file = await fs.promises.readFile(location, 'utf-8');

	return file
		.split(/\r\n?|\n/)
		.map(entry => entry.trim())
		.filter(Boolean);
}
|
||||
|
||||
module.exports = getFileEntries;
|
||||
@@ -125,7 +125,7 @@ function date(context, selector, format, match, attr = 'textContent') {
|
||||
return extractDate(dateString, format, match);
|
||||
}
|
||||
|
||||
function image(context, selector = 'img', attr, origin, protocol = 'https') {
|
||||
function image(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
|
||||
const imageEl = (attr && q(context, selector, attr))
|
||||
|| q(context, selector, 'data-src')
|
||||
|| q(context, selector, 'src');
|
||||
@@ -133,7 +133,7 @@ function image(context, selector = 'img', attr, origin, protocol = 'https') {
|
||||
return prefixUrl(imageEl, origin, protocol);
|
||||
}
|
||||
|
||||
function images(context, selector = 'img', attr, origin, protocol = 'https') {
|
||||
function images(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
|
||||
const attribute = attr
|
||||
|| (q(context, selector, 'data-src') && 'data-src')
|
||||
|| (q(context, selector, 'src') && 'src');
|
||||
@@ -143,31 +143,31 @@ function images(context, selector = 'img', attr, origin, protocol = 'https') {
|
||||
return imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol));
|
||||
}
|
||||
|
||||
/**
 * Query a single URL attribute.
 *
 * Reads `attr` for `selector` through `q`. When `attr` is truthy the value is passed to
 * `prefixUrl` along with `origin` and `protocol` (presumably to make relative links
 * absolute; confirm against `prefixUrl`). Otherwise the raw `q` result is returned.
 *
 * `origin` and `protocol` are taken from an options object, not from positional arguments.
 *
 * @param {*} context - query context forwarded to `q`
 * @param {string} [selector='a'] - selector forwarded to `q`
 * @param {string} [attr='href'] - attribute forwarded to `q`
 * @param {{ origin?: string, protocol?: string }} [options] - `protocol` defaults to 'https'
 * @returns {*} the `prefixUrl` result, or the raw `q` result when `attr` is falsy
 */
function url(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
	const urlEl = q(context, selector, attr);

	return attr ? prefixUrl(urlEl, origin, protocol) : urlEl;
}
|
||||
|
||||
/**
 * Query a URL attribute on every matching element.
 *
 * Reads `attr` for all `selector` matches through `all`. When `attr` is truthy each value
 * is passed to `prefixUrl` along with `origin` and `protocol` (presumably to make relative
 * links absolute; confirm against `prefixUrl`). Otherwise the raw `all` result is returned.
 *
 * `origin` and `protocol` are taken from an options object, not from positional arguments.
 *
 * @param {*} context - query context forwarded to `all`
 * @param {string} [selector='a'] - selector forwarded to `all`
 * @param {string} [attr='href'] - attribute forwarded to `all`
 * @param {{ origin?: string, protocol?: string }} [options] - `protocol` defaults to 'https'
 * @returns {Array} one `prefixUrl` result per match, or the raw `all` result when `attr` is falsy
 */
function urls(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
	const urlEls = all(context, selector, attr);

	return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls;
}
|
||||
|
||||
/**
 * Query a poster attribute.
 *
 * Reads `attr` for `selector` through `q`. When `attr` is truthy the value is passed to
 * `prefixUrl` along with `origin` and `protocol` (presumably to make relative paths
 * absolute; confirm against `prefixUrl`). Otherwise the raw `q` result is returned.
 *
 * `origin` and `protocol` are taken from an options object, not from positional arguments.
 *
 * @param {*} context - query context forwarded to `q`
 * @param {string} [selector='video'] - selector forwarded to `q`
 * @param {string} [attr='poster'] - attribute forwarded to `q`
 * @param {{ origin?: string, protocol?: string }} [options] - `protocol` defaults to 'https'
 * @returns {*} the `prefixUrl` result, or the raw `q` result when `attr` is falsy
 */
function poster(context, selector = 'video', attr = 'poster', { origin, protocol = 'https' } = {}) {
	const posterEl = q(context, selector, attr);

	return attr ? prefixUrl(posterEl, origin, protocol) : posterEl;
}
|
||||
|
||||
/**
 * Query a video source attribute.
 *
 * Reads `attr` for `selector` through `q`. When `attr` is truthy the value is passed to
 * `prefixUrl` along with `origin` and `protocol` (presumably to make relative paths
 * absolute; confirm against `prefixUrl`). Otherwise the raw `q` result is returned.
 *
 * `origin` and `protocol` are taken from an options object, not from positional arguments.
 *
 * @param {*} context - query context forwarded to `q`
 * @param {string} [selector='source'] - selector forwarded to `q`
 * @param {string} [attr='src'] - attribute forwarded to `q`
 * @param {{ origin?: string, protocol?: string }} [options] - `protocol` defaults to 'https'
 * @returns {*} the `prefixUrl` result, or the raw `q` result when `attr` is falsy
 */
function video(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
	const trailerEl = q(context, selector, attr);

	return attr ? prefixUrl(trailerEl, origin, protocol) : trailerEl;
}
|
||||
|
||||
function videos(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
|
||||
function videos(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
|
||||
const trailerEls = all(context, selector, attr);
|
||||
|
||||
return attr ? trailerEls.map(trailerEl => prefixUrl(trailerEl, origin, protocol)) : trailerEls;
|
||||
|
||||
Reference in New Issue
Block a user