Added partial 'fcuk' (Exploited College Girls) scraper. Added file parameter for actor names and scene URLs.

This commit is contained in:
DebaucheryLibrarian 2020-07-15 03:24:47 +02:00
parent eca54c2a09
commit 17b3ba1272
54 changed files with 187 additions and 20 deletions

View File

@ -37,7 +37,7 @@ exports.up = knex => Promise.resolve()
table.float('entropy');
table.text('scraper', 32);
table.text('copyright', 100);
table.text('credit', 100);
table.text('source', 2100);
table.text('source_page', 2100);
@ -279,6 +279,9 @@ exports.up = knex => Promise.resolve()
table.integer('hip', 3);
table.boolean('natural_boobs');
table.integer('penis_length', 3);
table.integer('penis_girth', 3);
table.integer('height', 3);
table.integer('weight', 3);
table.text('eyes');
@ -349,6 +352,9 @@ exports.up = knex => Promise.resolve()
table.integer('hip', 3);
table.boolean('natural_boobs');
table.integer('penis_length', 3);
table.integer('penis_girth', 3);
table.integer('height', 3);
table.integer('weight', 3);
table.text('eyes');

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 170 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -139,6 +139,10 @@ const networks = [
url: 'https://www.cherrypimps.com',
description: 'CherryPimps your premium porn site to Download and Stream the hottest and most exclusive 4K HD videos and pictures on your phone, tablet, TV or console.',
},
{
slug: 'fcuk',
name: 'Fcuk',
},
{
slug: 'freeones',
name: 'FreeOnes',

View File

@ -2085,6 +2085,60 @@ const sites = [
url: 'https://www.freeones.com',
parent: 'freeones',
},
// FCUK
{
name: 'Exploited College Girls',
slug: 'exploitedcollegegirls',
alias: ['excogi', 'ecg'],
url: 'https://exploitedcollegegirls.com',
parent: 'fcuk',
parameters: {
blog: true,
},
},
{
name: 'Backroom Casting Couch',
slug: 'backroomcastingcouch',
url: 'https://backroomcastingcouch.com',
parent: 'fcuk',
parameters: {
blog: true,
},
},
{
name: 'Black Ambush',
slug: 'blackambush',
alias: ['interracial', 'bbc'],
url: 'https://blackambush.com',
parent: 'fcuk',
parameters: {
blog: true,
},
},
{
name: 'Nebraska Coeds',
slug: 'nebraskacoeds',
url: 'https://nebraskacoeds.com',
parent: 'fcuk',
},
{
name: 'South Beach Coeds',
slug: 'southbeachcoeds',
url: 'https://southbeachcoeds.com',
parent: 'fcuk',
},
{
name: 'Spring Break Life',
slug: 'springbreaklife',
url: 'https://springbreaklife.com',
parent: 'fcuk',
},
{
name: 'Euro Coeds',
slug: 'eurocoeds',
url: 'https://eurocoeds.com',
parent: 'fcuk',
},
// FOR BONDAGE
{
name: 'Crowd Bondage',

View File

@ -571,7 +571,7 @@ const sfw = Object.entries({
['iFBIdX54BOk', 'Keagan Henman'],
],
})
.map(([category, photos]) => photos.map(([photo, copyright], index) => ({
.map(([category, photos]) => photos.map(([photo, credit], index) => ({
id: photo,
path: `sfw/${category}/${photo}.jpeg`,
thumbnail: `sfw/${category}/thumbs/${photo}.jpeg`,
@ -580,8 +580,7 @@ const sfw = Object.entries({
sfw_media_id: null,
group: category,
index,
copyright,
comment: `Courtesy of ${copyright}`,
credit,
})))
.flat();

View File

@ -10,6 +10,7 @@ const fetchUpdates = require('./updates');
const { fetchScenes, fetchMovies } = require('./deep');
const { storeReleases, updateReleasesSearch } = require('./store-releases');
const { scrapeActors } = require('./actors');
const getFileEntries = require('./utils/file-entries');
async function init() {
if (argv.server) {
@ -21,13 +22,19 @@ async function init() {
await updateReleasesSearch();
}
const actors = argv.actors && await scrapeActors(argv.actors);
const actorsFromFile = argv.actorsFile && await getFileEntries(argv.actorsFile);
const actorNames = (argv.actors || []).concat(actorsFromFile || []);
const actors = actorNames.length > 0 && await scrapeActors(actorNames);
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates();
const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile);
const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []);
const deepScenes = argv.deep
? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
: [...(updateBaseScenes || []), ...(actorBaseScenes || [])];
const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);

View File

@ -30,6 +30,10 @@ const { argv } = yargs
type: 'array',
alias: 'actor',
})
.option('actors-file', {
describe: 'Scrape actors names from file',
type: 'string',
})
.option('actor-scenes', {
describe: 'Fetch all scenes for an actor',
type: 'boolean',
@ -53,10 +57,14 @@ const { argv } = yargs
alias: 'with-profiles',
default: false,
})
.option('scene', {
.option('scenes', {
describe: 'Scrape scene info from URL',
type: 'array',
alias: 'scenes',
alias: 'scene',
})
.option('scenes-file', {
describe: 'Scrape scene info from URLs in a file',
type: 'string',
})
.option('movie', {
describe: 'Scrape movie info from URL',

View File

@ -101,7 +101,7 @@ function toBaseSource(rawSource) {
if (rawSource.attempts) baseSource.attempts = rawSource.attempts;
if (rawSource.queueMethod) baseSource.queueMethod = rawSource.queueMethod;
if (rawSource.copyright) baseSource.copyright = rawSource.copyright;
if (rawSource.credit !== undefined) baseSource.credit = rawSource.credit;
if (rawSource.comment) baseSource.comment = rawSource.comment;
if (rawSource.group) baseSource.group = rawSource.group;
@ -569,7 +569,7 @@ function curateMediaEntry(media, index) {
source: media.src,
source_page: media.url,
scraper: media.scraper,
copyright: media.copyright,
credit: media.credit,
comment: media.comment,
};
@ -685,7 +685,7 @@ async function associateAvatars(profiles) {
? {
...profile,
avatarBaseMedia: toBaseMedias([profile.avatar], 'avatars', {
copyright: profile.network?.name || profile.site?.name || null,
credit: (profile.credit !== undefined && (profile.network?.name || profile.site?.name)) || null,
scraper: profile.scraper || null,
})[0],
}

View File

@ -69,7 +69,7 @@ function scrapeProfile(html) {
profile.avatar = {
src: `http://www.boobpedia.com${avatarPath}`,
copyright: null,
credit: null,
};
}

71
src/scrapers/fcuk.js Normal file
View File

@ -0,0 +1,71 @@
'use strict';
const qu = require('../utils/qu');
/**
 * Convert the items of an updates listing into release objects.
 *
 * @param {Array<{ query: Object }>} scenes - one entry per listed item, each exposing the `query` helper used below
 * @param {{ url: string }} channel - channel whose `url` is passed as the origin for links and images
 * @returns {Object[]} one release per listed item
 */
function scrapeLatest(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    // Links to the join page are excluded, so items without a scene page have no URL.
    release.url = query.url('a.more:not([href*="/join.php"])', 'href', { origin: channel.url });

    // Prefer the two numeric path segments of the scene URL as the entry ID.
    const sceneMatch = release.url
      ? new URL(release.url).pathname.match(/\/scene\/(\d+)\/(\d+)/)
      : null;

    if (sceneMatch) {
      release.entryId = sceneMatch.slice(1, 3).join('-');
    } else {
      // No usable scene URL: derive the ID from the thumbnail path instead.
      // Stays undefined when there is no thumbnail or it does not match.
      release.entryId = query.img('.bigthumb')?.match(/\/scenes\/(\w+)/)?.[1];
    }

    // The heading reads "<date> - <title>"; when it is missing or has no
    // separator, fall back to the plain `.videos h3` heading instead of throwing.
    release.title = query.q('h5 strong', true)?.match(/. - (.+)$/)?.[1] || query.text('.videos h3');
    release.description = query.text('p');

    release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);

    release.poster = query.img('.bigthumb', null, { origin: channel.url });
    release.photos = query.imgs('.smallthumb', null, { origin: channel.url });

    release.tags = query.all('a[href*="/keywords"]', true);

    return release;
  });
}
/**
 * Build a release object from a scene page.
 *
 * @param {{ query: Object }} context - page context exposing the `query` helper used below
 * @param {string} url - scene URL; its `/scene/<n>/<n>` path segments form the entry ID
 * @param {{ url: string }} channel - channel whose `url` is passed as the origin for images
 * @returns {Object} release with entryId, title, description, actors, tags, poster and photos
 * @throws {TypeError} when `url` cannot be parsed by the URL constructor
 */
function scrapeScene({ query }, url, channel) {
  const release = {};

  // A URL without the expected /scene/<n>/<n> path yields a null ID
  // rather than a TypeError from calling .slice() on a failed match.
  const sceneMatch = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/);
  release.entryId = sceneMatch ? sceneMatch.slice(1, 3).join('-') : null;

  release.title = query.q('h4 strong, .videos h3', true);
  release.description = query.q('#about p, .videos p', true);

  // Actor names are taken from the `video-<name>` part of links to /girl/ pages.
  const actors = query.urls('a[href*="/girl/"]')
    .map(actorUrl => actorUrl.match(/video-(\w+)/)?.[1])
    .filter(Boolean);

  if (actors.length > 0) {
    release.actors = actors;
  } else {
    // No actor links found: use the title as the actor name (presumably the
    // title is the performer's name on such pages; confirm against the site).
    // A missing title gives an empty list rather than [null].
    release.actors = [release.title].filter(Boolean);
  }

  release.tags = query.all('.info a[href*="/keywords"], .buttons a[href*="/keywords"]', true);

  release.poster = query.img('#info .main-preview, .bigthumb', null, { origin: channel.url });
  release.photos = [query.img('.previewmed img', null, { origin: channel.url })]
    .concat(query.imgs('.hd-clip img, .smallthumb', null, { origin: channel.url }))
    .filter(Boolean);

  return release;
}
/**
 * Fetch one page of a channel's video updates and scrape its `.videos` items.
 *
 * @param {{ url: string }} channel - channel whose `url` is the base of the updates path
 * @param {number} [page=1] - 1-based page number; the path offset advances by 10 per page
 * @returns {Promise<Object[]|*>} scraped releases, or the response status when the request was not ok
 */
async function fetchLatest(channel, page = 1) {
  const offset = (page - 1) * 10;
  const res = await qu.getAll(`${channel.url}/free/updates/videos/${offset}`, '.videos');

  if (!res.ok) {
    return res.status;
  }

  return scrapeLatest(res.items, channel);
}
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - scene page URL
 * @param {{ url: string }} channel - channel handed through to the scene scraper
 * @returns {Promise<Object|*>} scraped release, or the response status when the request was not ok
 */
async function fetchScene(url, channel) {
  const res = await qu.get(url);

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.item, url, channel);
}
// Exported scraper entry points: the updates listing and the single-scene lookup.
module.exports = {
fetchLatest,
fetchScene,
};

View File

@ -52,7 +52,7 @@ function scrapeProfile(html, actorName) {
profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);
const avatar = document.querySelector('.profile-image-large img').src;
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, copyright: null };
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null };
return profile;
}

View File

@ -19,6 +19,7 @@ const evilangel = require('./evilangel');
const fakehub = require('./fakehub');
const famedigital = require('./famedigital');
const fantasymassage = require('./fantasymassage');
const fcuk = require('./fcuk');
const fullpornnetwork = require('./fullpornnetwork');
const girlsway = require('./girlsway');
const hush = require('./hush');
@ -93,6 +94,7 @@ module.exports = {
fakehub,
famedigital,
fantasymassage,
fcuk,
forbondage: porndoe,
fullpornnetwork,
girlsway,

16
src/utils/file-entries.js Normal file
View File

@ -0,0 +1,16 @@
'use strict';
const fs = require('fs');
/**
 * Read a UTF-8 text file and return its non-empty lines, trimmed.
 *
 * @param {string} location - path of the file to read
 * @returns {Promise<string[]>} trimmed lines in file order, blank lines omitted
 * @throws {Error} when no location is given; read errors from `fs` propagate as rejections
 */
async function getFileEntries(location) {
  if (!location) {
    throw new Error('No filepath provided');
  }

  const contents = await fs.promises.readFile(location, 'utf-8');
  const entries = [];

  for (const line of contents.split(/\n/)) {
    const trimmed = line.trim();

    // Whitespace-only lines trim down to '' and are dropped.
    if (trimmed) {
      entries.push(trimmed);
    }
  }

  return entries;
}
module.exports = getFileEntries;

View File

@ -125,7 +125,7 @@ function date(context, selector, format, match, attr = 'textContent') {
return extractDate(dateString, format, match);
}
function image(context, selector = 'img', attr, origin, protocol = 'https') {
function image(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
const imageEl = (attr && q(context, selector, attr))
|| q(context, selector, 'data-src')
|| q(context, selector, 'src');
@ -133,7 +133,7 @@ function image(context, selector = 'img', attr, origin, protocol = 'https') {
return prefixUrl(imageEl, origin, protocol);
}
function images(context, selector = 'img', attr, origin, protocol = 'https') {
function images(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
const attribute = attr
|| (q(context, selector, 'data-src') && 'data-src')
|| (q(context, selector, 'src') && 'src');
@ -143,31 +143,31 @@ function images(context, selector = 'img', attr, origin, protocol = 'https') {
return imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol));
}
function url(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
function url(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
const urlEl = q(context, selector, attr);
return attr ? prefixUrl(urlEl, origin, protocol) : urlEl;
}
function urls(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
function urls(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
const urlEls = all(context, selector, attr);
return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls;
}
function poster(context, selector = 'video', attr = 'poster', origin, protocol = 'https') {
function poster(context, selector = 'video', attr = 'poster', { origin, protocol = 'https' } = {}) {
const posterEl = q(context, selector, attr);
return attr ? prefixUrl(posterEl, origin, protocol) : posterEl;
}
function video(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
function video(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
const trailerEl = q(context, selector, attr);
return attr ? prefixUrl(trailerEl, origin, protocol) : trailerEl;
}
function videos(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
function videos(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
const trailerEls = all(context, selector, attr);
return attr ? trailerEls.map(trailerEl => prefixUrl(trailerEl, origin, protocol)) : trailerEls;