Added partial 'fcuk' (Exploited College Girls) scraper. Added file parameter for actor names and scene URLs.
|
@ -37,7 +37,7 @@ exports.up = knex => Promise.resolve()
|
|||
table.float('entropy');
|
||||
|
||||
table.text('scraper', 32);
|
||||
table.text('copyright', 100);
|
||||
table.text('credit', 100);
|
||||
|
||||
table.text('source', 2100);
|
||||
table.text('source_page', 2100);
|
||||
|
@ -279,6 +279,9 @@ exports.up = knex => Promise.resolve()
|
|||
table.integer('hip', 3);
|
||||
table.boolean('natural_boobs');
|
||||
|
||||
table.integer('penis_length', 3);
|
||||
table.integer('penis_girth', 3);
|
||||
|
||||
table.integer('height', 3);
|
||||
table.integer('weight', 3);
|
||||
table.text('eyes');
|
||||
|
@ -349,6 +352,9 @@ exports.up = knex => Promise.resolve()
|
|||
table.integer('hip', 3);
|
||||
table.boolean('natural_boobs');
|
||||
|
||||
table.integer('penis_length', 3);
|
||||
table.integer('penis_girth', 3);
|
||||
|
||||
table.integer('height', 3);
|
||||
table.integer('weight', 3);
|
||||
table.text('eyes');
|
||||
|
|
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 108 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 932 B |
After Width: | Height: | Size: 2.3 KiB |
After Width: | Height: | Size: 3.0 KiB |
After Width: | Height: | Size: 1.9 KiB |
After Width: | Height: | Size: 3.1 KiB |
After Width: | Height: | Size: 9.9 KiB |
After Width: | Height: | Size: 7.8 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 3.6 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 5.1 KiB |
After Width: | Height: | Size: 2.2 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 94 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 80 KiB |
After Width: | Height: | Size: 2.3 KiB |
After Width: | Height: | Size: 170 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 24 KiB |
After Width: | Height: | Size: 8.7 KiB |
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 3.5 KiB |
After Width: | Height: | Size: 49 KiB |
After Width: | Height: | Size: 3.5 KiB |
After Width: | Height: | Size: 84 KiB |
After Width: | Height: | Size: 16 KiB |
|
@ -139,6 +139,10 @@ const networks = [
|
|||
url: 'https://www.cherrypimps.com',
|
||||
description: 'CherryPimps your premium porn site to Download and Stream the hottest and most exclusive 4K HD videos and pictures on your phone, tablet, TV or console.',
|
||||
},
|
||||
{
|
||||
slug: 'fcuk',
|
||||
name: 'Fcuk',
|
||||
},
|
||||
{
|
||||
slug: 'freeones',
|
||||
name: 'FreeOnes',
|
||||
|
|
|
@ -2085,6 +2085,60 @@ const sites = [
|
|||
url: 'https://www.freeones.com',
|
||||
parent: 'freeones',
|
||||
},
|
||||
// FCUK
|
||||
{
|
||||
name: 'Exploited College Girls',
|
||||
slug: 'exploitedcollegegirls',
|
||||
alias: ['excogi', 'ecg'],
|
||||
url: 'https://exploitedcollegegirls.com',
|
||||
parent: 'fcuk',
|
||||
parameters: {
|
||||
blog: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'Backroom Casting Couch',
|
||||
slug: 'backroomcastingcouch',
|
||||
url: 'https://backroomcastingcouch.com',
|
||||
parent: 'fcuk',
|
||||
parameters: {
|
||||
blog: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'Black Ambush',
|
||||
slug: 'blackambush',
|
||||
alias: ['interracial', 'bbc'],
|
||||
url: 'https://blackambush.com',
|
||||
parent: 'fcuk',
|
||||
parameters: {
|
||||
blog: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'Nebraska Coeds',
|
||||
slug: 'nebraskacoeds',
|
||||
url: 'https://nebraskacoeds.com',
|
||||
parent: 'fcuk',
|
||||
},
|
||||
{
|
||||
name: 'South Beach Coeds',
|
||||
slug: 'southbeachcoeds',
|
||||
url: 'https://southbeachcoeds.com',
|
||||
parent: 'fcuk',
|
||||
},
|
||||
{
|
||||
name: 'Spring Break Life',
|
||||
slug: 'springbreaklife',
|
||||
url: 'https://springbreaklife.com',
|
||||
parent: 'fcuk',
|
||||
},
|
||||
{
|
||||
name: 'Euro Coeds',
|
||||
slug: 'eurocoeds',
|
||||
url: 'https://eurocoeds.com',
|
||||
parent: 'fcuk',
|
||||
},
|
||||
// FOR BONDAGE
|
||||
{
|
||||
name: 'Crowd Bondage',
|
||||
|
|
|
@ -571,7 +571,7 @@ const sfw = Object.entries({
|
|||
['iFBIdX54BOk', 'Keagan Henman'],
|
||||
],
|
||||
})
|
||||
.map(([category, photos]) => photos.map(([photo, copyright], index) => ({
|
||||
.map(([category, photos]) => photos.map(([photo, credit], index) => ({
|
||||
id: photo,
|
||||
path: `sfw/${category}/${photo}.jpeg`,
|
||||
thumbnail: `sfw/${category}/thumbs/${photo}.jpeg`,
|
||||
|
@ -580,8 +580,7 @@ const sfw = Object.entries({
|
|||
sfw_media_id: null,
|
||||
group: category,
|
||||
index,
|
||||
copyright,
|
||||
comment: `Courtesy of ${copyright}`,
|
||||
credit,
|
||||
})))
|
||||
.flat();
|
||||
|
||||
|
|
11
src/app.js
|
@ -10,6 +10,7 @@ const fetchUpdates = require('./updates');
|
|||
const { fetchScenes, fetchMovies } = require('./deep');
|
||||
const { storeReleases, updateReleasesSearch } = require('./store-releases');
|
||||
const { scrapeActors } = require('./actors');
|
||||
const getFileEntries = require('./utils/file-entries');
|
||||
|
||||
async function init() {
|
||||
if (argv.server) {
|
||||
|
@ -21,13 +22,19 @@ async function init() {
|
|||
await updateReleasesSearch();
|
||||
}
|
||||
|
||||
const actors = argv.actors && await scrapeActors(argv.actors);
|
||||
const actorsFromFile = argv.actorsFile && await getFileEntries(argv.actorsFile);
|
||||
const actorNames = (argv.actors || []).concat(actorsFromFile || []);
|
||||
|
||||
const actors = actorNames.length > 0 && await scrapeActors(actorNames);
|
||||
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
|
||||
|
||||
const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates();
|
||||
|
||||
const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile);
|
||||
const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []);
|
||||
|
||||
const deepScenes = argv.deep
|
||||
? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
|
||||
? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
|
||||
: [...(updateBaseScenes || []), ...(actorBaseScenes || [])];
|
||||
|
||||
const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);
|
||||
|
|
12
src/argv.js
|
@ -30,6 +30,10 @@ const { argv } = yargs
|
|||
type: 'array',
|
||||
alias: 'actor',
|
||||
})
|
||||
.option('actors-file', {
|
||||
describe: 'Scrape actors names from file',
|
||||
type: 'string',
|
||||
})
|
||||
.option('actor-scenes', {
|
||||
describe: 'Fetch all scenes for an actor',
|
||||
type: 'boolean',
|
||||
|
@ -53,10 +57,14 @@ const { argv } = yargs
|
|||
alias: 'with-profiles',
|
||||
default: false,
|
||||
})
|
||||
.option('scene', {
|
||||
.option('scenes', {
|
||||
describe: 'Scrape scene info from URL',
|
||||
type: 'array',
|
||||
alias: 'scenes',
|
||||
alias: 'scene',
|
||||
})
|
||||
.option('scenes-file', {
|
||||
describe: 'Scrape scene info from URLs in a file',
|
||||
type: 'string',
|
||||
})
|
||||
.option('movie', {
|
||||
describe: 'Scrape movie info from URL',
|
||||
|
|
|
@ -101,7 +101,7 @@ function toBaseSource(rawSource) {
|
|||
if (rawSource.attempts) baseSource.attempts = rawSource.attempts;
|
||||
if (rawSource.queueMethod) baseSource.queueMethod = rawSource.queueMethod;
|
||||
|
||||
if (rawSource.copyright) baseSource.copyright = rawSource.copyright;
|
||||
if (rawSource.credit !== undefined) baseSource.credit = rawSource.credit;
|
||||
if (rawSource.comment) baseSource.comment = rawSource.comment;
|
||||
if (rawSource.group) baseSource.group = rawSource.group;
|
||||
|
||||
|
@ -569,7 +569,7 @@ function curateMediaEntry(media, index) {
|
|||
source: media.src,
|
||||
source_page: media.url,
|
||||
scraper: media.scraper,
|
||||
copyright: media.copyright,
|
||||
credit: media.credit,
|
||||
comment: media.comment,
|
||||
};
|
||||
|
||||
|
@ -685,7 +685,7 @@ async function associateAvatars(profiles) {
|
|||
? {
|
||||
...profile,
|
||||
avatarBaseMedia: toBaseMedias([profile.avatar], 'avatars', {
|
||||
copyright: profile.network?.name || profile.site?.name || null,
|
||||
credit: (profile.credit !== undefined && (profile.network?.name || profile.site?.name)) || null,
|
||||
scraper: profile.scraper || null,
|
||||
})[0],
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ function scrapeProfile(html) {
|
|||
|
||||
profile.avatar = {
|
||||
src: `http://www.boobpedia.com${avatarPath}`,
|
||||
copyright: null,
|
||||
credit: null,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
|
||||
/**
 * Builds one release object per listing item on an updates page.
 *
 * @param {Array<{ query: object }>} scenes - Listing items; each `query` must expose
 *   `url`, `img`, `imgs`, `q`, `text`, `date` and `all`.
 * @param {{ url: string }} channel - Channel whose `url` is passed as the origin for links and images.
 * @returns {object[]} Releases with `url`, `entryId`, `title`, `description`, `date`,
 *   `poster`, `photos` and `tags`. `entryId` is undefined when neither the scene link
 *   nor the thumbnail path yields an identifier.
 */
function scrapeLatest(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a.more:not([href*="/join.php"])', 'href', { origin: channel.url });

    // match() returns null when the link is not a /scene/<id>/<id> path; keep that case
    // from throwing so a single odd item cannot abort the whole page.
    const sceneIds = release.url
      ? new URL(release.url).pathname.match(/\/scene\/(\d+)\/(\d+)/)
      : null;

    // Without usable scene IDs, fall back to the identifier in the thumbnail path.
    // Both the image source and its match may be missing, hence the double guard.
    release.entryId = sceneIds
      ? sceneIds.slice(1, 3).join('-')
      : query.img('.bigthumb')?.match(/\/scenes\/(\w+)/)?.[1];

    // The heading only carries a title when it contains a " - " separator;
    // otherwise use the text of the item's h3.
    release.title = query.q('h5 strong', true)?.match(/. - (.+)$/)?.[1] || query.text('.videos h3');
    release.description = query.text('p');
    release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);

    release.poster = query.img('.bigthumb', null, { origin: channel.url });
    release.photos = query.imgs('.smallthumb', null, { origin: channel.url });

    release.tags = query.all('a[href*="/keywords"]', true);

    return release;
  });
}
|
||||
|
||||
/**
 * Builds a release object from a single scene page.
 *
 * @param {{ query: object }} item - Parsed page; `query` must expose `q`, `urls`, `all`, `img` and `imgs`.
 * @param {string} url - Absolute URL of the scene page, used to derive the entry ID.
 * @param {{ url: string }} channel - Channel whose `url` is passed as the origin for images.
 * @returns {object} Release with `entryId`, `title`, `description`, `actors`, `tags`,
 *   `poster` and `photos`. `entryId` is undefined when the URL path is not /scene/<id>/<id>.
 * @throws {TypeError} When `url` is not a valid absolute URL.
 */
function scrapeScene({ query }, url, channel) {
  const release = {};

  // match() returns null for paths without /scene/<id>/<id>; leave entryId undefined instead of throwing.
  release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/)?.slice(1, 3).join('-');

  release.title = query.q('h4 strong, .videos h3', true);
  release.description = query.q('#about p, .videos p', true);

  const actors = query.urls('a[href*="/girl/"]')
    .map(actorUrl => actorUrl.match(/video-(\w+)/)?.[1])
    .filter(Boolean);

  // With no usable performer links, the title stands in as the actor name
  // (presumably these pages are titled after the performer — confirm per site).
  // A missing title must not produce an actor list containing null.
  release.actors = actors.length > 0 ? actors : [release.title].filter(Boolean);

  release.tags = query.all('.info a[href*="/keywords"], .buttons a[href*="/keywords"]', true);

  release.poster = query.img('#info .main-preview, .bigthumb', null, { origin: channel.url });
  release.photos = [
    query.img('.previewmed img', null, { origin: channel.url }),
    ...query.imgs('.hd-clip img, .smallthumb', null, { origin: channel.url }),
  ].filter(Boolean);

  return release;
}
|
||||
|
||||
/**
 * Fetches one page of a channel's video updates and scrapes its listing items.
 *
 * @param {{ url: string }} channel - Channel whose `url` prefixes the updates path.
 * @param {number} [page=1] - 1-based page number; the path offset advances by 10 per page.
 * @returns {Promise<object[]|*>} Scraped releases when the request succeeded, otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
  const offset = (page - 1) * 10;
  const response = await qu.getAll(`${channel.url}/free/updates/videos/${offset}`, '.videos');

  if (!response.ok) {
    return response.status;
  }

  return scrapeLatest(response.items, channel);
}
|
||||
|
||||
/**
 * Fetches a scene page and scrapes it into a release.
 *
 * @param {string} url - URL of the scene page.
 * @param {{ url: string }} channel - Channel the scene belongs to, forwarded to the scraper.
 * @returns {Promise<object|*>} The scraped release when the request succeeded, otherwise the response status.
 */
async function fetchScene(url, channel) {
  const response = await qu.get(url);

  if (!response.ok) {
    return response.status;
  }

  return scrapeScene(response.item, url, channel);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
|
@ -52,7 +52,7 @@ function scrapeProfile(html, actorName) {
|
|||
profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);
|
||||
|
||||
const avatar = document.querySelector('.profile-image-large img').src;
|
||||
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, copyright: null };
|
||||
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null };
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ const evilangel = require('./evilangel');
|
|||
const fakehub = require('./fakehub');
|
||||
const famedigital = require('./famedigital');
|
||||
const fantasymassage = require('./fantasymassage');
|
||||
const fcuk = require('./fcuk');
|
||||
const fullpornnetwork = require('./fullpornnetwork');
|
||||
const girlsway = require('./girlsway');
|
||||
const hush = require('./hush');
|
||||
|
@ -93,6 +94,7 @@ module.exports = {
|
|||
fakehub,
|
||||
famedigital,
|
||||
fantasymassage,
|
||||
fcuk,
|
||||
forbondage: porndoe,
|
||||
fullpornnetwork,
|
||||
girlsway,
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
'use strict';
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
/**
 * Reads a text file and returns its non-empty lines with surrounding whitespace removed.
 *
 * @param {string} location - Path of the file to read.
 * @returns {Promise<string[]>} Trimmed, non-empty lines in file order.
 * @throws {Error} When no path is provided; read errors from the file system propagate.
 */
async function getFileEntries(location) {
  if (!location) {
    throw new Error('No filepath provided');
  }

  const contents = await fs.promises.readFile(location, 'utf-8');
  const entries = [];

  for (const line of contents.split(/\n/)) {
    const entry = line.trim();

    // Blank and whitespace-only lines are not entries.
    if (entry) {
      entries.push(entry);
    }
  }

  return entries;
}
|
||||
|
||||
module.exports = getFileEntries;
|
|
@ -125,7 +125,7 @@ function date(context, selector, format, match, attr = 'textContent') {
|
|||
return extractDate(dateString, format, match);
|
||||
}
|
||||
|
||||
function image(context, selector = 'img', attr, origin, protocol = 'https') {
|
||||
function image(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
|
||||
const imageEl = (attr && q(context, selector, attr))
|
||||
|| q(context, selector, 'data-src')
|
||||
|| q(context, selector, 'src');
|
||||
|
@ -133,7 +133,7 @@ function image(context, selector = 'img', attr, origin, protocol = 'https') {
|
|||
return prefixUrl(imageEl, origin, protocol);
|
||||
}
|
||||
|
||||
function images(context, selector = 'img', attr, origin, protocol = 'https') {
|
||||
function images(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
|
||||
const attribute = attr
|
||||
|| (q(context, selector, 'data-src') && 'data-src')
|
||||
|| (q(context, selector, 'src') && 'src');
|
||||
|
@ -143,31 +143,31 @@ function images(context, selector = 'img', attr, origin, protocol = 'https') {
|
|||
return imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol));
|
||||
}
|
||||
|
||||
function url(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
|
||||
function url(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
|
||||
const urlEl = q(context, selector, attr);
|
||||
|
||||
return attr ? prefixUrl(urlEl, origin, protocol) : urlEl;
|
||||
}
|
||||
|
||||
function urls(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
|
||||
function urls(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
|
||||
const urlEls = all(context, selector, attr);
|
||||
|
||||
return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls;
|
||||
}
|
||||
|
||||
function poster(context, selector = 'video', attr = 'poster', origin, protocol = 'https') {
|
||||
function poster(context, selector = 'video', attr = 'poster', { origin, protocol = 'https' } = {}) {
|
||||
const posterEl = q(context, selector, attr);
|
||||
|
||||
return attr ? prefixUrl(posterEl, origin, protocol) : posterEl;
|
||||
}
|
||||
|
||||
function video(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
|
||||
function video(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
|
||||
const trailerEl = q(context, selector, attr);
|
||||
|
||||
return attr ? prefixUrl(trailerEl, origin, protocol) : trailerEl;
|
||||
}
|
||||
|
||||
function videos(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
|
||||
function videos(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
|
||||
const trailerEls = all(context, selector, attr);
|
||||
|
||||
return attr ? trailerEls.map(trailerEl => prefixUrl(trailerEl, origin, protocol)) : trailerEls;
|
||||
|
|