Added default deep scrape fetch method. Added Karups scene and profile scraper. Added schoolgirl tag photo.
After Width: | Height: | Size: 1.5 KiB |
After Width: | Height: | Size: 5.9 KiB |
After Width: | Height: | Size: 2.5 KiB |
After Width: | Height: | Size: 2.5 KiB |
After Width: | Height: | Size: 5.9 KiB |
After Width: | Height: | Size: 8.2 KiB |
After Width: | Height: | Size: 5.9 KiB |
After Width: | Height: | Size: 6.8 KiB |
After Width: | Height: | Size: 6.8 KiB |
After Width: | Height: | Size: 757 KiB |
After Width: | Height: | Size: 844 KiB |
After Width: | Height: | Size: 530 KiB |
After Width: | Height: | Size: 6.3 KiB |
After Width: | Height: | Size: 6.7 KiB |
After Width: | Height: | Size: 6.1 KiB |
After Width: | Height: | Size: 359 KiB |
After Width: | Height: | Size: 383 KiB |
After Width: | Height: | Size: 321 KiB |
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 26 KiB |
|
@ -299,6 +299,11 @@ const networks = [
|
||||||
name: 'Jules Jordan',
|
name: 'Jules Jordan',
|
||||||
url: 'https://www.julesjordan.com',
|
url: 'https://www.julesjordan.com',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
slug: 'karups',
|
||||||
|
name: 'Karups',
|
||||||
|
url: 'https://www.karups.com',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
slug: 'kellymadison',
|
slug: 'kellymadison',
|
||||||
name: 'Kelly Madison Media',
|
name: 'Kelly Madison Media',
|
||||||
|
|
|
@ -3562,6 +3562,31 @@ const sites = [
|
||||||
tags: ['lesbian'],
|
tags: ['lesbian'],
|
||||||
parent: 'julesjordan',
|
parent: 'julesjordan',
|
||||||
},
|
},
|
||||||
|
// KARUPS
|
||||||
|
{
|
||||||
|
slug: 'karupsprivatecollection',
|
||||||
|
name: 'Private Collection',
|
||||||
|
alias: ['kpc'],
|
||||||
|
url: 'https://www.karups.com/site/kpc',
|
||||||
|
hasLogo: false,
|
||||||
|
parent: 'karups',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
slug: 'karupshometownamateurs',
|
||||||
|
name: 'Hometown Amateurs',
|
||||||
|
alias: ['kha'],
|
||||||
|
url: 'https://www.karups.com/site/kha',
|
||||||
|
hasLogo: false,
|
||||||
|
parent: 'karups',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
slug: 'karupsolderwomen',
|
||||||
|
name: 'Older Women',
|
||||||
|
alias: ['kow'],
|
||||||
|
url: 'https://www.karups.com/site/kow',
|
||||||
|
hasLogo: false,
|
||||||
|
parent: 'karups',
|
||||||
|
},
|
||||||
// KELLY MADISON MEDIA
|
// KELLY MADISON MEDIA
|
||||||
{
|
{
|
||||||
slug: 'teenfidelity',
|
slug: 'teenfidelity',
|
||||||
|
|
|
@ -655,7 +655,7 @@ const tagPosters = [
|
||||||
['pussy-eating', 5, 'Claudia Macc and Victoria Pure for Euro Girls On Girls'],
|
['pussy-eating', 5, 'Claudia Macc and Victoria Pure for Euro Girls On Girls'],
|
||||||
['redhead', 1, 'Lacy Lennon for Wicked'],
|
['redhead', 1, 'Lacy Lennon for Wicked'],
|
||||||
['squirting', 0, 'Veronica Rodriguez in "Hot Latina Squirting" for Jules Jordan'],
|
['squirting', 0, 'Veronica Rodriguez in "Hot Latina Squirting" for Jules Jordan'],
|
||||||
['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
|
['schoolgirl', 2, 'Cindy Shine in "Schoolgirl Stars in Interracial DP Threesome" for Private'],
|
||||||
['swallowing', 'poster'],
|
['swallowing', 'poster'],
|
||||||
['tattoos', 1, 'Joanna Angel for Joanna Angel'],
|
['tattoos', 1, 'Joanna Angel for Joanna Angel'],
|
||||||
['teen', 0, 'Alexa Flexy for Sensual Girl'],
|
['teen', 0, 'Alexa Flexy for Sensual Girl'],
|
||||||
|
@ -872,6 +872,7 @@ const tagPhotos = [
|
||||||
['pussy-eating', 3, 'Kylie Page and Kalina Ryu in "Training My Masseuse" for All Girl Massage'],
|
['pussy-eating', 3, 'Kylie Page and Kalina Ryu in "Training My Masseuse" for All Girl Massage'],
|
||||||
['pussy-eating', 1, 'Anikka Albrite and Riley Reid for In The Crack'],
|
['pussy-eating', 1, 'Anikka Albrite and Riley Reid for In The Crack'],
|
||||||
['redhead', 0, 'Penny Pax in "The Submission of Emma Marx: Boundaries" for New Sensations'],
|
['redhead', 0, 'Penny Pax in "The Submission of Emma Marx: Boundaries" for New Sensations'],
|
||||||
|
['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
|
||||||
['squirting', 1, 'Abella Danger and Karma Rx in "Neon Dreaming" for Brazzers'],
|
['squirting', 1, 'Abella Danger and Karma Rx in "Neon Dreaming" for Brazzers'],
|
||||||
['tattoos', 0, 'Tigerlilly in "Wrapped In Blue" for Suicide Girls'],
|
['tattoos', 0, 'Tigerlilly in "Wrapped In Blue" for Suicide Girls'],
|
||||||
['teen', 1, 'Stalfra aka Precious for Nubiles'],
|
['teen', 1, 'Stalfra aka Precious for Nubiles'],
|
||||||
|
|
25
src/deep.js
|
@ -7,6 +7,7 @@ const argv = require('./argv');
|
||||||
const include = require('./utils/argv-include')(argv);
|
const include = require('./utils/argv-include')(argv);
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
|
const qu = require('./utils/qu');
|
||||||
const scrapers = require('./scrapers/scrapers');
|
const scrapers = require('./scrapers/scrapers');
|
||||||
|
|
||||||
function urlToSiteSlug(url) {
|
function urlToSiteSlug(url) {
|
||||||
|
@ -87,6 +88,24 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Scrape a single scene with the given scraper, falling back to a default fetch.
 *
 * If the scraper exposes `fetchScene`, the call is delegated to it. Otherwise, if it
 * exposes `scrapeScene`, the page is requested through `qu.get` and the response item
 * is handed to `scrapeScene`.
 *
 * @param {object} scraper - Scraper module; `fetchScene` and `scrapeScene` are both optional.
 * @param {string} url - URL of the scene page.
 * @param {object} entity - Passed through to the scraper.
 * @param {object} baseRelease - Passed through to the scraper.
 * @param {object} options - Passed through to the scraper.
 * @returns {Promise<*>} The scraper's result, `res.status` when the default request is
 *   not ok, or `null` when the scraper offers neither method.
 */
async function fetchScene(scraper, url, entity, baseRelease, options) {
  if (scraper.fetchScene) {
    // Honour the explicit url argument; fall back to the base release for callers that omit it.
    return scraper.fetchScene(url || baseRelease?.url, entity, baseRelease, options, null);
  }

  if (!scraper.scrapeScene) {
    return null;
  }

  const res = await qu.get(url);

  return res.ok
    ? scraper.scrapeScene(res.item, url, entity, baseRelease, options)
    : res.status;
}
|
||||||
|
|
||||||
async function scrapeRelease(baseRelease, entities, type = 'scene') {
|
async function scrapeRelease(baseRelease, entities, type = 'scene') {
|
||||||
const entity = baseRelease.entity || entities[urlToSiteSlug(baseRelease.url)];
|
const entity = baseRelease.entity || entities[urlToSiteSlug(baseRelease.url)];
|
||||||
|
|
||||||
|
@ -110,8 +129,8 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') {
|
||||||
return baseRelease;
|
return baseRelease;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((type === 'scene' && !layoutScraper.fetchScene) || (type === 'movie' && !layoutScraper.fetchMovie)) {
|
if ((type === 'scene' && !layoutScraper.fetchScene && !layoutScraper.scrapeScene) || (type === 'movie' && !layoutScraper.fetchMovie)) {
|
||||||
logger.warn(`The '${entity.name}'-scraper cannot fetch individual ${type}s`);
|
logger.warn(`The '${entity.name}'-scraper cannot scrape individual ${type}s`);
|
||||||
return baseRelease;
|
return baseRelease;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,7 +138,7 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') {
|
||||||
logger.verbose(`Fetching ${type} ${baseRelease.url}`);
|
logger.verbose(`Fetching ${type} ${baseRelease.url}`);
|
||||||
|
|
||||||
const scrapedRelease = type === 'scene'
|
const scrapedRelease = type === 'scene'
|
||||||
? await layoutScraper.fetchScene(baseRelease.url, entity, baseRelease, include, null)
|
? await fetchScene(scraper, baseRelease.url, entity, baseRelease, include, null)
|
||||||
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, include, null);
|
: await layoutScraper.fetchMovie(baseRelease.url, entity, baseRelease, include, null);
|
||||||
|
|
||||||
const mergedRelease = {
|
const mergedRelease = {
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const qu = require('../utils/qu');
|
||||||
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
|
// Maps the abbreviation read from a listing item's `.site` element to the channel slug.
const channelSlugs = {
  kpc: 'karupsprivatecollection',
  kha: 'karupshometownamateurs',
  kow: 'karupsolderwomen',
};
|
||||||
|
|
||||||
|
/**
 * Turn a list of listing items into base releases.
 *
 * @param {Array<{ query: object }>} scenes - Items exposing a `query` helper with
 *   `url`, `cnt`, `date` and `img` methods.
 * @returns {object[]} One release per item with `url`, `entryId`, `title`, `date`,
 *   `channel` and `poster`.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const url = query.url('a');

    return {
      url,
      // The entry ID is the number right before `.html` in the path. An item without a
      // link (presumably a null url; verify against `qu`) must not abort the whole listing.
      entryId: url ? new URL(url).pathname.match(/(\d+)\.html/)?.[1] : undefined,
      title: query.cnt('.title'),
      date: query.date('.date', 'MMM Do, YYYY'),
      // Unknown site abbreviations resolve to undefined.
      channel: channelSlugs[query.cnt('.site')],
      poster: query.img('.thumb img'),
    };
  });
}
|
||||||
|
|
||||||
|
/**
 * Build a release from a scene page.
 *
 * @param {{ query: object }} page - Page item exposing a `query` helper with `cnt`,
 *   `date`, `all`, `url`, `poster`, `imgs` and `video` methods.
 * @param {string} url - Absolute URL of the scene page; the entry ID is taken from it.
 * @returns {object} Release with `entryId`, `title`, `date`, `actors`, `poster`,
 *   `photos` and `trailer`.
 */
function scrapeScene({ query }, url) {
  // The entry ID is the number right before `.html` in the path.
  const entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1];

  const actors = query.all('.models .content a').map((modelEl) => ({
    name: query.cnt(modelEl),
    url: query.url(modelEl, null),
  }));

  return {
    entryId,
    title: query.cnt('.title'),
    date: query.date('.date .content', 'MMM Do, YYYY'),
    actors,
    poster: query.poster(),
    // The first thumbnail is dropped from the photo set.
    photos: query.imgs('.video-thumbs img').slice(1),
    trailer: query.video(),
  };
}
|
||||||
|
|
||||||
|
/**
 * Build an actor profile from a model page.
 *
 * @param {{ query: object }} page - Page item exposing a `query` helper with `img` and `all`.
 * @param {object} entity - Forwarded to `scrapeAll` as its second argument.
 * @returns {object} Profile with `gender`, `avatar` and `scenes`.
 */
function scrapeProfile({ query }, entity) {
  const sceneItems = qu.initAll(query.all('.listing-videos .item'));

  return {
    // Gender is hard-coded for every profile from this scraper.
    gender: 'female',
    // Only thumbnails whose src contains ".jpg" are accepted as avatar.
    avatar: query.img('.model-thumb img[src*=".jpg"]'),
    scenes: scrapeAll(sceneItems, entity),
  };
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of a channel's video listing.
 *
 * @param {object} channel - Channel whose `url` is used as the base of the listing URL.
 * @param {number} [page=1] - Listing page number; defaults to the first page so an
 *   omitted argument no longer requests `pageundefined.html`.
 * @returns {Promise<object[]|*>} Scraped releases, or `res.status` when the request is not ok.
 */
async function fetchLatest(channel, page = 1) {
  const res = await qu.getAll(`${channel.url}/videos/page${page}.html`, '.listing-videos .item');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.items, channel);
}
|
||||||
|
|
||||||
|
/**
 * Look an actor up through the model search and scrape their profile page.
 *
 * @param {object} baseActor - Actor whose `slug` is used for the search and for matching.
 * @param {object} entity - Forwarded to `scrapeProfile`.
 * @returns {Promise<object|null|*>} The scraped profile, `null` when no search result
 *   matches the slug, or the status of whichever request was not ok.
 */
async function fetchProfile(baseActor, entity) {
  const searchRes = await qu.getAll(`https://www.karups.com/models/search/${baseActor.slug}/`, '.listing-models .item');

  if (!searchRes.ok) {
    return searchRes.status;
  }

  // Accept only the result whose slugified title equals the actor's slug.
  const match = searchRes.items.find((item) => slugify(item.query.cnt('.title')) === baseActor.slug);
  const actorUrl = match?.query.url('a');

  if (!actorUrl) {
    return null;
  }

  const actorRes = await qu.get(actorUrl);

  if (!actorRes.ok) {
    return actorRes.status;
  }

  return scrapeProfile(actorRes.item, entity);
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
fetchLatest,
|
||||||
|
fetchProfile,
|
||||||
|
scrapeScene,
|
||||||
|
};
|
|
@ -35,6 +35,7 @@ const inthecrack = require('./inthecrack');
|
||||||
const jayrock = require('./jayrock');
|
const jayrock = require('./jayrock');
|
||||||
const jesseloadsmonsterfacials = require('./jesseloadsmonsterfacials');
|
const jesseloadsmonsterfacials = require('./jesseloadsmonsterfacials');
|
||||||
const julesjordan = require('./julesjordan');
|
const julesjordan = require('./julesjordan');
|
||||||
|
const karups = require('./karups');
|
||||||
const kellymadison = require('./kellymadison');
|
const kellymadison = require('./kellymadison');
|
||||||
const killergram = require('./killergram');
|
const killergram = require('./killergram');
|
||||||
const kink = require('./kink');
|
const kink = require('./kink');
|
||||||
|
@ -127,6 +128,7 @@ const scrapers = {
|
||||||
jayrock,
|
jayrock,
|
||||||
jesseloadsmonsterfacials,
|
jesseloadsmonsterfacials,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
|
karups,
|
||||||
kellymadison,
|
kellymadison,
|
||||||
killergram,
|
killergram,
|
||||||
kink,
|
kink,
|
||||||
|
@ -226,6 +228,7 @@ const scrapers = {
|
||||||
inthecrack,
|
inthecrack,
|
||||||
jamesdeen: fullpornnetwork,
|
jamesdeen: fullpornnetwork,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
|
karups,
|
||||||
kellymadison,
|
kellymadison,
|
||||||
killergram,
|
killergram,
|
||||||
kink,
|
kink,
|
||||||
|
|