Added series as channels with logos and photo album scraping to Little Caprice. Added various tag photos.

This commit is contained in:
DebaucheryLibrarian 2020-11-24 04:29:44 +01:00
parent 711a9441a6
commit 980efbc93d
66 changed files with 172 additions and 19 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 KiB

After

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 KiB

After

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 471 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 640 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 252 KiB

After

Width:  |  Height:  |  Size: 4.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 252 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.4 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 550 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 839 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@ -842,6 +842,11 @@ const tags = [
group: 'finish', group: 'finish',
priority: 6, priority: 6,
}, },
{
name: 'swinging',
slug: 'swinging',
group: 'group',
},
{ {
name: 'tattoos', name: 'tattoos',
slug: 'tattoos', slug: 'tattoos',
@ -1726,6 +1731,10 @@ const aliases = [
name: 'swallow', name: 'swallow',
for: 'swallowing', for: 'swallowing',
}, },
{
name: 'swingers',
for: 'swinging',
},
{ {
name: 'strap-on', name: 'strap-on',
for: 'strap-on-dildo', for: 'strap-on-dildo',

View File

@ -314,6 +314,11 @@ const networks = [
url: 'https://letsdoeit.com', url: 'https://letsdoeit.com',
parent: 'porndoe', parent: 'porndoe',
}, },
{
slug: 'littlecapricedreams',
name: 'Little Caprice Dreams',
url: 'https://www.littlecaprice-dreams.com',
},
{ {
slug: 'mamacitaz', slug: 'mamacitaz',
name: 'Mamacitaz', name: 'Mamacitaz',

View File

@ -4226,12 +4226,83 @@ const sites = [
parent: 'letsdoeit', parent: 'letsdoeit',
}, },
// LITTLE CAPRICE // LITTLE CAPRICE
/*
{ {
name: 'Little Caprice Dreams', name: 'Little Caprice Dreams',
slug: 'littlecapricedreams', slug: 'littlecapricedreams',
alias: ['little caprice'], alias: ['little caprice'],
url: 'https://www.littlecaprice-dreams.com', url: 'https://www.littlecaprice-dreams.com',
}, },
*/
{
name: 'Caprice Casting',
slug: 'capricecasting',
url: 'https://www.littlecaprice-dreams.com/caprice-casting',
parent: 'littlecapricedreams',
},
{
name: 'Caprice Divas',
slug: 'capricedivas',
url: 'https://www.littlecaprice-dreams.com/caprice-divas',
parent: 'littlecapricedreams',
},
{
name: 'Nassty',
slug: 'nassty',
url: 'https://www.littlecaprice-dreams.com/nassty',
parent: 'littlecapricedreams',
},
{
name: 'POV Dreams',
slug: 'povdreams',
url: 'https://www.littlecaprice-dreams.com/pov-dreams',
parent: 'littlecapricedreams',
},
{
name: 'Porn Lifestyle',
slug: 'pornlifestyle',
url: 'https://www.littlecaprice-dreams.com/porn-lifestyle',
parent: 'littlecapricedreams',
},
{
name: 'Public Sex',
slug: 'publicsex',
url: 'https://www.littlecaprice-dreams.com/public-sex',
parent: 'littlecapricedreams',
},
{
name: 'Super Private X',
slug: 'superprivatex',
url: 'https://www.littlecaprice-dreams.com/superprivatex',
parent: 'littlecapricedreams',
},
{
name: 'Sex Lessons',
slug: 'sexlessons',
url: 'https://www.littlecaprice-dreams.com/sexlessons',
parent: 'littlecapricedreams',
},
{
name: 'Virtual Reality',
slug: 'littlecapricevr',
url: 'https://www.littlecaprice-dreams.com/virtual-reality-little-caprice',
tags: ['virtual-reality'],
hasLogo: false,
parent: 'littlecapricedreams',
},
{
name: 'We Cum To You',
slug: 'wecumtoyou',
url: 'https://www.littlecaprice-dreams.com/wecumtoyou-swingers',
tags: ['swinging', 'orgy'],
parent: 'littlecapricedreams',
},
{
name: 'Xpervo',
slug: 'xpervo',
url: 'https://www.littlecaprice-dreams.com/xpervo',
parent: 'littlecapricedreams',
},
// MAMACITAZ // MAMACITAZ
{ {
name: 'Her Big Ass', name: 'Her Big Ass',

View File

@ -792,16 +792,18 @@ const tagPhotos = [
['facefucking', 3, 'Adriana Chechik in "Performing Magic Butt Tricks With Jules Jordan. What Will Disappear In Her Ass?" for Jules Jordan'], ['facefucking', 3, 'Adriana Chechik in "Performing Magic Butt Tricks With Jules Jordan. What Will Disappear In Her Ass?" for Jules Jordan'],
['fake-boobs', 14, 'Rikki Six for Dream Dolls'], ['fake-boobs', 14, 'Rikki Six for Dream Dolls'],
['fake-boobs', 2, 'Gia Milana in "Hot Anal Latina" for HardX'], ['fake-boobs', 2, 'Gia Milana in "Hot Anal Latina" for HardX'],
['fake-boobs', 17, 'Felina in "With Flors On The Floor" for LouisDeMirabert'],
['fake-boobs', 18, 'Ebony Godess for Action Girls'],
['fake-boobs', 1, 'Lela Star in "Thick" for Jules Jordan'], ['fake-boobs', 1, 'Lela Star in "Thick" for Jules Jordan'],
['fake-boobs', 16, 'Marsha May in "Once You Go Black 7" for Jules Jordan'], ['fake-boobs', 16, 'Marsha May in "Once You Go Black 7" for Jules Jordan'],
['fake-boobs', 9, 'Putri Cinta for Watch 4 Beauty'],
['fake-boobs', 10, 'Tia Cyrus in "Titty-Fucked Yoga Goddess" for Latina Sex Tapes'], ['fake-boobs', 10, 'Tia Cyrus in "Titty-Fucked Yoga Goddess" for Latina Sex Tapes'],
['fake-boobs', 9, 'Putri Cinta for StasyQ'],
['fake-boobs', 11, 'Jessa Rhodes and Cali Carter in "Busty Anal Workout" for LesbianX'], ['fake-boobs', 11, 'Jessa Rhodes and Cali Carter in "Busty Anal Workout" for LesbianX'],
['fake-boobs', 13, 'Kitana Lure for Asshole Fever'],
['fake-boobs', 3, 'Ashly Anderson for Passion HD'], ['fake-boobs', 3, 'Ashly Anderson for Passion HD'],
['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'], ['fake-boobs', 13, 'Kitana Lure for Asshole Fever'],
['fake-boobs', 8, 'Amber Alena for Score'], ['fake-boobs', 8, 'Amber Alena for Score'],
['fake-boobs', 4, 'Capri Cavanni for Big Tits in Sports'], ['fake-boobs', 4, 'Capri Cavanni for Big Tits in Sports'],
['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'],
// ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'], // ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'],
['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'], ['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'],
['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'], ['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'],

View File

@ -57,7 +57,7 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
// base release with URL // base release with URL
return { return {
...baseReleaseOrUrl, ...baseReleaseOrUrl,
entity, entity: baseReleaseOrUrl.entity || entity,
deep: false, deep: false,
}; };
} }
@ -75,7 +75,7 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
// base release without URL, prepare for passthrough // base release without URL, prepare for passthrough
return { return {
...baseReleaseOrUrl, ...baseReleaseOrUrl,
entity, entity: baseReleaseOrUrl.entity || entity,
deep: false, deep: false,
}; };
} }

View File

@ -39,6 +39,16 @@ function curateEntity(entity, includeParameters = false) {
}, includeParameters)); }, includeParameters));
} }
if (entity.siblings) {
curatedEntity.parent = {
...curatedEntity.parent,
children: entity.siblings.map(sibling => curateEntity({
...sibling,
parent: curatedEntity.parent,
}, includeParameters)),
};
}
if (entity.tags) { if (entity.tags) {
curatedEntity.tags = entity.tags.map(tag => ({ curatedEntity.tags = entity.tags.map(tag => ({
id: tag.id, id: tag.id,
@ -72,9 +82,10 @@ async function fetchIncludedEntities() {
WITH RECURSIVE channels AS ( WITH RECURSIVE channels AS (
/* select configured channels and networks */ /* select configured channels and networks */
SELECT SELECT
entities.* entities.*, json_agg(siblings) as siblings
FROM FROM
entities entities
LEFT JOIN entities AS siblings ON siblings.parent_id = entities.parent_id
WHERE WHERE
CASE WHEN :includeAll CASE WHEN :includeAll
THEN THEN
@ -91,12 +102,13 @@ async function fetchIncludedEntities() {
AND entities.type = 'network') AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels) OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel')) AND entities.type = 'channel'))
GROUP BY entities.id
UNION ALL UNION ALL
/* select recursive children of configured networks */ /* select recursive children of configured networks */
SELECT SELECT
entities.* entities.*, null as siblings
FROM FROM
entities entities
INNER JOIN INNER JOIN
@ -117,7 +129,7 @@ async function fetchIncludedEntities() {
WHERE WHERE
channels.type = 'channel' channels.type = 'channel'
GROUP BY GROUP BY
entities.id entities.id;
`, include); `, include);
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true)); const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));

View File

@ -1,8 +1,33 @@
'use strict'; 'use strict';
const qu = require('../utils/qu'); const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
function scrapeAll(scenes) { function matchChannel(release, channel) {
const series = channel.children || channel.parent.children;
console.log(channel, series);
const serieNames = series.reduce((acc, serie) => ({
...acc,
[serie.name]: serie,
[serie.slug]: serie,
}), {});
const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0];
const serie = serieName && serieNames[slugify(serieName, '')];
if (serie) {
return {
channel: serie.slug,
title: release.title.replace(new RegExp(`${serieName}[\\s:-]*`), ''),
};
}
return null;
}
function scrapeAll(scenes, channel) {
return scenes.map(({ query, el }) => { return scenes.map(({ query, el }) => {
const release = {}; const release = {};
@ -14,11 +39,29 @@ function scrapeAll(scenes) {
release.poster = query.img('img'); release.poster = query.img('img');
return release; return {
...release,
...matchChannel(release, channel),
};
}); });
} }
function scrapeScene({ query }) { async function fetchPhotos(url) {
if (url) {
const res = await qu.get(url, '.et_post_gallery');
if (res.ok) {
return res.item.query.urls('a').map(imgUrl => ({
src: imgUrl,
referer: url,
}));
}
}
return null;
}
async function scrapeScene({ query }, url, channel, include) {
const release = {}; const release = {};
const script = query.cnt('script.yoast-schema-graph'); const script = query.cnt('script.yoast-schema-graph');
@ -41,20 +84,31 @@ function scrapeScene({ query }) {
const posterData = data['@graph']?.find(item => item['@type'] === 'ImageObject'); const posterData = data['@graph']?.find(item => item['@type'] === 'ImageObject');
release.poster = posterData?.url const poster = posterData?.url
|| query.q('meta[property="og:image"]', 'content') || query.q('meta[property="og:image"]', 'content')
|| query.q('meta[name="twitter:image"]', 'content'); || query.q('meta[name="twitter:image"]', 'content');
release.poster = {
src: poster,
referer: url,
};
release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07? release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?
// TODO: photo gallery, find if any video has a trailer if (include.photos) {
console.log(release); release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
return release; }
return {
...release,
...matchChannel(release, channel),
};
} }
async function fetchLatest(channel) { async function fetchLatest(channel) {
// no apparent pagination, all updates on one page // no apparent pagination, all updates on one page
const res = await qu.getAll(`${channel.url}/videos/`, '.project'); // using channels in part because main overview contains indistinguishable photo albums
const res = await qu.getAll(channel.url, '.project');
if (res.ok) { if (res.ok) {
return scrapeAll(res.items, channel); return scrapeAll(res.items, channel);
@ -63,11 +117,11 @@ async function fetchLatest(channel) {
return res.status; return res.status;
} }
async function fetchScene(url, channel) { async function fetchScene(url, channel, baseRelease, include) {
const res = await qu.get(url); const res = await qu.get(url);
if (res.ok) { if (res.ok) {
return scrapeScene(res.item, channel); return scrapeScene(res.item, url, channel, include);
} }
return res.status; return res.status;

View File

@ -7,7 +7,7 @@ const logger = require('../logger')(__filename);
function getVirtualConsole(filepath) { function getVirtualConsole(filepath) {
const virtualConsole = new VirtualConsole(); const virtualConsole = new VirtualConsole();
const context = path.basename(filepath); const context = path.basename(filepath).replace(path.extname(filepath), '');
virtualConsole.on('error', message => logger.warn(`Error from JSDOM in ${context}: ${message}`)); virtualConsole.on('error', message => logger.warn(`Error from JSDOM in ${context}: ${message}`));
virtualConsole.on('jsdomError', message => logger.warn(`Error from JSDOM in ${context}: ${message}`)); virtualConsole.on('jsdomError', message => logger.warn(`Error from JSDOM in ${context}: ${message}`));