Added series as channels with logos and photo album scraping to Little Caprice. Added various tag photos.
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 5.3 KiB |
Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 5.9 KiB |
After Width: | Height: | Size: 10 KiB |
After Width: | Height: | Size: 3.1 KiB |
After Width: | Height: | Size: 3.7 KiB |
After Width: | Height: | Size: 2.5 KiB |
After Width: | Height: | Size: 2.2 KiB |
After Width: | Height: | Size: 4.0 KiB |
After Width: | Height: | Size: 2.4 KiB |
After Width: | Height: | Size: 2.6 KiB |
After Width: | Height: | Size: 2.8 KiB |
Before Width: | Height: | Size: 39 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 99 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 87 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 24 KiB |
After Width: | Height: | Size: 74 KiB |
After Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 39 KiB |
Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 5.9 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 30 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 76 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 5.9 MiB |
After Width: | Height: | Size: 471 KiB |
After Width: | Height: | Size: 640 KiB |
Before Width: | Height: | Size: 252 KiB After Width: | Height: | Size: 4.9 MiB |
After Width: | Height: | Size: 252 KiB |
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 7.5 KiB |
After Width: | Height: | Size: 6.0 KiB |
Before Width: | Height: | Size: 6.4 KiB After Width: | Height: | Size: 4.7 KiB |
After Width: | Height: | Size: 6.4 KiB |
After Width: | Height: | Size: 5.7 MiB |
After Width: | Height: | Size: 550 KiB |
After Width: | Height: | Size: 839 KiB |
After Width: | Height: | Size: 6.5 MiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 17 KiB |
After Width: | Height: | Size: 24 KiB |
|
@ -842,6 +842,11 @@ const tags = [
|
||||||
group: 'finish',
|
group: 'finish',
|
||||||
priority: 6,
|
priority: 6,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'swinging',
|
||||||
|
slug: 'swinging',
|
||||||
|
group: 'group',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'tattoos',
|
name: 'tattoos',
|
||||||
slug: 'tattoos',
|
slug: 'tattoos',
|
||||||
|
@ -1726,6 +1731,10 @@ const aliases = [
|
||||||
name: 'swallow',
|
name: 'swallow',
|
||||||
for: 'swallowing',
|
for: 'swallowing',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'swingers',
|
||||||
|
for: 'swinging',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'strap-on',
|
name: 'strap-on',
|
||||||
for: 'strap-on-dildo',
|
for: 'strap-on-dildo',
|
||||||
|
|
|
@ -314,6 +314,11 @@ const networks = [
|
||||||
url: 'https://letsdoeit.com',
|
url: 'https://letsdoeit.com',
|
||||||
parent: 'porndoe',
|
parent: 'porndoe',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
slug: 'littlecapricedreams',
|
||||||
|
name: 'Little Caprice Dreams',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
slug: 'mamacitaz',
|
slug: 'mamacitaz',
|
||||||
name: 'Mamacitaz',
|
name: 'Mamacitaz',
|
||||||
|
|
|
@ -4226,12 +4226,83 @@ const sites = [
|
||||||
parent: 'letsdoeit',
|
parent: 'letsdoeit',
|
||||||
},
|
},
|
||||||
// LITTLE CAPRICE
|
// LITTLE CAPRICE
|
||||||
|
/*
|
||||||
{
|
{
|
||||||
name: 'Little Caprice Dreams',
|
name: 'Little Caprice Dreams',
|
||||||
slug: 'littlecapricedreams',
|
slug: 'littlecapricedreams',
|
||||||
alias: ['little caprice'],
|
alias: ['little caprice'],
|
||||||
url: 'https://www.littlecaprice-dreams.com',
|
url: 'https://www.littlecaprice-dreams.com',
|
||||||
},
|
},
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
name: 'Caprice Casting',
|
||||||
|
slug: 'capricecasting',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/caprice-casting',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Caprice Divas',
|
||||||
|
slug: 'capricedivas',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/caprice-divas',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Nassty',
|
||||||
|
slug: 'nassty',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/nassty',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'POV Dreams',
|
||||||
|
slug: 'povdreams',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/pov-dreams',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Porn Lifestyle',
|
||||||
|
slug: 'pornlifestyle',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/porn-lifestyle',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Public Sex',
|
||||||
|
slug: 'publicsex',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/public-sex',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Super Private X',
|
||||||
|
slug: 'superprivatex',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/superprivatex',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Sex Lessons',
|
||||||
|
slug: 'sexlessons',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/sexlessons',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Virtual Reality',
|
||||||
|
slug: 'littlecapricevr',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/virtual-reality-little-caprice',
|
||||||
|
tags: ['virtual-reality'],
|
||||||
|
hasLogo: false,
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'We Cum To You',
|
||||||
|
slug: 'wecumtoyou',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/wecumtoyou-swingers',
|
||||||
|
tags: ['swinging', 'orgy'],
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Xpervo',
|
||||||
|
slug: 'xpervo',
|
||||||
|
url: 'https://www.littlecaprice-dreams.com/xpervo',
|
||||||
|
parent: 'littlecapricedreams',
|
||||||
|
},
|
||||||
// MAMACITAZ
|
// MAMACITAZ
|
||||||
{
|
{
|
||||||
name: 'Her Big Ass',
|
name: 'Her Big Ass',
|
||||||
|
|
|
@ -792,16 +792,18 @@ const tagPhotos = [
|
||||||
['facefucking', 3, 'Adriana Chechik in "Performing Magic Butt Tricks With Jules Jordan. What Will Disappear In Her Ass?" for Jules Jordan'],
|
['facefucking', 3, 'Adriana Chechik in "Performing Magic Butt Tricks With Jules Jordan. What Will Disappear In Her Ass?" for Jules Jordan'],
|
||||||
['fake-boobs', 14, 'Rikki Six for Dream Dolls'],
|
['fake-boobs', 14, 'Rikki Six for Dream Dolls'],
|
||||||
['fake-boobs', 2, 'Gia Milana in "Hot Anal Latina" for HardX'],
|
['fake-boobs', 2, 'Gia Milana in "Hot Anal Latina" for HardX'],
|
||||||
|
['fake-boobs', 17, 'Felina in "With Flors On The Floor" for LouisDeMirabert'],
|
||||||
|
['fake-boobs', 18, 'Ebony Godess for Action Girls'],
|
||||||
['fake-boobs', 1, 'Lela Star in "Thick" for Jules Jordan'],
|
['fake-boobs', 1, 'Lela Star in "Thick" for Jules Jordan'],
|
||||||
['fake-boobs', 16, 'Marsha May in "Once You Go Black 7" for Jules Jordan'],
|
['fake-boobs', 16, 'Marsha May in "Once You Go Black 7" for Jules Jordan'],
|
||||||
|
['fake-boobs', 9, 'Putri Cinta for Watch 4 Beauty'],
|
||||||
['fake-boobs', 10, 'Tia Cyrus in "Titty-Fucked Yoga Goddess" for Latina Sex Tapes'],
|
['fake-boobs', 10, 'Tia Cyrus in "Titty-Fucked Yoga Goddess" for Latina Sex Tapes'],
|
||||||
['fake-boobs', 9, 'Putri Cinta for StasyQ'],
|
|
||||||
['fake-boobs', 11, 'Jessa Rhodes and Cali Carter in "Busty Anal Workout" for LesbianX'],
|
['fake-boobs', 11, 'Jessa Rhodes and Cali Carter in "Busty Anal Workout" for LesbianX'],
|
||||||
['fake-boobs', 13, 'Kitana Lure for Asshole Fever'],
|
|
||||||
['fake-boobs', 3, 'Ashly Anderson for Passion HD'],
|
['fake-boobs', 3, 'Ashly Anderson for Passion HD'],
|
||||||
['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'],
|
['fake-boobs', 13, 'Kitana Lure for Asshole Fever'],
|
||||||
['fake-boobs', 8, 'Amber Alena for Score'],
|
['fake-boobs', 8, 'Amber Alena for Score'],
|
||||||
['fake-boobs', 4, 'Capri Cavanni for Big Tits in Sports'],
|
['fake-boobs', 4, 'Capri Cavanni for Big Tits in Sports'],
|
||||||
|
['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'],
|
||||||
// ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'],
|
// ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'],
|
||||||
['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'],
|
['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'],
|
||||||
['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'],
|
['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'],
|
||||||
|
|
|
@ -57,7 +57,7 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||||
// base release with URL
|
// base release with URL
|
||||||
return {
|
return {
|
||||||
...baseReleaseOrUrl,
|
...baseReleaseOrUrl,
|
||||||
entity,
|
entity: baseReleaseOrUrl.entity || entity,
|
||||||
deep: false,
|
deep: false,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -75,7 +75,7 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||||
// base release without URL, prepare for passthrough
|
// base release without URL, prepare for passthrough
|
||||||
return {
|
return {
|
||||||
...baseReleaseOrUrl,
|
...baseReleaseOrUrl,
|
||||||
entity,
|
entity: baseReleaseOrUrl.entity || entity,
|
||||||
deep: false,
|
deep: false,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,16 @@ function curateEntity(entity, includeParameters = false) {
|
||||||
}, includeParameters));
|
}, includeParameters));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (entity.siblings) {
|
||||||
|
curatedEntity.parent = {
|
||||||
|
...curatedEntity.parent,
|
||||||
|
children: entity.siblings.map(sibling => curateEntity({
|
||||||
|
...sibling,
|
||||||
|
parent: curatedEntity.parent,
|
||||||
|
}, includeParameters)),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
if (entity.tags) {
|
if (entity.tags) {
|
||||||
curatedEntity.tags = entity.tags.map(tag => ({
|
curatedEntity.tags = entity.tags.map(tag => ({
|
||||||
id: tag.id,
|
id: tag.id,
|
||||||
|
@ -72,9 +82,10 @@ async function fetchIncludedEntities() {
|
||||||
WITH RECURSIVE channels AS (
|
WITH RECURSIVE channels AS (
|
||||||
/* select configured channels and networks */
|
/* select configured channels and networks */
|
||||||
SELECT
|
SELECT
|
||||||
entities.*
|
entities.*, json_agg(siblings) as siblings
|
||||||
FROM
|
FROM
|
||||||
entities
|
entities
|
||||||
|
LEFT JOIN entities AS siblings ON siblings.parent_id = entities.parent_id
|
||||||
WHERE
|
WHERE
|
||||||
CASE WHEN :includeAll
|
CASE WHEN :includeAll
|
||||||
THEN
|
THEN
|
||||||
|
@ -91,12 +102,13 @@ async function fetchIncludedEntities() {
|
||||||
AND entities.type = 'network')
|
AND entities.type = 'network')
|
||||||
OR (entities.slug = ANY(:excludedChannels)
|
OR (entities.slug = ANY(:excludedChannels)
|
||||||
AND entities.type = 'channel'))
|
AND entities.type = 'channel'))
|
||||||
|
GROUP BY entities.id
|
||||||
|
|
||||||
UNION ALL
|
UNION ALL
|
||||||
|
|
||||||
/* select recursive children of configured networks */
|
/* select recursive children of configured networks */
|
||||||
SELECT
|
SELECT
|
||||||
entities.*
|
entities.*, null as siblings
|
||||||
FROM
|
FROM
|
||||||
entities
|
entities
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
|
@ -117,7 +129,7 @@ async function fetchIncludedEntities() {
|
||||||
WHERE
|
WHERE
|
||||||
channels.type = 'channel'
|
channels.type = 'channel'
|
||||||
GROUP BY
|
GROUP BY
|
||||||
entities.id
|
entities.id;
|
||||||
`, include);
|
`, include);
|
||||||
|
|
||||||
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
||||||
|
|
|
@ -1,8 +1,33 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const qu = require('../utils/qu');
|
||||||
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
function scrapeAll(scenes) {
|
/**
 * Work out which series (sub-channel) a release belongs to from its title.
 *
 * Candidate series are taken from `channel.children`, or from
 * `channel.parent.children` when the channel itself has none. A series is
 * recognised when its name or slug occurs in the title (case-insensitive).
 *
 * @param {{ title?: string }} release - Scraped release; only `title` is read.
 * @param {{ children?: Array, parent?: { children?: Array } }} channel - Entity
 *   whose children (or siblings via the parent) each carry `name` and `slug`.
 * @returns {{ channel: string, title: string } | null} The matched series slug
 *   and the title with the series prefix (plus trailing whitespace, colons and
 *   dashes) removed, or `null` when nothing could be matched.
 */
function matchChannel(release, channel) {
	const series = channel?.children ?? channel?.parent?.children;

	if (typeof release?.title !== 'string' || !Array.isArray(series) || series.length === 0) {
		return null;
	}

	// names and slugs are data, not patterns: neutralise regex metacharacters
	const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

	// index every series by lower-cased name and slug, so the matched text can
	// be resolved directly even when the slug is not derived from the name
	const seriesByKey = new Map();

	for (const serie of series) {
		for (const key of [serie.name, serie.slug]) {
			if (typeof key === 'string' && key.length > 0) {
				seriesByKey.set(key.toLowerCase(), serie);
			}
		}
	}

	if (seriesByKey.size === 0) {
		return null;
	}

	// longest alternatives first, so a short name cannot shadow a longer one sharing its prefix
	const pattern = [...seriesByKey.keys()]
		.sort((keyA, keyB) => keyB.length - keyA.length)
		.map(escapeRegExp)
		.join('|');

	const serieName = release.title.match(new RegExp(pattern, 'i'))?.[0];
	const serie = serieName && seriesByKey.get(serieName.toLowerCase());

	if (!serie) {
		return null;
	}

	return {
		channel: serie.slug,
		// strip the series name together with any separator that follows it
		title: release.title.replace(new RegExp(`${escapeRegExp(serieName)}[\\s:–-]*`), ''),
	};
}
|
||||||
|
|
||||||
|
function scrapeAll(scenes, channel) {
|
||||||
return scenes.map(({ query, el }) => {
|
return scenes.map(({ query, el }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
|
@ -14,11 +39,29 @@ function scrapeAll(scenes) {
|
||||||
|
|
||||||
release.poster = query.img('img');
|
release.poster = query.img('img');
|
||||||
|
|
||||||
return release;
|
return {
|
||||||
|
...release,
|
||||||
|
...matchChannel(release, channel),
|
||||||
|
};
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ query }) {
|
/**
 * Fetch the photo album belonging to a scene.
 *
 * Requests `url` through `qu.get`, scoped to the `.et_post_gallery` element,
 * and turns every anchor URL found there into a media source that carries the
 * album page as referer.
 *
 * @param {string} [url] - Album page URL; when falsy no request is made.
 * @returns {Promise<Array<{ src: string, referer: string }> | null>} The photo
 *   sources, or `null` when there is no URL or the response is not `ok`.
 */
async function fetchPhotos(url) {
	if (!url) {
		return null;
	}

	const res = await qu.get(url, '.et_post_gallery');

	if (!res.ok) {
		return null;
	}

	const photoUrls = res.item.query.urls('a');

	return photoUrls.map(src => ({ src, referer: url }));
}
|
||||||
|
|
||||||
|
async function scrapeScene({ query }, url, channel, include) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const script = query.cnt('script.yoast-schema-graph');
|
const script = query.cnt('script.yoast-schema-graph');
|
||||||
|
@ -41,20 +84,31 @@ function scrapeScene({ query }) {
|
||||||
|
|
||||||
const posterData = data['@graph']?.find(item => item['@type'] === 'ImageObject');
|
const posterData = data['@graph']?.find(item => item['@type'] === 'ImageObject');
|
||||||
|
|
||||||
release.poster = posterData?.url
|
const poster = posterData?.url
|
||||||
|| query.q('meta[property="og:image"]', 'content')
|
|| query.q('meta[property="og:image"]', 'content')
|
||||||
|| query.q('meta[name="twitter:image"]', 'content');
|
|| query.q('meta[name="twitter:image"]', 'content');
|
||||||
|
|
||||||
|
release.poster = {
|
||||||
|
src: poster,
|
||||||
|
referer: url,
|
||||||
|
};
|
||||||
|
|
||||||
release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?
|
release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?
|
||||||
|
|
||||||
// TODO: photo gallery, find if any video has a trailer
|
if (include.photos) {
|
||||||
console.log(release);
|
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
|
||||||
return release;
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
...release,
|
||||||
|
...matchChannel(release, channel),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel) {
|
async function fetchLatest(channel) {
|
||||||
// no apparent pagination, all updates on one page
|
// no apparent pagination, all updates on one page
|
||||||
const res = await qu.getAll(`${channel.url}/videos/`, '.project');
|
// using channels in part because main overview contains indistinguishable photo albums
|
||||||
|
const res = await qu.getAll(channel.url, '.project');
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeAll(res.items, channel);
|
return scrapeAll(res.items, channel);
|
||||||
|
@ -63,11 +117,11 @@ async function fetchLatest(channel) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, channel) {
|
async function fetchScene(url, channel, baseRelease, include) {
|
||||||
const res = await qu.get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeScene(res.item, channel);
|
return scrapeScene(res.item, url, channel, include);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
|
|
@ -7,7 +7,7 @@ const logger = require('../logger')(__filename);
|
||||||
|
|
||||||
function getVirtualConsole(filepath) {
|
function getVirtualConsole(filepath) {
|
||||||
const virtualConsole = new VirtualConsole();
|
const virtualConsole = new VirtualConsole();
|
||||||
const context = path.basename(filepath);
|
const context = path.basename(filepath).replace(path.extname(filepath), '');
|
||||||
|
|
||||||
virtualConsole.on('error', message => logger.warn(`Error from JSDOM in ${context}: ${message}`));
|
virtualConsole.on('error', message => logger.warn(`Error from JSDOM in ${context}: ${message}`));
|
||||||
virtualConsole.on('jsdomError', message => logger.warn(`Error from JSDOM in ${context}: ${message}`));
|
virtualConsole.on('jsdomError', message => logger.warn(`Error from JSDOM in ${context}: ${message}`));
|
||||||
|
|