Scraping Dogfart scenes from native sites.

This commit is contained in:
DebaucheryLibrarian 2022-04-03 23:00:05 +02:00
parent 08df432665
commit 5cbf122d6f
2 changed files with 41 additions and 61 deletions

View File

@ -2714,163 +2714,142 @@ const sites = [
{
slug: 'blacksonblondes',
name: 'Blacks On Blondes',
url: 'https://www.blacksonblondes.com/tour',
url: 'https://www.blacksonblondes.com',
description: 'Blacks On Blondes is the Worlds Largest and Best Interracial Sex and Interracial Porn website. Black Men and White Women. BlacksOnBlondes has 23 years worth of Hardcore Interracial Content. Featuring the entire Legendary Dogfart Movie Archive',
parent: 'dogfartnetwork',
},
{
slug: 'cuckoldsessions',
name: 'Cuckold Sessions',
url: 'https://www.cuckoldsessions.com/tour',
description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com/tour - Hardcore Cuckold Fetish Videos',
url: 'https://www.cuckoldsessions.com',
description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com - Hardcore Cuckold Fetish Videos',
parent: 'dogfartnetwork',
},
{
slug: 'gloryhole',
name: 'Glory Hole',
url: 'https://www.gloryhole.com/tour',
description: '',
url: 'https://www.gloryhole.com',
parent: 'dogfartnetwork',
},
{
slug: 'blacksoncougars',
name: 'Blacks On Cougars',
url: 'https://www.blacksoncougars.com/tour',
description: '',
url: 'https://www.blacksoncougars.com',
parent: 'dogfartnetwork',
},
{
slug: 'wefuckblackgirls',
name: 'We Fuck Black Girls',
alias: ['wfbg'],
url: 'https://www.wefuckblackgirls.com/tour',
description: '',
url: 'https://www.wefuckblackgirls.com',
parent: 'dogfartnetwork',
},
{
slug: 'watchingmymomgoblack',
name: 'Watching My Mom Go Black',
url: 'https://www.watchingmymomgoblack.com/tour',
description: '',
url: 'https://www.watchingmymomgoblack.com',
parent: 'dogfartnetwork',
},
{
slug: 'interracialblowbang',
name: 'Interracial Blowbang',
url: 'https://www.interracialblowbang.com/tour',
description: '',
url: 'https://www.interracialblowbang.com',
parent: 'dogfartnetwork',
},
{
slug: 'cumbang',
name: 'Cumbang',
url: 'https://www.cumbang.com/tour',
description: '',
url: 'https://www.cumbang.com',
parent: 'dogfartnetwork',
},
{
slug: 'interracialpickups',
name: 'Interracial Pickups',
url: 'https://www.interracialpickups.com/tour',
description: '',
url: 'https://www.interracialpickups.com',
parent: 'dogfartnetwork',
},
{
slug: 'watchingmydaughtergoblack',
name: 'Watching My Daughter Go Black',
url: 'https://www.watchingmydaughtergoblack.com/tour',
description: '',
url: 'https://www.watchingmydaughtergoblack.com',
parent: 'dogfartnetwork',
},
{
slug: 'zebragirls',
name: 'Zebra Girls',
url: 'https://www.zebragirls.com/tour',
description: '',
url: 'https://www.zebragirls.com',
parent: 'dogfartnetwork',
},
{
slug: 'gloryholeinitiations',
name: 'Gloryhole Initiations',
url: 'https://www.gloryhole-initiations.com/tour',
description: '',
url: 'https://www.gloryhole-initiations.com',
parent: 'dogfartnetwork',
},
{
slug: 'dogfartbehindthescenes',
name: 'Dogfart Behind The Scenes',
url: 'https://www.dogfartbehindthescenes.com/tour',
description: '',
url: 'https://www.dogfartbehindthescenes.com',
parent: 'dogfartnetwork',
},
{
slug: 'blackmeatwhitefeet',
name: 'Black Meat White Feet',
url: 'https://www.blackmeatwhitefeet.com/tour',
description: '',
url: 'https://www.blackmeatwhitefeet.com',
parent: 'dogfartnetwork',
},
{
slug: 'springthomas',
name: 'Spring Thomas',
url: 'https://www.springthomas.com/tour',
description: '',
url: 'https://www.springthomas.com',
parent: 'dogfartnetwork',
},
{
slug: 'katiethomas',
name: 'Katie Thomas',
url: 'https://www.katiethomas.com/tour',
description: '',
url: 'https://www.katiethomas.com',
parent: 'dogfartnetwork',
},
{
slug: 'ruthblackwell',
name: 'Ruth Blackwell',
url: 'https://www.ruthblackwell.com/tour',
description: '',
url: 'https://www.ruthblackwell.com',
parent: 'dogfartnetwork',
},
{
slug: 'candymonroe',
name: 'Candy Monroe',
url: 'https://www.candymonroe.com/tour',
description: '',
url: 'https://www.candymonroe.com',
parent: 'dogfartnetwork',
},
{
slug: 'wifewriting',
name: 'Wife Writing',
url: 'https://www.wifewriting.com/tour',
description: '',
url: 'https://www.wifewriting.com',
parent: 'dogfartnetwork',
},
{
slug: 'barbcummings',
name: 'Barb Cummings',
url: 'https://www.barbcummings.com/tour',
description: '',
url: 'https://www.barbcummings.com',
parent: 'dogfartnetwork',
},
{
slug: 'theminion',
name: 'The Minion',
url: 'https://www.theminion.com/tour',
description: '',
url: 'https://www.theminion.com',
parent: 'dogfartnetwork',
},
{
slug: 'blacksonboys',
name: 'Blacks On Boys',
url: 'https://www.blacksonboys.com/tour',
description: '',
url: 'https://www.blacksonboys.com',
parent: 'dogfartnetwork',
},
{
slug: 'gloryholesandhandjobs',
name: 'Gloryholes And Handjobs',
url: 'https://www.gloryholesandhandjobs.com/tour',
description: '',
url: 'https://www.gloryholesandhandjobs.com',
parent: 'dogfartnetwork',
},
// DORCEL

View File

@ -10,7 +10,7 @@ async function getPhotos(albumUrl) {
return [];
}
const lastPhotoPage = res.item.query.urls('.preview-image-container a').at(-1);
const lastPhotoPage = res.item.query.urls('.pics-container .preview-image-container a').at(-1);
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
@ -31,19 +31,19 @@ function scrapeLatest(scenes, site, filter = true) {
const siteUrl = query.cnt('.recent-details-title .help-block, .model-details-title .site-name');
release.url = query.url('.thumbnail', 'href', { origin: site.type === 'network' ? site.url : site.parent.url });
release.url = query.url('.thumbnail, .preview-image-container > a', 'href', { origin: site.url });
release.entryId = `${site.slug}_${new URL(release.url).pathname.split('/')[4]}`;
release.title = query.cnt('.scene-title');
release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim());
// release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim()); // the titles don't always list the actors, e.g. BarbCummings.com
// release.poster = `https:${element.querySelector('img').src}`;
release.poster = query.img();
release.teaser = query.el('.thumbnail', 'data-preview_clip_url');
release.teaser = query.video('.thumbnail, .preview-thumbnail', 'data-preview_clip_url');
release.channel = siteUrl?.match(/(.*).com/)?.[1].toLowerCase();
if (filter && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
if (filter && siteUrl && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
// different dogfart site
return { ...acc, unextracted: [...acc.unextracted, release] };
}
@ -59,26 +59,26 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
const release = {};
const { origin, pathname } = new URL(url);
release.channel = query.cnt('.site-name').split('.')[0].toLowerCase();
release.channel = channel.type === 'channel' ? channel.slug : query.cnt('.site-name').split('.')[0].toLowerCase();
release.entryId = `${release.channel}_${pathname.split('/').slice(-2)[0]}`;
release.title = query.cnt('.description-title');
release.actors = query.all('.more-scenes a').map((actorEl) => ({
release.title = query.cnt('.description-title') || query.text('.scene-title');
release.actors = query.all('.more-scenes a, .starring-list a').map((actorEl) => ({
name: query.cnt(actorEl),
url: query.url(actorEl, null, 'href', { origin: channel.url }),
}));
release.description = query.meta('meta[itemprop="description"]') || qu.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();
release.description = query.meta('meta[itemprop="description"]') || query.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();
release.date = query.date('meta[itemprop="uploadDate"]', null, null, 'content');
release.duration = query.duration('.extra-info p:nth-child(2)');
release.duration = query.duration('.extra-info p:nth-child(2), .run-time-container');
release.tags = query.cnts('.scene-details .categories a');
release.tags = query.exists('.scene-details .categories a') ? query.cnts('.scene-details .categories a') : query.text('.categories')?.split(/,\s+/);
const trailer = query.video('.html5-video', 'data-trailer');
const lastPhotosUrl = query.urls('.pagination a').at(-1);
release.poster = query.poster('.html5-video', 'data-poster');
release.poster = query.poster('.html5-video', 'data-poster') || query.img('.trailer-image');
if (trailer && !trailer?.includes('join')) {
release.trailer = trailer;
@ -88,16 +88,17 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
release.photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, channel, url);
}
release.stars = Number(((query.number('span[itemprop="average"]') || query.number('span[itemprop="ratingValue"]')) / 2).toFixed(2));
release.stars = Number(((query.number('span[itemprop="average"], span[itemprop="ratingValue"]') || query.number('canvas[data-score]', null, 'data-score')) / 2).toFixed(2));
return release;
}
async function fetchLatest(site, page = 1) {
const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
async function fetchLatest(channel, page = 1) {
// const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
const res = await qu.getAll(`${channel.url}/tour/scenes/?p=${page}`, '.recent-updates, .preview-image-container');
if (res.ok) {
return scrapeLatest(res.items, site);
return scrapeLatest(res.items, channel);
}
return res.status;