Scraping Dogfart scenes from native sites.

This commit is contained in:
DebaucheryLibrarian 2022-04-03 23:00:05 +02:00
parent 08df432665
commit 5cbf122d6f
2 changed files with 41 additions and 61 deletions

View File

@ -2714,163 +2714,142 @@ const sites = [
{ {
slug: 'blacksonblondes', slug: 'blacksonblondes',
name: 'Blacks On Blondes', name: 'Blacks On Blondes',
url: 'https://www.blacksonblondes.com/tour', url: 'https://www.blacksonblondes.com',
description: 'Blacks On Blondes is the Worlds Largest and Best Interracial Sex and Interracial Porn website. Black Men and White Women. BlacksOnBlondes has 23 years worth of Hardcore Interracial Content. Featuring the entire Legendary Dogfart Movie Archive', description: 'Blacks On Blondes is the Worlds Largest and Best Interracial Sex and Interracial Porn website. Black Men and White Women. BlacksOnBlondes has 23 years worth of Hardcore Interracial Content. Featuring the entire Legendary Dogfart Movie Archive',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'cuckoldsessions', slug: 'cuckoldsessions',
name: 'Cuckold Sessions', name: 'Cuckold Sessions',
url: 'https://www.cuckoldsessions.com/tour', url: 'https://www.cuckoldsessions.com',
description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com/tour - Hardcore Cuckold Fetish Videos', description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com - Hardcore Cuckold Fetish Videos',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'gloryhole', slug: 'gloryhole',
name: 'Glory Hole', name: 'Glory Hole',
url: 'https://www.gloryhole.com/tour', url: 'https://www.gloryhole.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'blacksoncougars', slug: 'blacksoncougars',
name: 'Blacks On Cougars', name: 'Blacks On Cougars',
url: 'https://www.blacksoncougars.com/tour', url: 'https://www.blacksoncougars.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'wefuckblackgirls', slug: 'wefuckblackgirls',
name: 'We Fuck Black Girls', name: 'We Fuck Black Girls',
alias: ['wfbg'], alias: ['wfbg'],
url: 'https://www.wefuckblackgirls.com/tour', url: 'https://www.wefuckblackgirls.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'watchingmymomgoblack', slug: 'watchingmymomgoblack',
name: 'Watching My Mom Go Black', name: 'Watching My Mom Go Black',
url: 'https://www.watchingmymomgoblack.com/tour', url: 'https://www.watchingmymomgoblack.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'interracialblowbang', slug: 'interracialblowbang',
name: 'Interracial Blowbang', name: 'Interracial Blowbang',
url: 'https://www.interracialblowbang.com/tour', url: 'https://www.interracialblowbang.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'cumbang', slug: 'cumbang',
name: 'Cumbang', name: 'Cumbang',
url: 'https://www.cumbang.com/tour', url: 'https://www.cumbang.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'interracialpickups', slug: 'interracialpickups',
name: 'Interracial Pickups', name: 'Interracial Pickups',
url: 'https://www.interracialpickups.com/tour', url: 'https://www.interracialpickups.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'watchingmydaughtergoblack', slug: 'watchingmydaughtergoblack',
name: 'Watching My Daughter Go Black', name: 'Watching My Daughter Go Black',
url: 'https://www.watchingmydaughtergoblack.com/tour', url: 'https://www.watchingmydaughtergoblack.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'zebragirls', slug: 'zebragirls',
name: 'Zebra Girls', name: 'Zebra Girls',
url: 'https://www.zebragirls.com/tour', url: 'https://www.zebragirls.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'gloryholeinitiations', slug: 'gloryholeinitiations',
name: 'Gloryhole Initiations', name: 'Gloryhole Initiations',
url: 'https://www.gloryhole-initiations.com/tour', url: 'https://www.gloryhole-initiations.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'dogfartbehindthescenes', slug: 'dogfartbehindthescenes',
name: 'Dogfart Behind The Scenes', name: 'Dogfart Behind The Scenes',
url: 'https://www.dogfartbehindthescenes.com/tour', url: 'https://www.dogfartbehindthescenes.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'blackmeatwhitefeet', slug: 'blackmeatwhitefeet',
name: 'Black Meat White Feet', name: 'Black Meat White Feet',
url: 'https://www.blackmeatwhitefeet.com/tour', url: 'https://www.blackmeatwhitefeet.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'springthomas', slug: 'springthomas',
name: 'Spring Thomas', name: 'Spring Thomas',
url: 'https://www.springthomas.com/tour', url: 'https://www.springthomas.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'katiethomas', slug: 'katiethomas',
name: 'Katie Thomas', name: 'Katie Thomas',
url: 'https://www.katiethomas.com/tour', url: 'https://www.katiethomas.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'ruthblackwell', slug: 'ruthblackwell',
name: 'Ruth Blackwell', name: 'Ruth Blackwell',
url: 'https://www.ruthblackwell.com/tour', url: 'https://www.ruthblackwell.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'candymonroe', slug: 'candymonroe',
name: 'Candy Monroe', name: 'Candy Monroe',
url: 'https://www.candymonroe.com/tour', url: 'https://www.candymonroe.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'wifewriting', slug: 'wifewriting',
name: 'Wife Writing', name: 'Wife Writing',
url: 'https://www.wifewriting.com/tour', url: 'https://www.wifewriting.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'barbcummings', slug: 'barbcummings',
name: 'Barb Cummings', name: 'Barb Cummings',
url: 'https://www.barbcummings.com/tour', url: 'https://www.barbcummings.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'theminion', slug: 'theminion',
name: 'The Minion', name: 'The Minion',
url: 'https://www.theminion.com/tour', url: 'https://www.theminion.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'blacksonboys', slug: 'blacksonboys',
name: 'Blacks On Boys', name: 'Blacks On Boys',
url: 'https://www.blacksonboys.com/tour', url: 'https://www.blacksonboys.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
{ {
slug: 'gloryholesandhandjobs', slug: 'gloryholesandhandjobs',
name: 'Gloryholes And Handjobs', name: 'Gloryholes And Handjobs',
url: 'https://www.gloryholesandhandjobs.com/tour', url: 'https://www.gloryholesandhandjobs.com',
description: '',
parent: 'dogfartnetwork', parent: 'dogfartnetwork',
}, },
// DORCEL // DORCEL

View File

@ -10,7 +10,7 @@ async function getPhotos(albumUrl) {
return []; return [];
} }
const lastPhotoPage = res.item.query.urls('.preview-image-container a').at(-1); const lastPhotoPage = res.item.query.urls('.pics-container .preview-image-container a').at(-1);
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10); const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => { const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
@ -31,19 +31,19 @@ function scrapeLatest(scenes, site, filter = true) {
const siteUrl = query.cnt('.recent-details-title .help-block, .model-details-title .site-name'); const siteUrl = query.cnt('.recent-details-title .help-block, .model-details-title .site-name');
release.url = query.url('.thumbnail', 'href', { origin: site.type === 'network' ? site.url : site.parent.url }); release.url = query.url('.thumbnail, .preview-image-container > a', 'href', { origin: site.url });
release.entryId = `${site.slug}_${new URL(release.url).pathname.split('/')[4]}`; release.entryId = `${site.slug}_${new URL(release.url).pathname.split('/')[4]}`;
release.title = query.cnt('.scene-title'); release.title = query.cnt('.scene-title');
release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim()); // release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim()); // the titles don't always list the actors, e.g. BarbCummings.com
// release.poster = `https:${element.querySelector('img').src}`; // release.poster = `https:${element.querySelector('img').src}`;
release.poster = query.img(); release.poster = query.img();
release.teaser = query.el('.thumbnail', 'data-preview_clip_url'); release.teaser = query.video('.thumbnail, .preview-thumbnail', 'data-preview_clip_url');
release.channel = siteUrl?.match(/(.*).com/)?.[1].toLowerCase(); release.channel = siteUrl?.match(/(.*).com/)?.[1].toLowerCase();
if (filter && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) { if (filter && siteUrl && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
// different dogfart site // different dogfart site
return { ...acc, unextracted: [...acc.unextracted, release] }; return { ...acc, unextracted: [...acc.unextracted, release] };
} }
@ -59,26 +59,26 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
const release = {}; const release = {};
const { origin, pathname } = new URL(url); const { origin, pathname } = new URL(url);
release.channel = query.cnt('.site-name').split('.')[0].toLowerCase(); release.channel = channel.type === 'channel' ? channel.slug : query.cnt('.site-name').split('.')[0].toLowerCase();
release.entryId = `${release.channel}_${pathname.split('/').slice(-2)[0]}`; release.entryId = `${release.channel}_${pathname.split('/').slice(-2)[0]}`;
release.title = query.cnt('.description-title'); release.title = query.cnt('.description-title') || query.text('.scene-title');
release.actors = query.all('.more-scenes a').map((actorEl) => ({ release.actors = query.all('.more-scenes a, .starring-list a').map((actorEl) => ({
name: query.cnt(actorEl), name: query.cnt(actorEl),
url: query.url(actorEl, null, 'href', { origin: channel.url }), url: query.url(actorEl, null, 'href', { origin: channel.url }),
})); }));
release.description = query.meta('meta[itemprop="description"]') || qu.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim(); release.description = query.meta('meta[itemprop="description"]') || query.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();
release.date = query.date('meta[itemprop="uploadDate"]', null, null, 'content'); release.date = query.date('meta[itemprop="uploadDate"]', null, null, 'content');
release.duration = query.duration('.extra-info p:nth-child(2)'); release.duration = query.duration('.extra-info p:nth-child(2), .run-time-container');
release.tags = query.cnts('.scene-details .categories a'); release.tags = query.exists('.scene-details .categories a') ? query.cnts('.scene-details .categories a') : query.text('.categories')?.split(/,\s+/);
const trailer = query.video('.html5-video', 'data-trailer'); const trailer = query.video('.html5-video', 'data-trailer');
const lastPhotosUrl = query.urls('.pagination a').at(-1); const lastPhotosUrl = query.urls('.pagination a').at(-1);
release.poster = query.poster('.html5-video', 'data-poster'); release.poster = query.poster('.html5-video', 'data-poster') || query.img('.trailer-image');
if (trailer && !trailer?.includes('join')) { if (trailer && !trailer?.includes('join')) {
release.trailer = trailer; release.trailer = trailer;
@ -88,16 +88,17 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
release.photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, channel, url); release.photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, channel, url);
} }
release.stars = Number(((query.number('span[itemprop="average"]') || query.number('span[itemprop="ratingValue"]')) / 2).toFixed(2)); release.stars = Number(((query.number('span[itemprop="average"], span[itemprop="ratingValue"]') || query.number('canvas[data-score]', null, 'data-score')) / 2).toFixed(2));
return release; return release;
} }
async function fetchLatest(site, page = 1) { async function fetchLatest(channel, page = 1) {
const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates'); // const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
const res = await qu.getAll(`${channel.url}/tour/scenes/?p=${page}`, '.recent-updates, .preview-image-container');
if (res.ok) { if (res.ok) {
return scrapeLatest(res.items, site); return scrapeLatest(res.items, channel);
} }
return res.status; return res.status;