Scraping Dogfart scenes from native sites.
This commit is contained in:
parent
08df432665
commit
5cbf122d6f
|
@ -2714,163 +2714,142 @@ const sites = [
|
|||
{
|
||||
slug: 'blacksonblondes',
|
||||
name: 'Blacks On Blondes',
|
||||
url: 'https://www.blacksonblondes.com/tour',
|
||||
url: 'https://www.blacksonblondes.com',
|
||||
description: 'Blacks On Blondes is the Worlds Largest and Best Interracial Sex and Interracial Porn website. Black Men and White Women. BlacksOnBlondes has 23 years worth of Hardcore Interracial Content. Featuring the entire Legendary Dogfart Movie Archive',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'cuckoldsessions',
|
||||
name: 'Cuckold Sessions',
|
||||
url: 'https://www.cuckoldsessions.com/tour',
|
||||
description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com/tour - Hardcore Cuckold Fetish Videos',
|
||||
url: 'https://www.cuckoldsessions.com',
|
||||
description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com - Hardcore Cuckold Fetish Videos',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'gloryhole',
|
||||
name: 'Glory Hole',
|
||||
url: 'https://www.gloryhole.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.gloryhole.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'blacksoncougars',
|
||||
name: 'Blacks On Cougars',
|
||||
url: 'https://www.blacksoncougars.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.blacksoncougars.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'wefuckblackgirls',
|
||||
name: 'We Fuck Black Girls',
|
||||
alias: ['wfbg'],
|
||||
url: 'https://www.wefuckblackgirls.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.wefuckblackgirls.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'watchingmymomgoblack',
|
||||
name: 'Watching My Mom Go Black',
|
||||
url: 'https://www.watchingmymomgoblack.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.watchingmymomgoblack.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'interracialblowbang',
|
||||
name: 'Interracial Blowbang',
|
||||
url: 'https://www.interracialblowbang.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.interracialblowbang.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'cumbang',
|
||||
name: 'Cumbang',
|
||||
url: 'https://www.cumbang.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.cumbang.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'interracialpickups',
|
||||
name: 'Interracial Pickups',
|
||||
url: 'https://www.interracialpickups.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.interracialpickups.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'watchingmydaughtergoblack',
|
||||
name: 'Watching My Daughter Go Black',
|
||||
url: 'https://www.watchingmydaughtergoblack.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.watchingmydaughtergoblack.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'zebragirls',
|
||||
name: 'Zebra Girls',
|
||||
url: 'https://www.zebragirls.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.zebragirls.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'gloryholeinitiations',
|
||||
name: 'Gloryhole Initiations',
|
||||
url: 'https://www.gloryhole-initiations.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.gloryhole-initiations.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'dogfartbehindthescenes',
|
||||
name: 'Dogfart Behind The Scenes',
|
||||
url: 'https://www.dogfartbehindthescenes.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.dogfartbehindthescenes.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'blackmeatwhitefeet',
|
||||
name: 'Black Meat White Feet',
|
||||
url: 'https://www.blackmeatwhitefeet.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.blackmeatwhitefeet.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'springthomas',
|
||||
name: 'Spring Thomas',
|
||||
url: 'https://www.springthomas.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.springthomas.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'katiethomas',
|
||||
name: 'Katie Thomas',
|
||||
url: 'https://www.katiethomas.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.katiethomas.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'ruthblackwell',
|
||||
name: 'Ruth Blackwell',
|
||||
url: 'https://www.ruthblackwell.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.ruthblackwell.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'candymonroe',
|
||||
name: 'Candy Monroe',
|
||||
url: 'https://www.candymonroe.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.candymonroe.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'wifewriting',
|
||||
name: 'Wife Writing',
|
||||
url: 'https://www.wifewriting.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.wifewriting.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'barbcummings',
|
||||
name: 'Barb Cummings',
|
||||
url: 'https://www.barbcummings.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.barbcummings.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'theminion',
|
||||
name: 'The Minion',
|
||||
url: 'https://www.theminion.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.theminion.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'blacksonboys',
|
||||
name: 'Blacks On Boys',
|
||||
url: 'https://www.blacksonboys.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.blacksonboys.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
{
|
||||
slug: 'gloryholesandhandjobs',
|
||||
name: 'Gloryholes And Handjobs',
|
||||
url: 'https://www.gloryholesandhandjobs.com/tour',
|
||||
description: '',
|
||||
url: 'https://www.gloryholesandhandjobs.com',
|
||||
parent: 'dogfartnetwork',
|
||||
},
|
||||
// DORCEL
|
||||
|
|
|
@ -10,7 +10,7 @@ async function getPhotos(albumUrl) {
|
|||
return [];
|
||||
}
|
||||
|
||||
const lastPhotoPage = res.item.query.urls('.preview-image-container a').at(-1);
|
||||
const lastPhotoPage = res.item.query.urls('.pics-container .preview-image-container a').at(-1);
|
||||
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
|
||||
|
||||
const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
|
||||
|
@ -31,19 +31,19 @@ function scrapeLatest(scenes, site, filter = true) {
|
|||
|
||||
const siteUrl = query.cnt('.recent-details-title .help-block, .model-details-title .site-name');
|
||||
|
||||
release.url = query.url('.thumbnail', 'href', { origin: site.type === 'network' ? site.url : site.parent.url });
|
||||
release.url = query.url('.thumbnail, .preview-image-container > a', 'href', { origin: site.url });
|
||||
release.entryId = `${site.slug}_${new URL(release.url).pathname.split('/')[4]}`;
|
||||
|
||||
release.title = query.cnt('.scene-title');
|
||||
release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim());
|
||||
// release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim()); // the titles don't always list the actors, e.g. BarbCummings.com
|
||||
|
||||
// release.poster = `https:${element.querySelector('img').src}`;
|
||||
release.poster = query.img();
|
||||
release.teaser = query.el('.thumbnail', 'data-preview_clip_url');
|
||||
release.teaser = query.video('.thumbnail, .preview-thumbnail', 'data-preview_clip_url');
|
||||
|
||||
release.channel = siteUrl?.match(/(.*).com/)?.[1].toLowerCase();
|
||||
|
||||
if (filter && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
|
||||
if (filter && siteUrl && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
|
||||
// different dogfart site
|
||||
return { ...acc, unextracted: [...acc.unextracted, release] };
|
||||
}
|
||||
|
@ -59,26 +59,26 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
|
|||
const release = {};
|
||||
const { origin, pathname } = new URL(url);
|
||||
|
||||
release.channel = query.cnt('.site-name').split('.')[0].toLowerCase();
|
||||
release.channel = channel.type === 'channel' ? channel.slug : query.cnt('.site-name').split('.')[0].toLowerCase();
|
||||
release.entryId = `${release.channel}_${pathname.split('/').slice(-2)[0]}`;
|
||||
|
||||
release.title = query.cnt('.description-title');
|
||||
release.actors = query.all('.more-scenes a').map((actorEl) => ({
|
||||
release.title = query.cnt('.description-title') || query.text('.scene-title');
|
||||
release.actors = query.all('.more-scenes a, .starring-list a').map((actorEl) => ({
|
||||
name: query.cnt(actorEl),
|
||||
url: query.url(actorEl, null, 'href', { origin: channel.url }),
|
||||
}));
|
||||
|
||||
release.description = query.meta('meta[itemprop="description"]') || qu.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();
|
||||
release.description = query.meta('meta[itemprop="description"]') || query.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();
|
||||
|
||||
release.date = query.date('meta[itemprop="uploadDate"]', null, null, 'content');
|
||||
release.duration = query.duration('.extra-info p:nth-child(2)');
|
||||
release.duration = query.duration('.extra-info p:nth-child(2), .run-time-container');
|
||||
|
||||
release.tags = query.cnts('.scene-details .categories a');
|
||||
release.tags = query.exists('.scene-details .categories a') ? query.cnts('.scene-details .categories a') : query.text('.categories')?.split(/,\s+/);
|
||||
|
||||
const trailer = query.video('.html5-video', 'data-trailer');
|
||||
const lastPhotosUrl = query.urls('.pagination a').at(-1);
|
||||
|
||||
release.poster = query.poster('.html5-video', 'data-poster');
|
||||
release.poster = query.poster('.html5-video', 'data-poster') || query.img('.trailer-image');
|
||||
|
||||
if (trailer && !trailer?.includes('join')) {
|
||||
release.trailer = trailer;
|
||||
|
@ -88,16 +88,17 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
|
|||
release.photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, channel, url);
|
||||
}
|
||||
|
||||
release.stars = Number(((query.number('span[itemprop="average"]') || query.number('span[itemprop="ratingValue"]')) / 2).toFixed(2));
|
||||
release.stars = Number(((query.number('span[itemprop="average"], span[itemprop="ratingValue"]') || query.number('canvas[data-score]', null, 'data-score')) / 2).toFixed(2));
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
// const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
|
||||
const res = await qu.getAll(`${channel.url}/tour/scenes/?p=${page}`, '.recent-updates, .preview-image-container');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeLatest(res.items, site);
|
||||
return scrapeLatest(res.items, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
|
Loading…
Reference in New Issue