Scraping Dogfart scenes from native sites.
This commit is contained in:
parent
08df432665
commit
5cbf122d6f
|
@ -2714,163 +2714,142 @@ const sites = [
|
||||||
{
|
{
|
||||||
slug: 'blacksonblondes',
|
slug: 'blacksonblondes',
|
||||||
name: 'Blacks On Blondes',
|
name: 'Blacks On Blondes',
|
||||||
url: 'https://www.blacksonblondes.com/tour',
|
url: 'https://www.blacksonblondes.com',
|
||||||
description: 'Blacks On Blondes is the Worlds Largest and Best Interracial Sex and Interracial Porn website. Black Men and White Women. BlacksOnBlondes has 23 years worth of Hardcore Interracial Content. Featuring the entire Legendary Dogfart Movie Archive',
|
description: 'Blacks On Blondes is the Worlds Largest and Best Interracial Sex and Interracial Porn website. Black Men and White Women. BlacksOnBlondes has 23 years worth of Hardcore Interracial Content. Featuring the entire Legendary Dogfart Movie Archive',
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'cuckoldsessions',
|
slug: 'cuckoldsessions',
|
||||||
name: 'Cuckold Sessions',
|
name: 'Cuckold Sessions',
|
||||||
url: 'https://www.cuckoldsessions.com/tour',
|
url: 'https://www.cuckoldsessions.com',
|
||||||
description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com/tour - Hardcore Cuckold Fetish Videos',
|
description: 'Dogfart, the #1 Interracial Network in the World Presents CuckoldSessions.com - Hardcore Cuckold Fetish Videos',
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'gloryhole',
|
slug: 'gloryhole',
|
||||||
name: 'Glory Hole',
|
name: 'Glory Hole',
|
||||||
url: 'https://www.gloryhole.com/tour',
|
url: 'https://www.gloryhole.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'blacksoncougars',
|
slug: 'blacksoncougars',
|
||||||
name: 'Blacks On Cougars',
|
name: 'Blacks On Cougars',
|
||||||
url: 'https://www.blacksoncougars.com/tour',
|
url: 'https://www.blacksoncougars.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'wefuckblackgirls',
|
slug: 'wefuckblackgirls',
|
||||||
name: 'We Fuck Black Girls',
|
name: 'We Fuck Black Girls',
|
||||||
alias: ['wfbg'],
|
alias: ['wfbg'],
|
||||||
url: 'https://www.wefuckblackgirls.com/tour',
|
url: 'https://www.wefuckblackgirls.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'watchingmymomgoblack',
|
slug: 'watchingmymomgoblack',
|
||||||
name: 'Watching My Mom Go Black',
|
name: 'Watching My Mom Go Black',
|
||||||
url: 'https://www.watchingmymomgoblack.com/tour',
|
url: 'https://www.watchingmymomgoblack.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'interracialblowbang',
|
slug: 'interracialblowbang',
|
||||||
name: 'Interracial Blowbang',
|
name: 'Interracial Blowbang',
|
||||||
url: 'https://www.interracialblowbang.com/tour',
|
url: 'https://www.interracialblowbang.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'cumbang',
|
slug: 'cumbang',
|
||||||
name: 'Cumbang',
|
name: 'Cumbang',
|
||||||
url: 'https://www.cumbang.com/tour',
|
url: 'https://www.cumbang.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'interracialpickups',
|
slug: 'interracialpickups',
|
||||||
name: 'Interracial Pickups',
|
name: 'Interracial Pickups',
|
||||||
url: 'https://www.interracialpickups.com/tour',
|
url: 'https://www.interracialpickups.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'watchingmydaughtergoblack',
|
slug: 'watchingmydaughtergoblack',
|
||||||
name: 'Watching My Daughter Go Black',
|
name: 'Watching My Daughter Go Black',
|
||||||
url: 'https://www.watchingmydaughtergoblack.com/tour',
|
url: 'https://www.watchingmydaughtergoblack.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'zebragirls',
|
slug: 'zebragirls',
|
||||||
name: 'Zebra Girls',
|
name: 'Zebra Girls',
|
||||||
url: 'https://www.zebragirls.com/tour',
|
url: 'https://www.zebragirls.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'gloryholeinitiations',
|
slug: 'gloryholeinitiations',
|
||||||
name: 'Gloryhole Initiations',
|
name: 'Gloryhole Initiations',
|
||||||
url: 'https://www.gloryhole-initiations.com/tour',
|
url: 'https://www.gloryhole-initiations.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'dogfartbehindthescenes',
|
slug: 'dogfartbehindthescenes',
|
||||||
name: 'Dogfart Behind The Scenes',
|
name: 'Dogfart Behind The Scenes',
|
||||||
url: 'https://www.dogfartbehindthescenes.com/tour',
|
url: 'https://www.dogfartbehindthescenes.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'blackmeatwhitefeet',
|
slug: 'blackmeatwhitefeet',
|
||||||
name: 'Black Meat White Feet',
|
name: 'Black Meat White Feet',
|
||||||
url: 'https://www.blackmeatwhitefeet.com/tour',
|
url: 'https://www.blackmeatwhitefeet.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'springthomas',
|
slug: 'springthomas',
|
||||||
name: 'Spring Thomas',
|
name: 'Spring Thomas',
|
||||||
url: 'https://www.springthomas.com/tour',
|
url: 'https://www.springthomas.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'katiethomas',
|
slug: 'katiethomas',
|
||||||
name: 'Katie Thomas',
|
name: 'Katie Thomas',
|
||||||
url: 'https://www.katiethomas.com/tour',
|
url: 'https://www.katiethomas.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'ruthblackwell',
|
slug: 'ruthblackwell',
|
||||||
name: 'Ruth Blackwell',
|
name: 'Ruth Blackwell',
|
||||||
url: 'https://www.ruthblackwell.com/tour',
|
url: 'https://www.ruthblackwell.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'candymonroe',
|
slug: 'candymonroe',
|
||||||
name: 'Candy Monroe',
|
name: 'Candy Monroe',
|
||||||
url: 'https://www.candymonroe.com/tour',
|
url: 'https://www.candymonroe.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'wifewriting',
|
slug: 'wifewriting',
|
||||||
name: 'Wife Writing',
|
name: 'Wife Writing',
|
||||||
url: 'https://www.wifewriting.com/tour',
|
url: 'https://www.wifewriting.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'barbcummings',
|
slug: 'barbcummings',
|
||||||
name: 'Barb Cummings',
|
name: 'Barb Cummings',
|
||||||
url: 'https://www.barbcummings.com/tour',
|
url: 'https://www.barbcummings.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'theminion',
|
slug: 'theminion',
|
||||||
name: 'The Minion',
|
name: 'The Minion',
|
||||||
url: 'https://www.theminion.com/tour',
|
url: 'https://www.theminion.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'blacksonboys',
|
slug: 'blacksonboys',
|
||||||
name: 'Blacks On Boys',
|
name: 'Blacks On Boys',
|
||||||
url: 'https://www.blacksonboys.com/tour',
|
url: 'https://www.blacksonboys.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'gloryholesandhandjobs',
|
slug: 'gloryholesandhandjobs',
|
||||||
name: 'Gloryholes And Handjobs',
|
name: 'Gloryholes And Handjobs',
|
||||||
url: 'https://www.gloryholesandhandjobs.com/tour',
|
url: 'https://www.gloryholesandhandjobs.com',
|
||||||
description: '',
|
|
||||||
parent: 'dogfartnetwork',
|
parent: 'dogfartnetwork',
|
||||||
},
|
},
|
||||||
// DORCEL
|
// DORCEL
|
||||||
|
|
|
@ -10,7 +10,7 @@ async function getPhotos(albumUrl) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const lastPhotoPage = res.item.query.urls('.preview-image-container a').at(-1);
|
const lastPhotoPage = res.item.query.urls('.pics-container .preview-image-container a').at(-1);
|
||||||
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
|
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
|
||||||
|
|
||||||
const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
|
const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
|
||||||
|
@ -31,19 +31,19 @@ function scrapeLatest(scenes, site, filter = true) {
|
||||||
|
|
||||||
const siteUrl = query.cnt('.recent-details-title .help-block, .model-details-title .site-name');
|
const siteUrl = query.cnt('.recent-details-title .help-block, .model-details-title .site-name');
|
||||||
|
|
||||||
release.url = query.url('.thumbnail', 'href', { origin: site.type === 'network' ? site.url : site.parent.url });
|
release.url = query.url('.thumbnail, .preview-image-container > a', 'href', { origin: site.url });
|
||||||
release.entryId = `${site.slug}_${new URL(release.url).pathname.split('/')[4]}`;
|
release.entryId = `${site.slug}_${new URL(release.url).pathname.split('/')[4]}`;
|
||||||
|
|
||||||
release.title = query.cnt('.scene-title');
|
release.title = query.cnt('.scene-title');
|
||||||
release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim());
|
// release.actors = release.title.split(/[,&]|\band\b/).map((actor) => actor.replace(/BTS/i, '').trim()); // the titles don't always list the actors, e.g. BarbCummings.com
|
||||||
|
|
||||||
// release.poster = `https:${element.querySelector('img').src}`;
|
// release.poster = `https:${element.querySelector('img').src}`;
|
||||||
release.poster = query.img();
|
release.poster = query.img();
|
||||||
release.teaser = query.el('.thumbnail', 'data-preview_clip_url');
|
release.teaser = query.video('.thumbnail, .preview-thumbnail', 'data-preview_clip_url');
|
||||||
|
|
||||||
release.channel = siteUrl?.match(/(.*).com/)?.[1].toLowerCase();
|
release.channel = siteUrl?.match(/(.*).com/)?.[1].toLowerCase();
|
||||||
|
|
||||||
if (filter && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
|
if (filter && siteUrl && `www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
|
||||||
// different dogfart site
|
// different dogfart site
|
||||||
return { ...acc, unextracted: [...acc.unextracted, release] };
|
return { ...acc, unextracted: [...acc.unextracted, release] };
|
||||||
}
|
}
|
||||||
|
@ -59,26 +59,26 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
|
||||||
const release = {};
|
const release = {};
|
||||||
const { origin, pathname } = new URL(url);
|
const { origin, pathname } = new URL(url);
|
||||||
|
|
||||||
release.channel = query.cnt('.site-name').split('.')[0].toLowerCase();
|
release.channel = channel.type === 'channel' ? channel.slug : query.cnt('.site-name').split('.')[0].toLowerCase();
|
||||||
release.entryId = `${release.channel}_${pathname.split('/').slice(-2)[0]}`;
|
release.entryId = `${release.channel}_${pathname.split('/').slice(-2)[0]}`;
|
||||||
|
|
||||||
release.title = query.cnt('.description-title');
|
release.title = query.cnt('.description-title') || query.text('.scene-title');
|
||||||
release.actors = query.all('.more-scenes a').map((actorEl) => ({
|
release.actors = query.all('.more-scenes a, .starring-list a').map((actorEl) => ({
|
||||||
name: query.cnt(actorEl),
|
name: query.cnt(actorEl),
|
||||||
url: query.url(actorEl, null, 'href', { origin: channel.url }),
|
url: query.url(actorEl, null, 'href', { origin: channel.url }),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.description = query.meta('meta[itemprop="description"]') || qu.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();
|
release.description = query.meta('meta[itemprop="description"]') || query.cnt('.description').replace(/[ \t\n]{2,}/g, ' ').replace('...read more', '').trim();
|
||||||
|
|
||||||
release.date = query.date('meta[itemprop="uploadDate"]', null, null, 'content');
|
release.date = query.date('meta[itemprop="uploadDate"]', null, null, 'content');
|
||||||
release.duration = query.duration('.extra-info p:nth-child(2)');
|
release.duration = query.duration('.extra-info p:nth-child(2), .run-time-container');
|
||||||
|
|
||||||
release.tags = query.cnts('.scene-details .categories a');
|
release.tags = query.exists('.scene-details .categories a') ? query.cnts('.scene-details .categories a') : query.text('.categories')?.split(/,\s+/);
|
||||||
|
|
||||||
const trailer = query.video('.html5-video', 'data-trailer');
|
const trailer = query.video('.html5-video', 'data-trailer');
|
||||||
const lastPhotosUrl = query.urls('.pagination a').at(-1);
|
const lastPhotosUrl = query.urls('.pagination a').at(-1);
|
||||||
|
|
||||||
release.poster = query.poster('.html5-video', 'data-poster');
|
release.poster = query.poster('.html5-video', 'data-poster') || query.img('.trailer-image');
|
||||||
|
|
||||||
if (trailer && !trailer?.includes('join')) {
|
if (trailer && !trailer?.includes('join')) {
|
||||||
release.trailer = trailer;
|
release.trailer = trailer;
|
||||||
|
@ -88,16 +88,17 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
|
||||||
release.photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, channel, url);
|
release.photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, channel, url);
|
||||||
}
|
}
|
||||||
|
|
||||||
release.stars = Number(((query.number('span[itemprop="average"]') || query.number('span[itemprop="ratingValue"]')) / 2).toFixed(2));
|
release.stars = Number(((query.number('span[itemprop="average"], span[itemprop="ratingValue"]') || query.number('canvas[data-score]', null, 'data-score')) / 2).toFixed(2));
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(channel, page = 1) {
|
||||||
const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
|
// const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
|
||||||
|
const res = await qu.getAll(`${channel.url}/tour/scenes/?p=${page}`, '.recent-updates, .preview-image-container');
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeLatest(res.items, site);
|
return scrapeLatest(res.items, channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
|
Loading…
Reference in New Issue