Fixed PornDoe scraper.

This commit is contained in:
DebaucheryLibrarian 2021-08-26 01:14:54 +02:00
parent 23537e0243
commit 69a7e8f13d
57 changed files with 65 additions and 30 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 546 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 753 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 278 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 294 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -590,8 +590,8 @@ const tags = [
slug: 'humiliation',
},
{
name: 'indian',
slug: 'indian',
name: 'desi',
slug: 'desi',
},
{
name: 'innie pussy',
@ -671,6 +671,10 @@ const tags = [
description: 'Two men fucking one woman, but not eachother. Typically involves a \'spitroast\', where one guy gets a blowjob and the other fucks her pussy or ass.',
group: 'group',
},
{
name: 'milk',
slug: 'milk',
},
{
name: 'miniskirt',
slug: 'miniskirt',
@ -1652,6 +1656,10 @@ const aliases = [
name: 'incest fantasy',
for: 'family',
},
{
name: 'indian',
for: 'desi',
},
{
name: 'innie',
for: 'innie-pussy',

View File

@ -598,6 +598,7 @@ const tagMedia = [
['airtight', 6, 'Remy Lacroix in "Ass Worship 14"', 'julesjordan'],
['airtight', 'anissa_kate_legalporno', 'Anissa Kate in GP1962', 'legalporno'],
['airtight', 'adriana_chechik_hope_howell_hopehowellxxx_1', 'Adriana Chechik and Hope Howell in "Hope And Adriana\'s Gangbang', 'hopehowellxxx'],
['airtight', 'diamond_foxxx_milfslikeitbig', 'Diamond Foxx in "Diamond\'s Bday Gangbang"', 'milfslikeitbig'],
['airtight', 'tory_lane_bigtitsatwork', 'Tory Lane in "I\'m Your Christmas Bonus"', 'bigtitsatwork'],
['airtight', 11, 'Malena Nazionale in "Rocco\'s Perverted Secretaries 2: Italian Edition"', 'roccosiffredi'],
['airtight', 3, 'Anita Bellini in "Triple Dick Gangbang"', 'handsonhardcore'],
@ -605,7 +606,7 @@ const tagMedia = [
['airtight', 'mina_ddfnetwork', 'Remy Lacroix in "Ass Worship 14"', 'julesjordan'],
['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3"', 'julesjordan'],
['airtight', 10, 'Asa Akira in "Asa Akira To The Limit"', 'julesjordan'],
['airtight', 8, 'Veronica Leal in SZ2520'],
['airtight', 8, 'Veronica Leal in SZ2520', 'legalporno'],
['airtight', 5, 'Chloe Amour in "DP Masters 4"', 'julesjordan'],
['airtight', 9, 'Cindy Shine in GP1658'],
['anal', 5, 'Abella Danger', 'hardx'],
@ -660,6 +661,7 @@ const tagMedia = [
['blonde', 'shawna_lenee_sunrisekings', 'Shawna Lenee', 'sunrisekings'],
['blonde', 2, 'Isabelle Deltore', 'herlimit'],
['blowbang', 'ana_foxxx_hardx', 'Ana Foxxx in "Facialized Vol. 4"', 'hardx'],
['blowbang', 'angela_white_julesjordan', 'Angela White in "Her Biggest Gangbang Ever"', 'julesjordan'],
['blowbang', 'monika_fox_legalporno', 'Monika Fox in GL479', 'legalporno'],
['blowbang', 0, 'Lacy Lennon in "Lacy Lennon\'s First Blowbang"', 'hardx'],
['blowbang', 'zaawaadi_roccosiffredi_1', 'Zaawaadi in "My Name Is Zaawaadi"', 'roccosiffredi'],
@ -733,6 +735,7 @@ const tagMedia = [
['deepthroat', 4, 'Tammy', 'youngthroats'],
['deepthroat', 1, 'Jynx Maze in "Slutty and Sluttier 13"', 'evilangel'],
['deepthroat', 0, 'Chanel Grey in "Deepthroating Is Fun"', 'throated'],
['desi', 'priya_rai_hollyrandall', 'Priya Rai in "Dildo Fun"', 'hollyrandall'],
['doggy-style', 'jane_wilde_evilangel', 'Jane Wilde and Brock Cooper in "The Cock Hungry Chronicles"', 'evilangel'],
['double-blowjob', 1, 'Veronica Rodriguez and Penny Pax in "Fucking Older Guys 5"', 'penthouse'],
['double-blowjob', 0, 'Kira Noir and Kali Roses', 'brazzers'],
@ -780,10 +783,11 @@ const tagMedia = [
['dp', 3, 'Hime Marie in AA047', 'legalporno'],
['dp', 'kenna_james_tushy_1', 'Kenna James in "Yoga Retreat', 'tushy'],
['dp', 2, 'Megan Rain in "DP Masters 4"', 'julesjordan'],
['dp', 6, 'Kira Noir', 'hardx'],
['dp', 'silvia_dellai_dpfanatics', 'Silvia Dellai in "Tempting Promises"', 'dpfanatics'],
['dp', 'lara_frost_legalporno', 'Lara Frost in NRX070', 'legalporno'],
['dp', 6, 'Kira Noir', 'hardx'],
['dp', 5, 'Lana Rhoades in "Gangbang Me 3"', 'hardx'],
['dp', 'silvia_dellai_dpfanatics', 'Silvia Dellai in "Tempting Promises"', 'dpfanatics'],
['dp', 'diamond_foxxx_milfslikeitbig', 'Diamond Foxxx in "Deep Cover...Deeper Throat"', 'milfslikeitbig'],
['dp', 'zaawaadi_roccosiffredi', 'Zaawaadi in "My Name Is Zaawaadi"', 'roccosiffredi'],
['dp', 7, 'Chloe Lamour in "DP Masters 7"', 'julesjordan'],
['dp', 'poster', 'Mia Malkova in "DP Me 8"', 'hardx'],
@ -814,6 +818,7 @@ const tagMedia = [
['facial', 2, 'Ashly Anderson', 'hookuphotshot'],
['facial', 4, 'Kendra Heart', 'facialsforever'],
['flexible', 'lara_frost_legalporno', 'Lara Frost in NRX059', 'legalporno'],
['electric-shock', 'tia_ling_electrosluts', 'Tia Ling', 'electrosluts'],
['enhanced-boobs', 7, 'Charley Atwell', 'icandigirls'],
['enhanced-boobs', 14, 'Rikki Six', 'dreamdolls'],
['enhanced-boobs', 2, 'Gia Milana in "Hot Anal Latina"', 'hardx'],
@ -834,8 +839,10 @@ const tagMedia = [
['enhanced-boobs', 'kelly_oliveira_teenfidelity', 'Kelly Oliveira in "Keep It Going"', 'teenfidelity'],
['enhanced-boobs', 9, 'Putri Cinta', 'watch4beauty'],
['enhanced-boobs', 'alexis_zara_wildoncam', 'Alexis Zara', 'wildoncam'],
['enhanced-boobs', 'diamond_foxxx_brazzers_1', 'Diamond Foxxx in "Big Cock Inheritor"', 'brazzers'],
['enhanced-boobs', 'silvia_dellai_pornworld', 'Silvia Dellai in GP1966', 'pornworld'],
['enhanced-boobs', 3, 'Ashly Anderson', 'passionhd'],
['enhanced-boobs', 'diamond_brandi_kendra_jewels_brazzers', 'Diamond Jackson, Brandi Love, Johnny Sins, Kendra Lust and Jewels Jade in "Miss Titness America"', 'brazzers'],
['enhanced-boobs', 'jennifer_mendez_bangbros', 'Jennifer Mendez', 'bangbros'],
['enhanced-boobs', 'charlie_atwell_photodromm', 'Charley Atwell', 'photodromm'],
['enhanced-boobs', 'clanddi_jinkcego_ddfbusty_5', 'Clanddi Jinkcego & Rebecca Jessop', 'ddfbusty'],
@ -899,21 +906,25 @@ const tagMedia = [
['latina', 1, 'Jynx Maze in "Big Anal Asses 2"', 'hardx'],
['latina', 0, 'Vienna Black', 'spizoo'],
['latina', 3, 'Gina Valentina', 'brazzers'],
['lesbian', 'honey_gold_kaylani_lei_brazzers', 'Honey Gold and Kaylani Lei', 'brazzers'],
['lesbian', 'honey_gold_kaylani_lei_brazzers', 'Kaylani Lei and Honey Gold', 'brazzers'],
['lesbian', 0, 'Jenna Sativa and Alina Lopez in "Opposites Attract"', 'girlgirl'],
['maid', 0, 'Whitney Wright in "Dredd Up Your Ass 2"', 'julesjordan'],
['maid', 1, 'Alessandra Jane', 'brazzers'],
['maid', 'priya_rai_hollyrandall', 'Priya Rai', 'hollyrandall'],
['milf', 'nikki_benz_bigwetbutts', 'Nikki Benz in "Pantyhose Playtime"', 'bigwetbutts'],
['milf', 'diamond_foxxx_brazzers_2', 'Diamond Foxxx in "Two MILFs, One Cock"', 'brazzers'],
['milf', 'silvia_saige_ddfnetwork', 'Silvia Saige', 'pornworld'],
['milf', 'kaylani_lei_puremature', 'Kaylani Lei', 'puremature'],
['milf', 2, 'Shalina Devine', 'analmom'],
['milf', 1, 'Francesca Le', 'evilangel'],
['milk', 'diamond_foxxx_mommygotboobs', 'Diamond Foxxx in "Midnight Milk"', 'mommygotboobs'],
['mff', 'zaawaadi_asia_rae_allblackx_3', 'Zaawaadi and Asia Rae in "All Black Threesome"', 'allblackx'],
['mff', 0, 'Madison Ivy, Adriana Chechik and Keiran Lee in "Day With A Pornstar"', 'brazzers'],
['mff', 1, 'Anikka Albrite, Kelsi Monroe and Mick Blue', 'hardx'],
['mff', 'azul_hermosa_isis_love_pornstarslikeitbig', 'Azul Hermosa, Isis Love and Zac Wild in "She Likes Rough Quickies"', 'pornstarslikeitbig'],
['mfm', 'kyra_black_21sextury', 'Kyra Black', '21sextury'],
['mfm', 0, 'Vina Sky in "Jules Jordan\'s Three Ways"', 'julesjordan'],
['mfm', 'anne_amari_wefuckblackgirls', 'Anne Amari', 'wefuckblackgirls'],
['mfm', 8, 'Ariana Marie in "DP Masters 7"', 'julesjordan'],
['mfm', 1, 'Lana Rhoades in "Gangbang Me 3"', 'hardx'],
['mfm', 'franceska_jaimes_digitalplayground', 'Franceska Jaimes in "Monarch"', 'digitalplayground'],
@ -1004,6 +1015,8 @@ const tagMedia = [
['toy-dp', 0, 'Marley Brinx, Ivy Lebelle and Lyra Law in "Marley Brinx First GGDP"', 'lesbianx'],
['toys', 1, 'Chloe Lamour in "Curives In All The Right Places"', 'wetandpuffy'],
['toys', 'shawna_lenee_sunrisekings', 'Shawna Lenee', 'sunrisekings'],
['triple-penetration', 'angela_white_julesjordan', 'Angela White in "Her Biggest Gangbang Ever"', 'julesjordan'],
['triple-penetration', 'ria_sunn_legalporno', 'Ria Sunn in SZ2082', 'legalporno'],
['tvp', 'september_reign_wefuckblackgirls', 'September Reign in "Second Appearance"', 'wefuckblackgirls'],
['trainbang', 'poster', 'Kali Roses in "Passing Me Around"', 'blacked'],
['trainbang', 'gina_gerson_assholefever', 'Gina Gerson in "Oppa Gangbang Style"', 'assholefever'],

View File

@ -10,21 +10,26 @@ function scrapeAll(scenes) {
release.url = query.url('a');
release.entryId = release.url.match(/\/watch\/(\d+)/)[1];
release.title = query.cnt('.bottom .h4') || query.q('.bottom .link', 'title');
release.date = query.date('.extra-info:not(.actors)', 'DD MMMM YYYY');
release.title = query.cnt('[class*="item-title"] a') || query.q('.bottom .link', 'title');
release.date = query.date('[class*="item-date"]', 'MMM DD, YYYY');
release.actors = query.all('.actors a strong', true);
release.actors = query.all('[class*="item-actors"] a').map(el => ({
name: query.cnt(el),
url: query.url(el, null),
}));
// const poster = query.img('.thumb img');
const poster = query.img('.thumb', 'data-bg');
const poster = query.img('[class*="thumb"]', 'data-bg');
release.poster = [
poster.replace('512x288', '1472x828'),
poster,
];
if (poster) {
release.poster = [
poster.replace('512x288', '1472x828'),
poster,
];
}
release.teaser = { src: query.video('video-preview') };
release.channel = slugify(query.q('.bottom a[href*="/channels"]', 'title'), '');
release.teaser = query.video('video-preview');
release.channel = slugify(query.q('[class*="item-channel"] a', 'title'), '');
return release;
});
@ -38,21 +43,22 @@ function scrapeScene({ query }, url) {
release.description = query.meta('name=description') || query.q('read-even-more', true);
release.date = query.date('.h5-published', 'DD MMMM YYYY', /\d{1,2} \w+ \d{4}/);
release.actors = query.all('.video-top-details .actors a[href*="/models"]', true);
release.date = query.date('.h5-published', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/);
release.actors = query.all('.video-top-details .actors a[href*="/models"]').map(el => ({
name: query.cnt(el),
url: query.url(el, null),
}));
release.duration = query.dur('meta[itemprop="duration"]', null, 'content');
release.tags = query.all('.video-top-details a[href*="/categories"], .video-top-details a[href*="/tags"]', true);
release.poster = query.img('.poster img') || query.meta('itemprop=thumbnailUrl');
release.photos = query.imgs('#gallery-thumbs picture img').slice(1).map(photo => [ // first image is poster
release.photos = query.imgs('#gallery-thumbs [class*="thumb"]', 'data-bg').slice(1).map(photo => [ // first image is poster
photo.replace('512x288', '1472x828'),
photo,
]);
release.trailer = {
src: query.meta('itemprop=contentURL'),
};
release.trailer = query.meta('itemprop=contentURL');
release.channel = slugify(query.q('.video-top-details .actors a[href*="/channels"] strong', true), '');
@ -60,7 +66,7 @@ function scrapeScene({ query }, url) {
}
async function fetchActorReleases({ query }, url, page = 1, accReleases = []) {
const releases = scrapeAll(qu.initAll(query.all('.main-content .card-video')));
const releases = scrapeAll(qu.initAll(query.all('.main-content .card-video, .main-content .global-video-card')));
const hasNextPage = query.exists('.pages a.active + a');
if (hasNextPage) {
@ -77,15 +83,23 @@ async function fetchActorReleases({ query }, url, page = 1, accReleases = []) {
async function scrapeProfile({ query }, url, include) {
const profile = {};
const keys = query.all('.col .h4:not(.more-less-actors)', true);
const values = query.all('.col .h4-big', true);
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
const bio = query.all('[class*="list"] [class*="list-item"]').reduce((acc, el) => ({
...acc,
[slugify(query.text(el), '_')]: query.cnt(el, 'span'),
}), {});
const tags = query.all('.more-less-actors a[href*="/tags"], .more-less-actors a[href*="/categories"]', true);
const tags = query.cnts(`
[class$="description"] [class*="more-less"] a[href*="/categories"],
[class$="description"] [class*="more-less"] a[href*="/tags"],
[class*="seo-text"] a[href*="/categories"],
[class*="seo-text"] a[href*="/tags"]
`);
profile.nationality = bio.nationality;
profile.placeOfBirth = bio.birth_place;
profile.dateOfBirth = qu.extractDate(bio.birth_date, 'MMM D, YYYY');
if (/enhanced/i.test(bio.tits_type)) profile.naturalBoobs = false;
if (/natural/i.test(bio.tits_type)) profile.naturalBoobs = true;
@ -95,8 +109,8 @@ async function scrapeProfile({ query }, url, include) {
if (tags.includes('tattoo') || tags.includes('tattoos')) profile.hasTattoos = true;
if (tags.includes('piercing') || tags.includes('piercings')) profile.hasPiercings = true;
profile.description = query.q('.more-less-actors read-even-more', true);
profile.avatar = query.img('.avatar .thumb img') || null;
profile.description = query.text('[class$="description"] [class*="more-less"]');
profile.avatar = query.img('[class*="poster"] img') || null;
if (include.releases) {
profile.releases = await fetchActorReleases({ query }, url);
@ -106,7 +120,7 @@ async function scrapeProfile({ query }, url, include) {
}
async function fetchLatest(channel, page = 1) {
const res = await qu.getAll(`${channel.url}?page=${page}`, '.main-content .card-video');
const res = await qu.getAll(`${channel.url}?page=${page}`, '.main-content .card-video, .main-content .global-video-card');
return res.ok ? scrapeAll(res.items, channel) : res.status;
}