diff --git a/src/scrapers/cumlouder.js b/src/scrapers/cumlouder.js index a36e98c8..c39d7e8e 100755 --- a/src/scrapers/cumlouder.js +++ b/src/scrapers/cumlouder.js @@ -1,8 +1,8 @@ 'use strict'; +const unprint = require('unprint'); const { decode } = require('html-entities'); -const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); function scrapeAll(items, _channel) { @@ -12,13 +12,13 @@ function scrapeAll(items, _channel) { const { date, precision } = query.dateAgo('.fecha'); const poster = query.img('.thumb'); - release.entryId = query.number(null, /\d+/, 'onclick'); - release.url = query.url(null, 'href', { origin: 'https://www.cumlouder.com' }); + release.url = query.url(null, { origin: 'https://www.cumlouder.com' }); + release.entryId = new URL(release.url).pathname.match(/video\/([\w-]+)/)?.[1]; release.date = date; release.datePrecision = precision; - release.title = query.cnt('h2'); + release.title = query.content('h2'); release.duration = query.duration('.minutos'); release.poster = [ @@ -30,26 +30,36 @@ function scrapeAll(items, _channel) { }); } -function scrapeScene({ query }, channel, html) { +async function fetchLatest(channel, page) { + const res = await unprint.get(`${channel.url}/${page}/`, { selectAll: '.muestra-escena' }); + + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; +} + +function scrapeScene({ query, html }, url) { const release = {}; const { date, precision } = query.dateAgo('.sub-video .added'); - release.entryId = html.match(/cumlouder_(\d+)/)?.[1]; + release.entryId = new URL(url).pathname.match(/video\/([\w-]+)/)?.[1]; - release.title = query.cnt('.video-top h1'); + release.title = query.content('.video-top h1'); release.description = query.text('.sub-video p'); release.date = date; release.datePrecision = precision; release.actors = query.all('.sub-video .pornstar-link').map((el) => ({ - name: query.cnt(el, null), - url: query.url(el, null, 'href', { origin: 'https://www.cumlouder.com' }), + name: unprint.query.content(el, null), + url: unprint.query.url(el, null, { origin: 'https://www.cumlouder.com' }), })); release.duration = query.duration('.video-top .duracion'); - release.tags = query.cnts('.video-top .tag-link'); + release.tags = query.contents('.video-top .tag-link'); release.poster = query.poster() || html.match(/urlImg\s*=\s*'(.*)';/)?.[1]; release.video = query.video() || decode(html.match(/urlVideo\s*=\s*'(.*)';/)?.[1]); // no trailers but full-length videos @@ -59,55 +69,47 @@ function scrapeScene({ query }, channel, html) { return release; } -function scrapeProfile({ query, el }, channel) { +async function fetchScene(url, channel) { + const res = await unprint.get(url); + + if (res.ok) { + return scrapeScene(res.context, url, channel); + } + + return res.status; +} + +function scrapeProfile({ query }, channel) { const profile = {}; const bio = query.all('.data-bio li').reduce((acc, bioEl) => ({ ...acc, - [slugify(query.cnt(bioEl, 'strong'), '_')]: query.text(bioEl), + [slugify(unprint.query.content(bioEl, 'strong'), '_')]: unprint.query.text(bioEl), }), {}); profile.nationality = bio.nationality; - profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'DD-MM-YYYY'); + profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'DD-MM-YYYY'); profile.height = Number(bio.height) * 100; profile.weight = parseInt(bio.weight, 10); profile.eyes = bio.eye_color; profile.hairColor = bio.hair_color; - profile.description = query.cnt('.data-bio p:last-of-type'); + profile.description = query.content('.data-bio p:last-of-type'); profile.avatar = query.img('.thumb-bio'); - profile.scenes = scrapeAll(qu.initAll(el, '.muestra-escena'), channel); + profile.socials = query.urls('a.twitter-timeline'); + + profile.scenes = scrapeAll(unprint.initAll(query.all('.muestra-escena')), channel); return profile; } -async function fetchLatest(channel, page) { - const res = await qu.getAll(`${channel.url}/${page}/`, '.muestra-escena'); - - if (res.ok) { - return scrapeAll(res.items, channel); - } - - return res.status; -} - -async function fetchScene(url, channel) { - const res = await qu.get(url); - - if (res.ok) { - return scrapeScene(res.item, channel, res.html); - } - - return res.status; -} - async function fetchProfile(actor, channel) { - const res = await qu.get(`https://www.cumlouder.com/girl/${actor.slug}/`, '.listado-escenas'); + const res = await unprint.get(`https://www.cumlouder.com/girl/${actor.slug}/`, { select: '.listado-escenas' }); if (res.ok) { - return scrapeProfile(res.item, channel); + return scrapeProfile(res.context, channel); } return res.status; diff --git a/tests/profiles.js b/tests/profiles.js index e77eb21e..83da5ab0 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -217,45 +217,47 @@ const actors = [ { entity: 'modelmediaasia', name: 'Li WeiWei', fields: ['avatar', 'entryId', 'gender', 'alias', 'height', 'weight', 'bust', 'waist', 'hip', 'socials'] }, { entity: 'delphine', name: 'Bridgette B', fields: ['avatar', 'measurements', 'birthPlace'] }, // etc. + { entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] }, { entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] }, + { entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] }, + { entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] }, { entity: 'bangbros', name: 'Kira Perez', fields: ['avatar', 'gender', 'ethnicity', 'hairColor'] }, + { entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] }, + { entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] }, + { entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] }, + { entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] }, + { entity: 'cumlouder', name: 'Valentina Nappi', fields: ['avatar', 'nationality', 'dateOfBirth', 'height', 'weight', 'eyes', 'hairColor', 'description', 'socials'] }, + { entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] }, + { entity: 'doubleviewcasting', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, + { entity: 'firstanalquest', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, + { entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] }, + { entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] }, + { entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] }, { entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] }, + { entity: 'inthecrack', name: 'Vicki Chase', fields: ['dateOfBirth', 'height', 'weight', 'ethnicity', 'birthPlace'] }, + { entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] }, + { entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic + { entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] }, + { entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] }, + { entity: 'nebraskacoeds', name: 'Mary Beth Haglin', fields: ['avatar'] }, + { entity: 'pascalssubsluts', name: 'Zlata Shine', fields: ['avatar', 'gender', 'nationality', 'hairColor', 'height', 'description'] }, + { entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] }, + { entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] }, + { entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] }, { entity: 'pornworld', name: 'Veronica Leal', fields: ['avatar', 'nationality', 'age'] }, { entity: 'private', name: 'Cherry Kiss', fields: ['avatar', 'description', 'nationality', 'measurements', 'height', 'weight', 'hairColor', 'eye', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings'] }, + { entity: 'rickysroom', name: 'Liz Jordan', fields: ['avatar', 'description', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] }, + { entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] }, + { entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] }, { entity: 'teenmegaworld', name: 'Sheri Vi', fields: ['avatar', 'description', 'hairColor', 'eyes'] }, + { entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] }, { entity: 'theflourishxxx', name: 'XWifeKaren', fields: ['avatar', 'description'] }, { entity: 'tokyohot', name: 'Mai Kawana', url: 'https://my.tokyo-hot.com/cast/2099/', fields: ['avatar', 'birthPlace', 'height', 'cup', 'bust', 'waist', 'hip', 'hairStyle', 'shoeSize', 'bloodType'] }, - { entity: 'rickysroom', name: 'Liz Jordan', fields: ['avatar', 'description', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] }, - { entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] }, - { entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] }, - { entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] }, - { entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] }, - { entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] }, - { entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] }, { entity: 'wakeupnfuck', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] }, - { entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] }, - { entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] }, - { entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] }, - { entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] }, - { entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] }, - { entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] }, - { entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] }, - { entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] }, - { entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] }, - { entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic - { entity: 'pascalssubsluts', name: 'Zlata Shine', fields: ['avatar', 'gender', 'nationality', 'hairColor', 'height', 'description'] }, - { entity: 'nebraskacoeds', name: 'Mary Beth Haglin', fields: ['avatar'] }, - { entity: 'firstanalquest', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, - { entity: 'doubleviewcasting', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, - { entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] }, - { entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] }, - { entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] }, - { entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] }, - { entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] }, ]; const actorScrapers = scrapers.actors; -const source = argv.source?.[0] || null; +const sources = argv.sources || null; async function validateUrl(url, mime = 'image/') { if (!url) { @@ -336,7 +338,7 @@ async function init() { return; } - if (source && source !== entitySlug) { + if (sources && !sources.includes(entitySlug)) { // console.log('____', entitySlug); return; } @@ -363,8 +365,10 @@ async function init() { assert.fail('profile not found'); } - console.log(omit(profile, ['scenes'])); - console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', ')); + if (argv.inspect) { + console.log(omit(profile, ['scenes'])); + console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', ')); + } await Promise.all(actor.fields.map(async (field) => { assert.ok( @@ -378,6 +382,8 @@ async function init() { }); }, Promise.resolve()); + console.log(actors.length); + await knex.destroy(); }