diff --git a/config/default.js b/config/default.js index bacc5b12..a34a207e 100644 --- a/config/default.js +++ b/config/default.js @@ -11,10 +11,18 @@ module.exports = { }, // include: [], // exclude: [], + exclude: [ + ['famedigital', [ + 'lowartfilms', + ]], + ], profiles: [ - 'evilangel', [ - // Gamma; Evil angel and Wicked have their own assets + 'evilangel', + 'famedigital', + ], + [ + // Gamma; Evil Angel + Devil's Film and Wicked have their own assets 'xempire', 'blowpass', ], @@ -30,9 +38,9 @@ module.exports = { 'men', 'transangels', ], + 'wicked', 'brazzers', 'milehighmedia', - 'wicked', '21sextury', 'julesjordan', 'naughtyamerica', @@ -47,6 +55,7 @@ module.exports = { 'freeonesLegacy', ], fetchAfter: [1, 'week'], + nullDateLimit: 10, media: { path: './media', thumbnailSize: 320, // width for 16:9 will be exactly 576px diff --git a/public/img/logos/ddfnetwork/bustylover.png b/public/img/logos/ddfnetwork/bustylover.png new file mode 100644 index 00000000..fc240e8d Binary files /dev/null and b/public/img/logos/ddfnetwork/bustylover.png differ diff --git a/public/img/logos/ddfnetwork/favicon.png b/public/img/logos/ddfnetwork/favicon.png index 6fa4a3fd..3019bb9a 100644 Binary files a/public/img/logos/ddfnetwork/favicon.png and b/public/img/logos/ddfnetwork/favicon.png differ diff --git a/public/img/logos/ddfnetwork/fuckinhd.png b/public/img/logos/ddfnetwork/fuckinhd.png new file mode 100644 index 00000000..57907e15 Binary files /dev/null and b/public/img/logos/ddfnetwork/fuckinhd.png differ diff --git a/public/img/logos/ddfnetwork/misc/busty-lover.png b/public/img/logos/ddfnetwork/misc/busty-lover.png new file mode 100644 index 00000000..c15dcf62 Binary files /dev/null and b/public/img/logos/ddfnetwork/misc/busty-lover.png differ diff --git a/public/img/logos/ddfnetwork/misc/busty-lover.svg b/public/img/logos/ddfnetwork/misc/busty-lover.svg new file mode 100644 index 00000000..d2390e1d --- /dev/null +++ b/public/img/logos/ddfnetwork/misc/busty-lover.svg @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/public/img/logos/ddfnetwork/misc/fuck-in-hd.png b/public/img/logos/ddfnetwork/misc/fuck-in-hd.png new file mode 100644 index 00000000..2b7a82e0 Binary files /dev/null and b/public/img/logos/ddfnetwork/misc/fuck-in-hd.png differ diff --git a/public/img/logos/ddfnetwork/misc/fuck-in-hd.svg b/public/img/logos/ddfnetwork/misc/fuck-in-hd.svg new file mode 100644 index 00000000..6d7b4265 --- /dev/null +++ b/public/img/logos/ddfnetwork/misc/fuck-in-hd.svg @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/public/img/logos/famedigital/daringsex.png b/public/img/logos/famedigital/daringsex.png new file mode 100644 index 00000000..a582063d Binary files /dev/null and b/public/img/logos/famedigital/daringsex.png differ diff --git a/public/img/logos/famedigital/devilsfilm.png b/public/img/logos/famedigital/devilsfilm.png new file mode 100644 index 00000000..ea58d5d1 Binary files /dev/null and b/public/img/logos/famedigital/devilsfilm.png differ diff --git a/public/img/logos/famedigital/favicon.png b/public/img/logos/famedigital/favicon.png new file mode 100644 index 00000000..d2b7eb47 Binary files /dev/null and b/public/img/logos/famedigital/favicon.png differ diff --git a/public/img/logos/famedigital/lowartfilms.png b/public/img/logos/famedigital/lowartfilms.png new file mode 100644 index 00000000..350aa681 Binary files /dev/null and b/public/img/logos/famedigital/lowartfilms.png differ diff --git a/public/img/logos/famedigital/misc/daring-sex_original.png b/public/img/logos/famedigital/misc/daring-sex_original.png new file mode 100644 index 00000000..bca267c1 Binary files /dev/null and b/public/img/logos/famedigital/misc/daring-sex_original.png differ diff --git a/public/img/logos/famedigital/misc/devils-film.png b/public/img/logos/famedigital/misc/devils-film.png new file mode 100644 index 00000000..2462f2c9 Binary files /dev/null and b/public/img/logos/famedigital/misc/devils-film.png differ diff --git a/public/img/logos/famedigital/misc/devils-film.svg b/public/img/logos/famedigital/misc/devils-film.svg new file mode 100644 index 00000000..41e099e6 --- /dev/null +++ b/public/img/logos/famedigital/misc/devils-film.svg @@ -0,0 +1 @@ +devilsfilm-logo \ No newline at end of file diff --git a/public/img/logos/famedigital/misc/fame-digital.png b/public/img/logos/famedigital/misc/fame-digital.png new file mode 100644 index 00000000..f91ec59a Binary files /dev/null and b/public/img/logos/famedigital/misc/fame-digital.png differ diff --git a/public/img/logos/famedigital/misc/fame-digital_square.png b/public/img/logos/famedigital/misc/fame-digital_square.png new file mode 100644 index 00000000..c954e70f Binary files /dev/null and b/public/img/logos/famedigital/misc/fame-digital_square.png differ diff --git a/public/img/logos/famedigital/misc/peter-north.png b/public/img/logos/famedigital/misc/peter-north.png new file mode 100644 index 00000000..36b06e7b Binary files /dev/null and b/public/img/logos/famedigital/misc/peter-north.png differ diff --git a/public/img/logos/famedigital/misc/peter-north.svg b/public/img/logos/famedigital/misc/peter-north.svg new file mode 100644 index 00000000..59ea4fca --- /dev/null +++ b/public/img/logos/famedigital/misc/peter-north.svg @@ -0,0 +1,109 @@ + + + + + + + + + + + + + diff --git a/public/img/logos/famedigital/misc/rocco-siffredi.png b/public/img/logos/famedigital/misc/rocco-siffredi.png new file mode 100644 index 00000000..713df04d Binary files /dev/null and b/public/img/logos/famedigital/misc/rocco-siffredi.png differ diff --git a/public/img/logos/famedigital/misc/rocco-siffredi.svg b/public/img/logos/famedigital/misc/rocco-siffredi.svg new file mode 100644 index 00000000..9049fd3b --- /dev/null +++ b/public/img/logos/famedigital/misc/rocco-siffredi.svg @@ -0,0 +1,116 @@ + + + +rocco_logo_vers2 + + + + + + + + + + + + diff --git a/public/img/logos/famedigital/network.png b/public/img/logos/famedigital/network.png new file mode 100644 index 00000000..83b33c51 Binary files /dev/null and b/public/img/logos/famedigital/network.png differ diff --git a/public/img/logos/famedigital/peternorth.png b/public/img/logos/famedigital/peternorth.png new file mode 100644 index 00000000..e49d91d8 Binary files /dev/null and b/public/img/logos/famedigital/peternorth.png differ diff --git a/public/img/logos/famedigital/roccosiffredi.png b/public/img/logos/famedigital/roccosiffredi.png new file mode 100644 index 00000000..c297dd86 Binary files /dev/null and b/public/img/logos/famedigital/roccosiffredi.png differ diff --git a/public/img/logos/famedigital/silverstonedvd.png b/public/img/logos/famedigital/silverstonedvd.png new file mode 100644 index 00000000..2b0ee5f5 Binary files /dev/null and b/public/img/logos/famedigital/silverstonedvd.png differ diff --git a/public/img/logos/famedigital/silviasaint.png b/public/img/logos/famedigital/silviasaint.png new file mode 100644 index 00000000..15e05681 Binary files /dev/null and b/public/img/logos/famedigital/silviasaint.png differ diff --git a/public/img/logos/famedigital/whiteghetto.png b/public/img/logos/famedigital/whiteghetto.png new file mode 100644 index 00000000..ecad4c00 Binary files /dev/null and b/public/img/logos/famedigital/whiteghetto.png differ diff --git a/public/img/logos/teamskeet/favicon.png b/public/img/logos/teamskeet/favicon.png index 81515f32..ba0ea830 100644 Binary files a/public/img/logos/teamskeet/favicon.png and b/public/img/logos/teamskeet/favicon.png differ diff --git a/seeds/00_networks.js b/seeds/00_networks.js index ceb34400..cd13707f 100644 --- a/seeds/00_networks.js +++ b/seeds/00_networks.js @@ -42,6 +42,12 @@ const networks = [ url: 'https://ddfnetwork.com', description: 'European porn videos hub with exclusive VR, 4K and full HD XXX videos and hot sex photos of Europes finest porn star babes.', }, + { + slug: 'famedigital', + name: 'Fame Digital', + url: 'https://www.famedigital.com', + description: 'Watch and download thousands of the best porn videos at FameDigital.com, the largest porn network on the web! The hottest teens, MILFs and more pornstars are all here!', + }, { slug: 'digitalplayground', name: 'Digital Playground', diff --git a/seeds/01_sites.js b/seeds/01_sites.js index 7cad69b8..70167389 100644 --- a/seeds/01_sites.js +++ b/seeds/01_sites.js @@ -1007,6 +1007,23 @@ const sites = [ description: 'Fantasy Blowjobs & POV Cock Sucking Videos and Photos Produced in VR, 4K and full HD featuring Sexy European Pornstars', network: 'ddfnetwork', }, + { + slug: 'fuckinhd', + name: 'Fuck in HD', + url: 'https://fuckinhd.com', + description: 'HD Hardcore Sex & XXX Fantasy Porn Videos and Photos Produced in full HD featuring a Variety of Hardcore Porn Niches.', + network: 'ddfnetwork', + parameters: { native: true }, + enabled: false, // appears to be re-releases only + }, + { + slug: 'bustylover', + name: 'Busty Lover', + url: 'https://bustylover.com', + network: 'ddfnetwork', + parameters: { native: true }, + enabled: false, // appears to be re-releases only + }, // DIGITAL PLAYGROUND { slug: 'digitalplayground', @@ -1307,6 +1324,87 @@ const sites = [ description: '', network: 'fakehub', }, + // FAME DIGITAL + { + slug: 'devilsfilm', + name: 'Devil\'s Film', + url: 'https://www.devilsfilm.com', + description: 'Welcome to the best porn network, DevilsFilm.com, featuring teens, MILFs, trans and interracial porn with all of your favorite pornstars in 4k ultra HD!', + parameters: { api: true }, + network: 'famedigital', + }, + { + slug: 'lowartfilms', + name: 'Low Art Films', + url: 'https://www.lowartfilms.com', + description: 'Artistic Hardcore Porn Videos', + network: 'famedigital', + parameters: { + latest: '/en/All/scenes/0/latest/', + upcoming: '/en/All/scenes/0/upcoming', + }, + }, + { + slug: 'daringsex', + name: 'Daring Sex', + url: 'https://www.daringsexhd.com/', + description: 'Welcome the official Daring Sex site, home of high quality erotica, sensual porn and hardcore exploration of the darker side of sexuality. Here you will find a variety of videos for lovers looking for a bit of extra, or something darker with an element of control.', + network: 'famedigital', + parameters: { api: true }, + enabled: false, + }, + { + slug: 'peternorth', + name: 'Peter North', + url: 'https://www.peternorth.com', + description: 'PeterNorth.com features hundreds of cumshots and deepthroat blowjob videos with the hottest teens & MILFs. Watch 25 years of Peter North inside!', + network: 'famedigital', + parameters: { + latest: '/en/videos/AllCategories/0/3/0/All-Dvds/0/latest/', + upcoming: '/en/videos/AllCategories/0/3/0/All-Dvds/0/upcoming', + }, + }, + { + slug: 'roccosiffredi', + name: 'Rocco Siffredi', + url: 'https://www.roccosiffredi.com', + description: 'Welcome to the official RoccoSiffredi.com, the Italian Stallion, with hardcore anal fucking and rough sex from the man himself who has coined the term hardcore.', + parameters: { api: true }, + network: 'famedigital', + }, + { + slug: 'silverstonedvd', + name: 'Silverstone DVD', + url: 'https://www.silverstonedvd.com', + description: 'Welcome to SilverStoneDVDs.com to enjoy unlimited streaming & downloads of teen porn, hot latina anal, young and dumb blowjob, DPs and hardcore porn.', + network: 'famedigital', + parameters: { + latest: '/en/All/scenes/0/latest/', + upcoming: '/en/All/scenes/0/upcoming', + }, + }, + { + slug: 'silviasaint', + name: 'Silvia Saint', + url: 'https://www.silviasaint.com', + description: 'Welcome to Silvia Saint official website. You can see Silvia Saint videos, pictures and blog!', + network: 'famedigital', + parameters: { + latest: '/en/scenes/All/0/', + upcoming: '/en/scenes/All/0/1/upcoming', + }, + }, + { + slug: 'whiteghetto', + name: 'White Ghetto', + url: 'https://www.whiteghetto.com', + description: 'Welcome to WhiteGhetto.com. Home of MILFs, GILFs, Midget porn, Indian babes, hairy pussies and more unusual and oddity porn!', + network: 'famedigital', + parameters: { + latest: '/en/scenes/All/0/superCat/0/latest/', + upcoming: '/en/scenes/All/0/superCat/0/upcoming', + }, + }, // JAYS POV { slug: 'jayspov', diff --git a/src/actors.js b/src/actors.js index dbdf78f5..61ebbd66 100644 --- a/src/actors.js +++ b/src/actors.js @@ -351,6 +351,11 @@ async function scrapeActors(actorNames) { try { return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => { + if (!scraper) { + logger.warn(`No profile profile scraper available for ${scraperSlug}`); + throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`)); + } + logger.verbose(`Searching '${actorName}' on ${scraperSlug}`); const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug); @@ -371,7 +376,7 @@ async function scrapeActors(actorNames) { } catch (error) { if (error.warn !== false) { logger.warn(`Error in scraper ${source}: ${error.message}`); - logger.error(error.stack); + // logger.error(error.stack); } } @@ -380,6 +385,10 @@ async function scrapeActors(actorNames) { const profile = await mergeProfiles(profiles, actorEntry); + if (argv.inspect) { + console.log(profile); + } + if (profile === null) { logger.warn(`Could not find profile for actor '${actorName}'`); diff --git a/src/app.js b/src/app.js index 6d7a0a1d..43cb4830 100644 --- a/src/app.js +++ b/src/app.js @@ -25,7 +25,7 @@ async function init() { const actors = await scrapeActors(); if (argv.withReleases) { - const releases = actors.map(actor => actor.releases).flat(); + const releases = actors.map(actor => actor?.releases || []).flat(); await scrapeReleases(releases, null, 'scene'); } diff --git a/src/argv.js b/src/argv.js index 9e26c93f..509f290d 100644 --- a/src/argv.js +++ b/src/argv.js @@ -80,18 +80,23 @@ const { argv } = yargs describe: 'Don\'t fetch scenes older than', type: 'string', default: config.fetchAfter.join(' '), - alias: 'limit', }) - .option('pages', { - describe: 'Limit pages to scrape per site. Only used when no dates are found or --after is unset.', + .option('null-date-limit', { + describe: 'Limit amount of scenes when dates are missing.', type: 'number', - default: 1, + default: config.nullDateLimit, + alias: 'limit', }) .option('save', { describe: 'Save fetched releases to database', type: 'boolean', default: true, }) + .option('inspect', { + describe: 'Show data in console.', + type: 'boolean', + default: false, + }) .option('level', { describe: 'Log level', type: 'string', diff --git a/src/scrape-sites.js b/src/scrape-sites.js index 434f3b20..6843d634 100644 --- a/src/scrape-sites.js +++ b/src/scrape-sites.js @@ -51,14 +51,20 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a if ( uniqueReleases.length > 0 - && (oldestReleaseOnPage || page < argv.pages) - && moment(oldestReleaseOnPage).isAfter(afterDate) + // && (oldestReleaseOnPage || page < argv.pages) + && (oldestReleaseOnPage + ? moment(oldestReleaseOnPage).isAfter(afterDate) + : accReleases.length + uniqueReleases.length < argv.nullDateLimit) ) { // oldest release on page is newer that specified limit, fetch next page return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1); } - return accReleases.concat(uniqueReleases); + if (oldestReleaseOnPage) { + return accReleases.concat(uniqueReleases); + } + + return accReleases.concat(uniqueReleases).slice(0, argv.nullDateLimit); } async function scrapeUpcomingReleases(scraper, site) { @@ -105,7 +111,7 @@ async function scrapeSiteReleases(scraper, site) { ]); if (argv.upcoming) { - logger.info(`${site.name}: ${argv.latest ? `Found ${newReleases.length}` : 'Ignoring'} latest releases, ${argv.upcoming ? '' : 'ignoring '}${upcomingReleases.length || ''} upcoming releases`); + logger.info(`${site.name}: ${argv.latest ? `Found ${newReleases.length}` : 'Ignoring'} latest releases,${argv.upcoming ? ' ' : ' ignoring '}${upcomingReleases.length || '0'} upcoming releases`); } const baseReleases = [...newReleases, ...upcomingReleases]; diff --git a/src/scrapers/ddfnetwork.js b/src/scrapers/ddfnetwork.js index 0e5ed420..b4eb354e 100644 --- a/src/scrapers/ddfnetwork.js +++ b/src/scrapers/ddfnetwork.js @@ -1,101 +1,81 @@ 'use strict'; const bhttp = require('bhttp'); -const cheerio = require('cheerio'); -const { JSDOM } = require('jsdom'); -const moment = require('moment'); + +const { d, ex, exa, get } = require('../utils/q'); +const slugify = require('../utils/slugify'); /* eslint-disable newline-per-chained-call */ -function scrapeLatest(html, site) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); - const sceneElements = $('.card.m-1').toArray(); +function scrapeAll(html, site, origin) { + return exa(html, '.card.m-1:not(.pornstar-card)').map(({ q, qa, qd }) => { + const release = {}; - return sceneElements.map((element) => { - const sceneLinkElement = $(element).find('a').first(); - const title = sceneLinkElement.attr('title'); - const url = `${site.url}${sceneLinkElement.attr('href')}`; - const entryId = url.split('/').slice(-1)[0]; + release.title = q('a', 'title'); + release.url = `${site?.url || origin || 'https://ddfnetwork.com'}${q('a', 'href')}`; + [release.entryId] = release.url.split('/').slice(-1); - const date = moment.utc($(element).find('small[datetime]').attr('datetime'), 'YYYY-MM-DD HH:mm:ss').toDate(); - const actors = $(element).find('.card-subtitle a').map((actorIndex, actorElement) => $(actorElement).text().trim()) - .toArray() - .filter(actor => actor); + release.date = qd('small[datetime]', 'YYYY-MM-DD HH:mm:ss', null, 'datetime'); + release.actors = qa('.card-subtitle a', true).filter(Boolean); - const duration = parseInt($(element).find('.card-info div:nth-child(2) .card-text').text(), 10) * 60; + const duration = parseInt(q('.card-info div:nth-child(2) .card-text', true), 10) * 60; + if (duration) release.duration = duration; - const poster = sceneLinkElement.find('img').attr('data-src'); + release.poster = q('img').dataset.src; - return { - url, - entryId, - title, - actors, - date, - duration, - poster, - rating: null, - site, - }; + return release; }); } -async function scrapeScene(html, url, site) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); +async function scrapeScene(html, url, _site) { + const { q, qa, qd, qm, qp, qus } = ex(html); + const release = {}; - const entryId = url.split('/').slice(-1)[0]; - const title = $('meta[itemprop="name"]').attr('content'); - const description = $('.descr-box p').text(); // meta tags don't contain full description + [release.entryId] = url.split('/').slice(-1); - const dateProp = $('meta[itemprop="uploadDate"]').attr('content'); - const date = dateProp - ? moment.utc($('meta[itemprop="uploadDate"]').attr('content'), 'YYYY-MM-DD').toDate() - : moment.utc($('.title-border:nth-child(2) p').text(), 'MM.DD.YYYY').toDate(); - const actors = $('.pornstar-card > a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray(); + release.title = qm('itemprop=name'); + release.description = q('.descr-box p', true); + release.date = qd('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content') + || qd('.title-border:nth-child(2) p', 'MM.DD.YYYY'); - const likes = Number($('.info-panel.likes .likes').text()); - const duration = Number($('.info-panel.duration .duration').text().slice(0, -4)) * 60; + release.actors = qa('.pornstar-card > a', 'title'); + release.tags = qa('.tags-tab .tags a', true); - const tags = $('.tags-tab .tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); + release.duration = parseInt(q('.icon-video-red + span', true), 10) * 60; + release.likes = Number(q('.icon-like-red + span', true)); - const poster = $('#video').attr('poster'); - const photos = $('.photo-slider-guest .card a').map((photoIndex, photoElement) => $(photoElement).attr('href')).toArray(); + release.poster = qp(); + release.photos = qus('.photo-slider-guest .card a'); - const trailer540 = $('source[res="540"]').attr('src'); - const trailer720 = $('source[res="720"]').attr('src'); + release.trailer = qa('source[type="video/mp4"]').map(trailer => ({ + src: trailer.src, + quality: Number(trailer.attributes.res.value), + })); - return { - url, - entryId, - title, - description, - actors, - date, - duration, - tags, - poster, - photos, - trailer: [ - { - src: trailer720, - quality: 720, - }, - { - src: trailer540, - quality: 540, - }, - ], - rating: { - likes, - }, - site, - }; + return release; +} + +async function fetchActorReleases(urls) { + // DDF Network and DDF Network Stream list all scenes, exclude + const sources = urls.filter(url => !/ddfnetwork/.test(url)); + + const releases = await Promise.all(sources.map(async (url) => { + const { html } = await get(url); + + return scrapeAll(html, null, new URL(url).origin); + })); + + // DDF cross-releases scenes between sites, filter duplicates by entryId + return Object.values(releases + .flat() + .sort((releaseA, releaseB) => releaseB.date - releaseA.date) // sort by date so earliest scene remains + .reduce((acc, release) => ({ ...acc, [release.entryId]: release }), {})); } async function scrapeProfile(html, _url, actorName) { - const { document } = new JSDOM(html).window; + const { q, qa, qus } = ex(html); - const keys = Array.from(document.querySelectorAll('.about-title'), el => el.textContent.trim().replace(':', '')); - const values = Array.from(document.querySelectorAll('.about-info'), (el) => { + const keys = qa('.about-title', true).map(key => slugify(key, { delimiter: '_' })); + const values = qa('.about-info').map((el) => { if (el.children.length > 0) { return Array.from(el.children, child => child.textContent.trim()).join(', '); } @@ -104,9 +84,7 @@ async function scrapeProfile(html, _url, actorName) { }); const bio = keys.reduce((acc, key, index) => { - if (values[index] === '-') { - return acc; - } + if (values[index] === '-') return acc; return { ...acc, @@ -114,45 +92,49 @@ async function scrapeProfile(html, _url, actorName) { }; }, {}); - const descriptionEl = document.querySelector('.description-box'); - const avatarEl = document.querySelector('.pornstar-details .card-img-top'); - const profile = { name: actorName, }; - profile.birthdate = moment.utc(bio.Birthday, 'MMMM DD, YYYY').toDate(); - if (bio.Nationality) profile.nationality = bio.Nationality; + profile.description = q('.description-box', true); + profile.birthdate = d(bio.birthday, 'MMMM DD, YYYY'); - if (bio['Bra size']) [profile.bust] = bio['Bra size'].match(/\d+\w+/); - if (bio.Waist) profile.waist = Number(bio.Waist.match(/\d+/)[0]); - if (bio.Hips) profile.hip = Number(bio.Hips.match(/\d+/)[0]); + if (bio.nationality) profile.nationality = bio.nationality; - if (bio.Height) profile.height = Number(bio.Height.match(/\d{2,}/)[0]); + if (bio.bra_size) [profile.bust] = bio.bra_size.match(/\d+\w+/); + if (bio.waist) profile.waist = Number(bio.waist.match(/\d+/)[0]); + if (bio.hips) profile.hip = Number(bio.hips.match(/\d+/)[0]); - if (bio['Tit Style'] && bio['Tit Style'].match('Enhanced')) profile.naturalBoobs = false; - if (bio['Tit Style'] && bio['Tit Style'].match('Natural')) profile.naturalBoobs = true; + if (bio.height) profile.height = Number(bio.height.match(/\d{2,}/)[0]); - if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true; - if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true; + if (bio.tit_style && /Enhanced/.test(bio.tit_style)) profile.naturalBoobs = false; + if (bio.tit_style && /Natural/.test(bio.tit_style)) profile.naturalBoobs = true; - if (bio['Hair Style']) profile.hair = bio['Hair Style'].split(',')[0].trim().toLowerCase(); - if (bio['Eye Color']) profile.eyes = bio['Eye Color'].match(/\w+/)[0].toLowerCase(); + if (bio.body_art && /Tattoo/.test(bio.body_art)) profile.hasTattoos = true; + if (bio.body_art && /Piercing/.test(bio.body_art)) profile.hasPiercings = true; - if (bio['Shoe size']) profile.shoes = Number(bio['Shoe size'].split('|')[1]); + if (bio.hair_style) profile.hair = bio.hair_style.split(',')[0].trim().toLowerCase(); + if (bio.eye_color) profile.eyes = bio.eye_color.match(/\w+/)[0].toLowerCase(); - if (descriptionEl) profile.description = descriptionEl.textContent.trim(); + if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]); + + const avatarEl = q('.pornstar-details .card-img-top'); if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`; + profile.releases = await fetchActorReleases(qus('.find-me-tab li a')); + return profile; } async function fetchLatest(site, page = 1) { - const url = `https://ddfnetwork.com/videos/search/latest/ever/${new URL(site.url).hostname}/-/${page}`; + const url = site.parameters?.native + ? `${site.url}/videos/search/latest/ever/allsite/-/${page}` + : `https://ddfnetwork.com/videos/search/latest/ever/${new URL(site.url).hostname}/-/${page}`; + console.log(url); const res = await bhttp.get(url); - return scrapeLatest(res.body.toString(), site); + return scrapeAll(res.body.toString(), site); } async function fetchScene(url, site) { diff --git a/src/scrapers/famedigital.js b/src/scrapers/famedigital.js new file mode 100644 index 00000000..e08765c6 --- /dev/null +++ b/src/scrapers/famedigital.js @@ -0,0 +1,72 @@ +'use strict'; + +const { fetchLatest, fetchApiLatest, fetchUpcoming, fetchApiUpcoming, fetchScene, fetchProfile, fetchApiProfile } = require('./gamma'); + +function extractLowArtActors(release) { + const actors = release.title + .replace(/solo/i, '') + .split(/,|\band\b/ig) + .map(actor => actor.trim()); + + return { + ...release, + actors, + }; +} + +async function networkFetchLatest(site, page = 1) { + if (site.parameters?.api) return fetchApiLatest(site, page, false); + + const releases = await fetchLatest(site, page); + + if (site.slug === 'lowartfilms') { + return releases.map(release => extractLowArtActors(release)); + } + + return releases; +} + +async function networkFetchScene(url, site) { + const release = await fetchScene(url, site); + + if (site.slug === 'lowartfilms') { + return extractLowArtActors(release); + } + + return release; +} + +async function networkFetchUpcoming(site, page = 1) { + if (site.parameters?.api) return fetchApiUpcoming(site, page, true); + + return fetchUpcoming(site, page); +} + +async function networkFetchProfile(actorName) { + // not all Fame Digital sites offer Gamma actors + const [devils, rocco, peter] = await Promise.all([ + fetchApiProfile(actorName, 'devilsfilm', true), + fetchApiProfile(actorName, 'roccosiffredi'), + fetchProfile(actorName, 'peternorth', true), + ]); + + if (devils || rocco || peter) { + const releases = [].concat(devils?.releases || [], rocco?.releases || [], peter?.releases || []); + + return { + ...peter, + ...rocco, + ...devils, + releases, + }; + } + + return null; +} + +module.exports = { + fetchLatest: networkFetchLatest, + fetchProfile: networkFetchProfile, + fetchScene: networkFetchScene, + fetchUpcoming: networkFetchUpcoming, +}; diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index d4da5501..afe767e2 100644 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -111,18 +111,23 @@ function scrapeAll(html, site, useNetworkUrl) { const scenesElements = $('li[data-itemtype=scene]').toArray(); return scenesElements.map((element) => { + const release = {}; + const sceneLinkElement = $(element).find('.sceneTitle a'); - const url = `${useNetworkUrl ? site.network.url : site.url}${sceneLinkElement.attr('href')}`; - const title = sceneLinkElement.attr('title'); + release.url = `${useNetworkUrl ? site.network.url : site.url}${sceneLinkElement.attr('href')}`; + release.title = sceneLinkElement.attr('title'); - const entryId = $(element).attr('data-itemid'); + release.entryId = $(element).attr('data-itemid'); - const date = moment - .utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY') - .toDate(); + const dateEl = $(element).find('.sceneDate').text() || null; + if (dateEl) { + release.date = moment + .utc($(element).find('.sceneDate').text(), ['MM-DD-YYYY', 'YYYY-MM-DD']) + .toDate(); + } - const actors = $(element).find('.sceneActors a') + release.actors = $(element).find('.sceneActors a') .map((actorIndex, actorElement) => $(actorElement).attr('title')) .toArray(); @@ -130,27 +135,17 @@ function scrapeAll(html, site, useNetworkUrl) { .toArray() .map(value => Number($(value).text())); - const poster = $(element).find('.imgLink img').attr('data-original'); - const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`; + release.rating = { likes, dislikes }; - return { - url, - entryId, - title, - actors, - director: 'Mason', - date, - poster, - trailer: { - src: trailer, - quality: 224, - }, - rating: { - likes, - dislikes, - }, - site, + const posterEl = $(element).find('.imgLink img'); + if (posterEl) release.poster = posterEl.attr('data-original') || posterEl.attr('src'); + + release.teaser = { + src: `https://videothumb.gammacdn.com/307x224/${release.entryId}.mp4`, + quality: 224, }; + + return release; }); } @@ -161,33 +156,41 @@ async function scrapeScene(html, url, site) { const json = $('script[type="application/ld+json"]').html(); const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); - const [data, data2] = JSON.parse(json); + const [data, data2] = json ? JSON.parse(json) : []; const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1)); [release.entryId] = new URL(url).pathname.split('/').slice(-1); + release.title = data?.name || videoData.playerOptions.sceneInfos.sceneTitle; - release.title = data.name; - release.description = data.description; - - // date in data object is not the release date of the scene, but the date the entry was added + // date in data object is not the release date of the scene, but the date the entry was added; only use as fallback const dateString = $('.updatedDate').first().text().trim(); const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0]; - release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate(); - release.director = data.director?.[0].name || data2?.director?.[0].name; - release.actors = (data.actor || data2.actor).map(actor => actor.name); - const hasTrans = (data.actor || data2.actor).some(actor => actor.gender === 'shemale'); + if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate(); + else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate(); + else release.date = videoData.playerOptions.sceneInfos.sceneReleaseDate; - const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; - if (stars) release.rating = { stars }; + if (data) { + release.description = data.description; + release.director = data.director?.[0].name || data2?.director?.[0].name; - release.duration = moment.duration(data.duration.slice(2).split(':')).asSeconds(); + const actors = data?.actor || data2?.actor || []; + const hasTrans = actors.some(actor => actor.gender === 'shemale'); + release.actors = actors.map(actor => actor.name); - const rawTags = data.keywords?.split(', '); - release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; + const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; + if (stars) release.rating = { stars }; + + release.duration = moment.duration(data.duration.slice(2)).asSeconds(); + + const rawTags = data.keywords?.split(', '); + release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; + } release.poster = videoData.picPreview; - release.photos = await getPhotos($('.picturesItem a').attr('href'), site); + + const photoLink = $('.picturesItem a').attr('href'); + if (photoLink) release.photos = await getPhotos(photoLink, site); const trailer = `${videoData.playerOptions.host}${videoData.url}`; release.trailer = [ @@ -297,6 +300,11 @@ async function fetchApiCredentials(referer) { const body = res.body.toString(); const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey')); + + if (!apiLine) { + throw new Error(`Can not use Gamma API for ${referer}`); + } + const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1); const apiData = JSON.parse(apiSerial); @@ -321,7 +329,7 @@ async function fetchApiLatest(site, page = 1, upcoming = false) { requests: [ { indexName: 'all_scenes', - params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`, + params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=sitename:${site.slug}`, }, ], }, { @@ -331,7 +339,11 @@ async function fetchApiLatest(site, page = 1, upcoming = false) { encodeJSON: true, }); - return scrapeApiReleases(res.body.results[0].hits, site); + if (res.statuscode === 200 && res.body.results?.[0]?.hits) { + return scrapeApiReleases(res.body.results[0].hits, site); + } + + return []; } async function fetchApiUpcoming(site) { @@ -339,14 +351,14 @@ async function fetchApiUpcoming(site) { } async function fetchLatest(site, page = 1) { - const url = `${site.url}/en/videos/AllCategories/0/${page}`; + const url = `${site.url}${site.parameters?.latest || '/en/videos/AllCategories/0/'}${page}`; const res = await bhttp.get(url); return scrapeAll(res.body.toString(), site); } async function fetchUpcoming(site) { - const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`); + const res = await bhttp.get(`${site.url}${site.parameters?.upcoming || '/en/videos/AllCategories/0/1/upcoming'}`); return scrapeAll(res.body.toString(), site); } @@ -362,7 +374,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) { requests: [ { indexName: 'all_scenes', - params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`, + params: `query=&filters=sitename:${siteSlug}&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`, }, ], }, { @@ -408,7 +420,7 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl) { async function fetchApiProfile(actorName, siteSlug) { const actorSlug = encodeURI(actorName); - const referer = `https://www.${siteSlug}.com/en/search?query=${actorSlug}&tab=actors`; + const referer = `https://www.${siteSlug}.com/en/search`; const { apiUrl } = await fetchApiCredentials(referer); diff --git a/src/scrapers/score.js b/src/scrapers/score.js index 544b40e2..bace2778 100644 --- a/src/scrapers/score.js +++ b/src/scrapers/score.js @@ -80,7 +80,7 @@ async function scrapeScene(html, url, site) { const actorEl = qa('.stat').find(stat => /Featuring/.test(stat.textContent)); const actorString = qtext(actorEl); - release.actors = actorString?.split(/, and |, /g) || []; + release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || []; } if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 7af56a41..1f57ee1b 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -26,6 +26,7 @@ const bangbros = require('./bangbros'); const blowpass = require('./blowpass'); const brazzers = require('./brazzers'); const ddfnetwork = require('./ddfnetwork'); +const famedigital = require('./famedigital'); const evilangel = require('./evilangel'); const julesjordan = require('./julesjordan'); const kellymadison = require('./kellymadison'); @@ -60,6 +61,7 @@ module.exports = { digitalplayground, dogfart, dogfartnetwork: dogfart, + famedigital, evilangel, fakehub, jayrock, @@ -96,6 +98,7 @@ module.exports = { brazzers, ddfnetwork, digitalplayground, + famedigital, evilangel, fakehub, freeones, diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js index 355dcea8..99237194 100644 --- a/src/scrapers/xempire.js +++ b/src/scrapers/xempire.js @@ -14,6 +14,7 @@ async function fetchScene(url, site) { // const siteUrl = siteDomain && `https://www.${siteDomain}`; release.channel = siteSlug; + release.director = 'Mason'; return release; } diff --git a/src/sites.js b/src/sites.js index f85b5bfa..a9ec0d7b 100644 --- a/src/sites.js +++ b/src/sites.js @@ -133,6 +133,7 @@ async function fetchSitesFromConfig() { 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', ) .leftJoin('networks', 'sites.network_id', 'networks.id') + .where('sites.enabled') .where((builder) => { if (config.include) { builder diff --git a/src/utils/q.js b/src/utils/q.js index 47ecbda6..6ce524b7 100644 --- a/src/utils/q.js +++ b/src/utils/q.js @@ -17,8 +17,8 @@ function q(context, selector, attrArg, trim = true) { if (attr) { const value = selector - ? context.querySelector(selector)?.[attr] - : context[attr]; + ? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value + : context[attr] || context[attr]?.attributes[attr]?.value; return trim ? value?.trim() : value; } @@ -50,16 +50,14 @@ function qtext(context, selector, trim = true) { } function qmeta(context, selector, attrArg = 'content', trim = true) { - return q(context, selector, attrArg, trim); + if (/meta\[.*\]/.test(selector)) { + return q(context, selector, attrArg, trim); + } + + return q(context, `meta[${selector}]`, attrArg, trim); } -function qdate(context, selector, format, match, attr = 'textContent') { - const dateString = selector - ? context.querySelector(selector)?.[attr] - : context[attr]; - - if (!dateString) return null; - +function date(dateString, format, match) { if (match) { const dateStamp = dateString.trim().match(match); @@ -70,6 +68,14 @@ function qdate(context, selector, format, match, attr = 'textContent') { return moment.utc(dateString.trim(), format).toDate(); } +function qdate(context, selector, format, match, attr = 'textContent') { + const dateString = q(context, selector, attr, true); + + if (!dateString) return null; + + return date(dateString, format, match); +} + function qimage(context, selector = 'img', attr = 'src', protocol = 'https') { const image = q(context, selector, attr); @@ -107,6 +113,12 @@ function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https' return attr ? prefixProtocol(trailer, protocol) : trailer; } +function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https') { + const trailers = qall(context, selector, attr); + + return attr ? trailers.map(trailer => prefixProtocol(trailer, protocol)) : trailers; +} + function qlength(context, selector, attr = 'textContent') { const durationString = q(context, selector, attr); @@ -133,8 +145,9 @@ const funcs = { qmeta, qtext, qtrailer, - qurls, + qtrailers, qurl, + qurls, qa: qall, qd: qdate, qi: qimage, @@ -143,6 +156,7 @@ const funcs = { ql: qlength, qm: qmeta, qt: qtrailer, + qts: qtrailers, qtx: qtext, qu: qurl, qus: qurls, @@ -161,6 +175,7 @@ function init(element, window) { return { element, + html: element.outerHTML || element.body.outerHTML, ...(window && { window, document: window.document, @@ -209,6 +224,7 @@ async function getAll(url, selector, headers) { } module.exports = { + date, extract, extractAll, init, @@ -217,6 +233,7 @@ module.exports = { getAll, context: init, contextAll: initAll, + d: date, ex: extract, exa: extractAll, ctx: init,