From 6d93083581a61c6c13a73298276f9114d9c504ba Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 3 Feb 2021 00:46:59 +0100 Subject: [PATCH] Removed superfluous MindGeek scrapers. --- config/default.js | 4 + seeds/01_networks.js | 9 ++ src/actors.js | 14 +- src/entities.js | 27 ++-- src/scrapers/babes.js | 13 -- src/scrapers/brazzers-legacy.js | 212 ------------------------------ src/scrapers/digitalplayground.js | 13 -- src/scrapers/fakehub.js | 13 -- src/scrapers/famedigital.js | 1 + src/scrapers/iconmale.js | 11 -- src/scrapers/men.js | 13 -- src/scrapers/metrohd.js | 13 -- src/scrapers/milehighmedia.js | 13 -- src/scrapers/mindgeek.js | 26 ++-- src/scrapers/mofos.js | 13 -- src/scrapers/realitykings.js | 53 -------- src/scrapers/scrapers.js | 43 ++---- src/scrapers/transangels.js | 11 -- src/scrapers/twistys.js | 13 -- src/utils/http-legacy.js | 146 -------------------- 20 files changed, 62 insertions(+), 599 deletions(-) delete mode 100644 src/scrapers/babes.js delete mode 100644 src/scrapers/brazzers-legacy.js delete mode 100644 src/scrapers/digitalplayground.js delete mode 100644 src/scrapers/fakehub.js delete mode 100644 src/scrapers/iconmale.js delete mode 100644 src/scrapers/men.js delete mode 100644 src/scrapers/metrohd.js delete mode 100644 src/scrapers/milehighmedia.js delete mode 100644 src/scrapers/mofos.js delete mode 100644 src/scrapers/realitykings.js delete mode 100644 src/scrapers/transangels.js delete mode 100644 src/scrapers/twistys.js delete mode 100644 src/utils/http-legacy.js diff --git a/config/default.js b/config/default.js index 474290cb3..c85b20ae4 100644 --- a/config/default.js +++ b/config/default.js @@ -234,6 +234,10 @@ module.exports = { interval: 1000, concurrency: 1, }, + 'www.realitykings.com': { + interval: 1000, + concurrency: 1, + }, }, fetchAfter: [1, 'week'], missingDateLimit: 3, diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 017bf3d3f..5cac7b7fc 100644 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -190,6 +190,9 @@ const networks = [ name: 'Digital Playground', url: 'https://www.digitalplayground.com', description: 'DigitalPlayground.com is the leader in high quality adult blockbuster movies and award winning sex parodies that feature the most exclusive pornstars online! Adult Film Database of adult movies.', + parameters: { + actorPath: 'modelprofile', + }, parent: 'mindgeek', }, { @@ -232,6 +235,9 @@ const networks = [ name: 'Fake Hub', url: 'https://www.fakehub.com', description: 'Wherever they go, there is porn. Hospital, Taxis, Casting… Maybe fucking to a fake cop, fake agent or fake taxi driver. And we record it all.', + parameters: { + actorPath: 'modelprofile', + }, parent: 'mindgeek', }, { @@ -359,6 +365,9 @@ const networks = [ name: 'Men', url: 'https://www.men.com', description: 'Check out the best gay porn site on the net with daily updates, award-winning original series, exclusive Men.com models and over 800 of the hottest guys in gay porn.', + parameters: { + actorPath: 'modelprofile', + }, parent: 'mindgeek', }, { diff --git a/src/actors.js b/src/actors.js index 42832eb8c..b5976d94d 100644 --- a/src/actors.js +++ b/src/actors.js @@ -23,6 +23,7 @@ const logger = require('./logger')(__filename); const { toBaseReleases } = require('./deep'); const { associateAvatars, flushOrphanedMedia } = require('./media'); +const { fetchEntitiesBySlug } = require('./entities'); const { deleteScenes } = require('./releases'); const slugify = require('./utils/slugify'); @@ -740,23 +741,14 @@ async function scrapeActors(argNames) { const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors); const entitySlugs = sources.flat(); - const [entities, existingActorEntries] = await Promise.all([ - knex('entities') - .select(knex.raw('entities.*, row_to_json(parents) as parent, json_agg(children) as children')) - .whereIn('entities.slug', entitySlugs) - .whereIn('entities.type', ['network', 'channel']) - .leftJoin('entities as parents', 'parents.id', 'entities.parent_id') - .leftJoin('entities as children', 'children.parent_id', 'entities.id') - .orderBy('entities.type') - .groupBy('entities.id', 'parents.id'), + const [entitiesBySlug, existingActorEntries] = await Promise.all([ + fetchEntitiesBySlug(entitySlugs, 'desc'), knex('actors') .select(['id', 'name', 'slug', 'entry_id']) .whereIn('slug', baseActors.map(baseActor => baseActor.slug)) .whereNull('alias_for'), ]); - const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: acc[entity.slug] || entity }), {}); - const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: { diff --git a/src/entities.js b/src/entities.js index 6c4b325da..7ed749004 100644 --- a/src/entities.js +++ b/src/entities.js @@ -167,15 +167,7 @@ async function fetchIncludedEntities() { return curatedNetworks; } -async function fetchReleaseEntities(baseReleases) { - const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity); - - const entitySlugs = Array.from(new Set( - baseReleasesWithoutEntity - .map(baseRelease => urlToSiteSlug(baseRelease.url)) - .filter(Boolean), - )); - +async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') { const entities = await knex.raw(` WITH RECURSIVE entity_tree as ( SELECT to_jsonb(entities) as entity, @@ -197,8 +189,8 @@ async function fetchReleaseEntities(baseReleases) { LEFT JOIN entities AS children ON children.parent_id = (entity->>'id')::int WHERE entity_tree.parent_id IS NULL GROUP BY entity_tree.entity - ORDER BY entity->'type' ASC; - `, { entitySlugs }); + ORDER BY entity->'type' :sort; + `, { entitySlugs, sort: knex.raw(sort) }); // channel entity will overwrite network entity const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({ @@ -209,6 +201,18 @@ async function fetchReleaseEntities(baseReleases) { return entitiesBySlug; } +async function fetchReleaseEntities(baseReleases) { + const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity); + + const entitySlugs = Array.from(new Set( + baseReleasesWithoutEntity + .map(baseRelease => urlToSiteSlug(baseRelease.url)) + .filter(Boolean), + )); + + return fetchEntitiesBySlug(entitySlugs); +} + async function fetchEntity(entityId, type) { const entity = await knex('entities') .select(knex.raw(` @@ -361,6 +365,7 @@ module.exports = { curateEntities, fetchIncludedEntities, fetchReleaseEntities, + fetchEntitiesBySlug, fetchEntity, fetchEntities, searchEntities, diff --git a/src/scrapers/babes.js b/src/scrapers/babes.js deleted file mode 100644 index 0f1ec228f..000000000 --- a/src/scrapers/babes.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'babes'); -} - -module.exports = { - fetchLatest, - fetchProfile: networkFetchProfile, - fetchScene, -}; diff --git a/src/scrapers/brazzers-legacy.js b/src/scrapers/brazzers-legacy.js deleted file mode 100644 index 8e8ee34ca..000000000 --- a/src/scrapers/brazzers-legacy.js +++ /dev/null @@ -1,212 +0,0 @@ -'use strict'; - -/* eslint-disable newline-per-chained-call */ -const qu = require('../utils/qu'); -const slugify = require('../utils/slugify'); -const { heightToCm, lbsToKg } = require('../utils/convert'); - -function scrapeAll(items, channel, upcoming) { - return items.reduce((acc, { query }) => { - const isUpcoming = query.exists('.icon-upcoming.active'); - - if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) { - return acc; - } - - const release = {}; - const pathname = query.url('a'); - - release.url = `https://www.brazzers.com${pathname}`; - release.entryId = pathname.match(/(\/view\/id\/|\/episode\/)(\d+)/)[2]; - - release.title = query.q('a', 'title'); - release.date = query.date('time', 'MMMM DD, YYYY'); - - release.actors = query.all('.model-names a', 'title'); - - release.likes = query.number('.label-rating .like-amount'); - release.dislikes = query.number('.label-rating .dislike-amount'); - - release.poster = query.img('.card-main-img'); - release.photos = query.imgs('.card-overlay .image-under'); - - release.channel = slugify(query.q('.collection', 'title'), ''); - - return acc.concat(release); - }, []); -} - -function getVideoData(html) { - try { - const videoScriptStart = html.indexOf('window.videoUiOptions'); - const videoScript = html.slice(videoScriptStart, html.indexOf('};', videoScriptStart)); - const videoString = videoScript.slice(videoScript.indexOf('{"stream_info"'), videoScript.lastIndexOf('},') + 1); - - return JSON.parse(videoString); - } catch (error) { - return null; - } -} - -async function scrapeScene({ query, html }, url, _site) { - const release = {}; - - release.entryId = new URL(url).pathname.match(/(\/view\/id\/|\/episode\/)(\d+)/)[2]; - - release.title = query.q('.scene-title[itemprop="name"]', true); - release.description = query.text('#scene-description p[itemprop="description"]'); - - release.date = query.date('.more-scene-info .scene-date', 'MMMM DD, YYYY'); - release.duration = query.number('#trailer-player-container', 'data-duration') // more accurate - || query.number('.scene-length[itemprop="duration"]', 'content') * 60; // fallback - - // actor cards have avatar, but truncated name - const actorImagesByActorId = query.imgs('.featured-model .card-image img').reduce((acc, img) => ({ - ...acc, - [img.match(/\/models\/(\d+)/)[1]]: [ - img.replace('medium', 'large'), - img, - ], - }), {}); - - release.actors = query.all('.related-model a').map((actorEl) => { - const name = query.q(actorEl, null, 'title'); - const avatar = actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]]; - - return { name, avatar }; - }); - - release.likes = query.number('.label-rating .like'); - release.dislikes = query.number('.label-rating .dislike'); - - const tags = query.all('.tag-card-container a', true); - const categories = query.all('.timeline a[href*="/categories"]', 'title'); - - release.tags = tags.concat(categories); - release.channel = slugify(query.q('.scene-site .label-text', true) || query.q('.niche-site-logo', 'title'), ''); - - const videoData = getVideoData(html); - const poster = videoData?.poster || query.meta('itemprop="thumbnailUrl"') || query.q('#trailer-player-container', 'data-player-img'); - - release.poster = qu.prefixUrl(poster); - release.photos = query.urls('.carousel-thumb a'); - - if (videoData) { - release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({ - src: qu.prefixUrl(path), - quality: Number(quality.match(/\d{3,}/)[0]), - })); - } - - return release; -} - -async function fetchActorReleases({ query }, accReleases = []) { - const releases = scrapeAll(qu.initAll(query.all('.release-card.scene'))); - const next = query.url('.pagination .next a'); - - if (next) { - const url = `https://www.brazzers.com${next}`; - const res = await qu.get(url); - - if (res.ok) { - return fetchActorReleases(res.item, accReleases.concat(releases)); - } - } - - return accReleases.concat(releases); -} - -async function scrapeProfile({ query }, url, actorName, include) { - const bioKeys = query.all('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim()); - const bioValues = query.all('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim()); - - const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {}); - - const profile = { - name: actorName, - }; - - profile.description = query.q('.model-profile-specs p', true); - - if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity; - if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-'); - if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = qu.extractDate(bio['Date of Birth'], 'MMMM DD, YYYY'); - if (bio['Birth Location']) profile.birthPlace = bio['Birth Location']; - if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase(); - - if (bio.Height) profile.height = heightToCm(bio.Height); - if (bio.Weight) profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]); - if (bio['Hair Color']) profile.hair = bio['Hair Color'].toLowerCase(); - - if (bio['Tits Type'] && bio['Tits Type'].match('Natural')) profile.naturalBoobs = true; - if (bio['Tits Type'] && bio['Tits Type'].match('Enhanced')) profile.naturalBoobs = false; - - if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true; - if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true; - - const avatarEl = query.q('.big-pic-model-container img'); - if (avatarEl) profile.avatar = `https:${avatarEl.src}`; - - if (include.releases) { - profile.releases = await fetchActorReleases({ query }); - } - - return profile; -} - -async function fetchLatest(channel, page = 1) { - const res = await qu.getAll(`${channel.url}/page/${page}/`, '.release-card.scene'); - - if (res.ok) { - return scrapeAll(res.items, channel, false); - } - - return res.status; -} - -async function fetchUpcoming(channel) { - const res = await qu.getAll(`${channel.url}/page/1`, '.release-card.scene'); - - if (res.ok) { - return scrapeAll(res.items, channel, true); - } - - return res.status; -} - -async function fetchScene(url, site) { - const res = await qu.get(url); - - if (res.ok) { - return scrapeScene(res.item, url, site); - } - - return res.status; -} - -async function fetchProfile({ name: actorName }, context, include) { - const searchRes = await qu.get('https://brazzers.com/pornstars-search/', `a[title="${actorName}" i]`, { - Cookie: `textSearch=${encodeURIComponent(actorName)};`, - }); - - const actorLink = searchRes.ok && searchRes.item.qu.url(null); - - if (actorLink) { - const url = `https://brazzers.com${actorLink}`; - const res = await qu.get(url); - - if (res.ok) { - return scrapeProfile(res.item, url, actorName, include); - } - } - - return null; -} - -module.exports = { - fetchLatest, - fetchProfile, - fetchScene, - fetchUpcoming, -}; diff --git a/src/scrapers/digitalplayground.js b/src/scrapers/digitalplayground.js deleted file mode 100644 index 7d25a390f..000000000 --- a/src/scrapers/digitalplayground.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'digitalplayground', 'modelprofile'); -} - -module.exports = { - fetchLatest, - fetchProfile: networkFetchProfile, - fetchScene, -}; diff --git a/src/scrapers/fakehub.js b/src/scrapers/fakehub.js deleted file mode 100644 index f9857d6c5..000000000 --- a/src/scrapers/fakehub.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'fakehub', 'modelprofile'); -} - -module.exports = { - fetchLatest, - fetchProfile: networkFetchProfile, - fetchScene, -}; diff --git a/src/scrapers/famedigital.js b/src/scrapers/famedigital.js index 4066c7a29..1db24b721 100644 --- a/src/scrapers/famedigital.js +++ b/src/scrapers/famedigital.js @@ -10,6 +10,7 @@ const { fetchApiProfile, scrapeAll, } = require('./gamma'); + const { get } = require('../utils/qu'); const slugify = require('../utils/slugify'); diff --git a/src/scrapers/iconmale.js b/src/scrapers/iconmale.js deleted file mode 100644 index c5dea8d7f..000000000 --- a/src/scrapers/iconmale.js +++ /dev/null @@ -1,11 +0,0 @@ -'use strict'; - -const { fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'iconmale'); -} - -module.exports = { - fetchProfile: networkFetchProfile, -}; diff --git a/src/scrapers/men.js b/src/scrapers/men.js deleted file mode 100644 index 577ec325b..000000000 --- a/src/scrapers/men.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'men', 'modelprofile'); -} - -module.exports = { - fetchLatest, - fetchScene, - fetchProfile: networkFetchProfile, -}; diff --git a/src/scrapers/metrohd.js b/src/scrapers/metrohd.js deleted file mode 100644 index 7af86bf28..000000000 --- a/src/scrapers/metrohd.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'devianthardcore'); -} - -module.exports = { - fetchLatest, - fetchProfile: networkFetchProfile, - fetchScene, -}; diff --git a/src/scrapers/milehighmedia.js b/src/scrapers/milehighmedia.js deleted file mode 100644 index 099ff0cf0..000000000 --- a/src/scrapers/milehighmedia.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'milehighmedia'); -} - -module.exports = { - fetchLatest, - fetchProfile: networkFetchProfile, - fetchScene, -}; diff --git a/src/scrapers/mindgeek.js b/src/scrapers/mindgeek.js index 64c30224a..a589e96fd 100644 --- a/src/scrapers/mindgeek.js +++ b/src/scrapers/mindgeek.js @@ -172,14 +172,16 @@ function scrapeProfile(data, html, releases = [], networkName) { aliases: data.aliases, }; - const [bust, waist, hip] = data.measurements.split('-'); - profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; - if (profile.gender === 'female') { - if (bust) profile.bust = bust.toUpperCase(); - if (waist) profile.waist = waist; - if (hip) profile.hip = hip; + if (data.measurements) { + const [bust, waist, hip] = data.measurements.split('-'); + + if (profile.gender === 'female') { + if (bust) profile.bust = bust.toUpperCase(); + if (waist) profile.waist = waist; + if (hip) profile.hip = hip; + } } if (data.birthPlace) profile.birthPlace = data.birthPlace; @@ -197,6 +199,14 @@ function scrapeProfile(data, html, releases = [], networkName) { const birthdate = query.all('li').find(el => /Date of Birth/.test(el.textContent)); if (birthdate) profile.birthdate = query.date(birthdate, 'span', 'MMMM Do, YYYY'); + if (data.tags.some(tag => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) { + profile.naturalBoobs = true; + } + + if (data.tags.some(tag => /boob type/i.test(tag.category) && /enhanced/i.test(tag.name))) { + profile.naturalBoobs = false; + } + profile.releases = releases.map(release => scrapeScene(release, null, null, networkName)); return profile; @@ -276,7 +286,7 @@ async function fetchScene(url, site, baseScene) { return null; } -async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath = 'model') { +async function fetchProfile({ name: actorName }, networkOrNetworkSlug) { // const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; const { session, instanceToken } = await getSession(networkOrNetworkSlug); @@ -291,7 +301,7 @@ async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase()); if (actorData) { - const actorUrl = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com/${actorPath}/${actorData.id}/`; + const actorUrl = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com/${networkOrNetworkSlug?.parameters?.actorPath || 'model'}/${actorData.id}/`; const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`; const [actorRes, actorReleasesRes] = await Promise.all([ diff --git a/src/scrapers/mofos.js b/src/scrapers/mofos.js deleted file mode 100644 index 7a4930c65..000000000 --- a/src/scrapers/mofos.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'mofos'); -} - -module.exports = { - fetchLatest, - fetchScene, - fetchProfile: networkFetchProfile, -}; diff --git a/src/scrapers/realitykings.js b/src/scrapers/realitykings.js deleted file mode 100644 index cf92af085..000000000 --- a/src/scrapers/realitykings.js +++ /dev/null @@ -1,53 +0,0 @@ -'use strict'; - -const cheerio = require('cheerio'); - -const http = require('../utils/http'); - -const { - scrapeLatestX, - fetchLatest, - fetchScene, - fetchProfile, -} = require('./mindgeek'); - -function scrapeLatestClassic(html, site) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); - - const stateTag = $('script:contains("initialState")').html(); - const prefix = 'initialState = {'; - const prefixIndex = stateTag.indexOf('initialState = {'); - const suffix = '};'; - const stateString = stateTag.slice(prefixIndex + prefix.length - 1, stateTag.indexOf('};', prefixIndex) + suffix.length - 1); - const data = JSON.parse(stateString); - - return Object.values(data.entities.releases).map(scene => scrapeLatestX(scene, site)); -} - -async function fetchClassic(site, page) { - const res = await http.get(`${site.url}/scenes?page=${page}`); - - if (res.statusCode === 200) { - return scrapeLatestClassic(res.body.toString(), site); - } - - return null; -} - -async function fetchLatestWrap(site, page = 1) { - if (site.parameters?.classic) { - return fetchClassic(site, page); - } - - return fetchLatest(site, page); -} - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'realitykings'); -} - -module.exports = { - fetchLatest: fetchLatestWrap, - fetchProfile: networkFetchProfile, - fetchScene, -}; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index d6af55a2d..af03bfce3 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -5,7 +5,6 @@ const assylum = require('./assylum'); const aziani = require('./aziani'); const amateurallure = require('./amateurallure'); const americanpornstar = require('./americanpornstar'); -const babes = require('./babes'); const bamvisions = require('./bamvisions'); const bang = require('./bang'); const bangbros = require('./bangbros'); @@ -13,11 +12,9 @@ const blowpass = require('./blowpass'); const cherrypimps = require('./cherrypimps'); const czechav = require('./czechav'); const ddfnetwork = require('./ddfnetwork'); -const digitalplayground = require('./digitalplayground'); const dogfart = require('./dogfart'); const dorcel = require('./dorcel'); const elegantangel = require('./elegantangel'); -const fakehub = require('./fakehub'); const famedigital = require('./famedigital'); const firstanalquest = require('./firstanalquest'); const fcuk = require('./fcuk'); @@ -26,7 +23,6 @@ const gamma = require('./gamma'); const hitzefrei = require('./hitzefrei'); const hookuphotshot = require('./hookuphotshot'); const hush = require('./hush'); -const iconmale = require('./iconmale'); const insex = require('./insex'); const inthecrack = require('./inthecrack'); const jayrock = require('./jayrock'); @@ -39,12 +35,8 @@ const kink = require('./kink'); const legalporno = require('./legalporno'); const littlecapricedreams = require('./littlecapricedreams'); const porndoe = require('./porndoe'); -const men = require('./men'); -const metrohd = require('./metrohd'); const mikeadriano = require('./mikeadriano'); -const milehighmedia = require('./milehighmedia'); const mindgeek = require('./mindgeek'); -const mofos = require('./mofos'); const naughtyamerica = require('./naughtyamerica'); const newsensations = require('./newsensations'); const nubiles = require('./nubiles'); @@ -56,14 +48,11 @@ const pascalssubsluts = require('./pascalssubsluts'); // reserved keyword const pierrewoodman = require('./pierrewoodman'); const pinkyxxx = require('./pinkyxxx'); const privateNetwork = require('./private'); // reserved keyword -const realitykings = require('./realitykings'); const score = require('./score'); const teamskeet = require('./teamskeet'); const teencoreclub = require('./teencoreclub'); const topwebmodels = require('./topwebmodels'); -const transangels = require('./transangels'); const traxxx = require('./traxxx'); -const twistys = require('./twistys'); const vivid = require('./vivid'); const vixen = require('./vixen'); const vogov = require('./vogov'); @@ -73,7 +62,6 @@ const xempire = require('./xempire'); // profiles const boobpedia = require('./boobpedia'); const freeones = require('./freeones'); -// const freeoneslegacy = require('./freeones_legacy'); const scrapers = { releases: { @@ -83,7 +71,6 @@ const scrapers = { amateureuro: porndoe, assylum, aziani, - babes, bamvisions, bang, bangbros, @@ -92,12 +79,10 @@ const scrapers = { cherrypimps, czechav, pornworld: ddfnetwork, - digitalplayground, dogfart, dogfartnetwork: dogfart, dorcel, elegantangel, - fakehub, famedigital, fcuk, firstanalquest, @@ -124,12 +109,8 @@ const scrapers = { letsdoeit: porndoe, littlecapricedreams, mamacitaz: porndoe, - men, - metrohd, mikeadriano, - milehighmedia, mindgeek, - mofos, naughtyamerica, newsensations, nubiles, @@ -142,7 +123,6 @@ const scrapers = { porncz, pornpros: whalemember, private: privateNetwork, - realitykings, score, sexyhub: mindgeek, swallowsalon: julesjordan, @@ -151,7 +131,6 @@ const scrapers = { topwebmodels, transbella: porndoe, traxxx, - twistys, vipsexvault: porndoe, vivid, vixen, @@ -169,7 +148,7 @@ const scrapers = { analviolation: fullpornnetwork, anilos: nubiles, aziani, - babes, + babes: mindgeek, baddaddypov: fullpornnetwork, bamvisions, bang, @@ -186,7 +165,7 @@ const scrapers = { deeper: vixen, deeplush: nubiles, devilsfilm: famedigital, - digitalplayground, + digitalplayground: mindgeek, dtfsluts: fullpornnetwork, dogfartnetwork: dogfart, dorcelclub: dorcel, @@ -194,7 +173,7 @@ const scrapers = { elegantangel, evilangel: gamma, eyeontheguy: hush, - fakehub, + fakehub: mindgeek, exploitedcollegegirls: fcuk, firstanalquest, forbondage: porndoe, @@ -210,7 +189,7 @@ const scrapers = { hotcrazymess: nubiles, hushpass: hush, hussiepass: hush, - iconmale, + iconmale: mindgeek, interracialpass: hush, interracialpovs: hush, inthecrack, @@ -222,10 +201,10 @@ const scrapers = { kink, legalporno, littlecapricedreams, - men, - metrohd, - milehighmedia, - mofos, + men: mindgeek, + metrohd: mindgeek, + milehighmedia: mindgeek, + mofos: mindgeek, mugfucked: fullpornnetwork, naughtyamerica, nfbusty: nubiles, @@ -247,7 +226,7 @@ const scrapers = { povperverts: fullpornnetwork, povpornstars: hush, private: privateNetwork, - realitykings, + realitykings: mindgeek, roccosiffredi: famedigital, score, seehimfuck: hush, @@ -259,12 +238,12 @@ const scrapers = { teamskeet, teencoreclub, thatsitcomshow: nubiles, - transangels, + transangels: mindgeek, transbella: porndoe, trueanal: mikeadriano, tushy: vixen, tushyraw: vixen, - twistys, + twistys: mindgeek, vipsexvault: porndoe, vixen, wicked: gamma, diff --git a/src/scrapers/transangels.js b/src/scrapers/transangels.js deleted file mode 100644 index bd0b01d05..000000000 --- a/src/scrapers/transangels.js +++ /dev/null @@ -1,11 +0,0 @@ -'use strict'; - -const { fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'transangels'); -} - -module.exports = { - fetchProfile: networkFetchProfile, -}; diff --git a/src/scrapers/twistys.js b/src/scrapers/twistys.js deleted file mode 100644 index 4425d75d9..000000000 --- a/src/scrapers/twistys.js +++ /dev/null @@ -1,13 +0,0 @@ -'use strict'; - -const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek'); - -async function networkFetchProfile({ name: actorName }) { - return fetchProfile({ name: actorName }, 'twistys'); -} - -module.exports = { - fetchLatest, - fetchProfile: networkFetchProfile, - fetchScene, -}; diff --git a/src/utils/http-legacy.js b/src/utils/http-legacy.js deleted file mode 100644 index 45925baa4..000000000 --- a/src/utils/http-legacy.js +++ /dev/null @@ -1,146 +0,0 @@ -'use strict'; - -const util = require('util'); -const stream = require('stream'); -const config = require('config'); -const tunnel = require('tunnel'); -const bhttp = require('@thependulum/bhttp'); -const taskQueue = require('promise-task-queue'); - -const pipeline = util.promisify(stream.pipeline); -const logger = require('../logger')(__filename); - -const defaultHeaders = { - 'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1', -}; - -const defaultOptions = { - responseTimeout: 30000, -}; - -const proxyAgent = tunnel.httpsOverHttp({ - proxy: { - host: config.proxy.host, - port: config.proxy.port, - }, -}); - -function useProxy(url) { - if (!config.proxy.enable) { - return false; - } - - const { hostname } = new URL(url); - return config.proxy.hostnames.includes(hostname); -} - -const queue = taskQueue(); -const defaultQueueMethod = '20p'; - -async function handler({ - url, - method = 'GET', - body, - headers = {}, - options = {}, -}) { - if (body) { - logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)} ${options.queueMethod || defaultQueueMethod}`); - } else { - logger.silly(`${method.toUpperCase()} ${url} ${options.queueMethod || defaultQueueMethod}`); - } - - const reqOptions = { - headers: { - ...(options?.defaultHeaders !== false && defaultHeaders), - ...headers, - }, - ...defaultOptions, - ...options, - ...(options?.timeout && { responseTimeout: options?.timeout }), - }; - - if (useProxy(url)) { - reqOptions.agent = proxyAgent; - } - - const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase()) - ? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions) - : await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions); - - if (options?.stream && options?.destination) { - await pipeline(res, ...(options?.transforms || []), options?.destination); - } - - const html = Buffer.isBuffer(res.body) ? res.body.toString() : null; - const json = Buffer.isBuffer(res.body) ? null : res.body; - - return { - ...res, - originalRes: res, - html, - json, - pipe: res.pipe, - ok: res.statusCode >= 200 && res.statusCode <= 299, - code: res.statusCode, - status: res.statusCode, - }; -} - -queue.on('concurrencyReached:http', () => { - logger.silly('Queueing requests'); -}); - -queue.define('20p', handler, { - concurrency: 20, -}); - -queue.define('1s', handler, { - interval: 1, -}); - -queue.define('5s', handler, { - interval: 5, -}); - -async function get(url, headers, options) { - return queue.push(options?.queueMethod || defaultQueueMethod, { - method: 'GET', - url, - headers, - options, - }); -} - -async function head(url, headers, options) { - return queue.push(options?.queueMethod || defaultQueueMethod, { - method: 'HEAD', - url, - headers, - options, - }); -} - -async function post(url, body, headers, options) { - return queue.push(options?.queueMethod || defaultQueueMethod, { - method: 'POST', - url, - body, - headers, - options, - }); -} - -function session(headers, options) { - return bhttp.session({ - headers, - options, - }); -} - -module.exports = { - get, - post, - head, - session, -};