Removed superfluous MindGeek scrapers.
This commit is contained in:
parent
8337ce8dbd
commit
6d93083581
|
@ -234,6 +234,10 @@ module.exports = {
|
|||
interval: 1000,
|
||||
concurrency: 1,
|
||||
},
|
||||
'www.realitykings.com': {
|
||||
interval: 1000,
|
||||
concurrency: 1,
|
||||
},
|
||||
},
|
||||
fetchAfter: [1, 'week'],
|
||||
missingDateLimit: 3,
|
||||
|
|
|
@ -190,6 +190,9 @@ const networks = [
|
|||
name: 'Digital Playground',
|
||||
url: 'https://www.digitalplayground.com',
|
||||
description: 'DigitalPlayground.com is the leader in high quality adult blockbuster movies and award winning sex parodies that feature the most exclusive pornstars online! Adult Film Database of adult movies.',
|
||||
parameters: {
|
||||
actorPath: 'modelprofile',
|
||||
},
|
||||
parent: 'mindgeek',
|
||||
},
|
||||
{
|
||||
|
@ -232,6 +235,9 @@ const networks = [
|
|||
name: 'Fake Hub',
|
||||
url: 'https://www.fakehub.com',
|
||||
description: 'Wherever they go, there is porn. Hospital, Taxis, Casting… Maybe fucking to a fake cop, fake agent or fake taxi driver. And we record it all.',
|
||||
parameters: {
|
||||
actorPath: 'modelprofile',
|
||||
},
|
||||
parent: 'mindgeek',
|
||||
},
|
||||
{
|
||||
|
@ -359,6 +365,9 @@ const networks = [
|
|||
name: 'Men',
|
||||
url: 'https://www.men.com',
|
||||
description: 'Check out the best gay porn site on the net with daily updates, award-winning original series, exclusive Men.com models and over 800 of the hottest guys in gay porn.',
|
||||
parameters: {
|
||||
actorPath: 'modelprofile',
|
||||
},
|
||||
parent: 'mindgeek',
|
||||
},
|
||||
{
|
||||
|
|
|
@ -23,6 +23,7 @@ const logger = require('./logger')(__filename);
|
|||
|
||||
const { toBaseReleases } = require('./deep');
|
||||
const { associateAvatars, flushOrphanedMedia } = require('./media');
|
||||
const { fetchEntitiesBySlug } = require('./entities');
|
||||
const { deleteScenes } = require('./releases');
|
||||
|
||||
const slugify = require('./utils/slugify');
|
||||
|
@ -740,23 +741,14 @@ async function scrapeActors(argNames) {
|
|||
const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
|
||||
const entitySlugs = sources.flat();
|
||||
|
||||
const [entities, existingActorEntries] = await Promise.all([
|
||||
knex('entities')
|
||||
.select(knex.raw('entities.*, row_to_json(parents) as parent, json_agg(children) as children'))
|
||||
.whereIn('entities.slug', entitySlugs)
|
||||
.whereIn('entities.type', ['network', 'channel'])
|
||||
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
||||
.leftJoin('entities as children', 'children.parent_id', 'entities.id')
|
||||
.orderBy('entities.type')
|
||||
.groupBy('entities.id', 'parents.id'),
|
||||
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
||||
fetchEntitiesBySlug(entitySlugs, 'desc'),
|
||||
knex('actors')
|
||||
.select(['id', 'name', 'slug', 'entry_id'])
|
||||
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
|
||||
.whereNull('alias_for'),
|
||||
]);
|
||||
|
||||
const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: acc[entity.slug] || entity }), {});
|
||||
|
||||
const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
|
||||
...acc,
|
||||
[actorEntry.slug]: {
|
||||
|
|
|
@ -167,15 +167,7 @@ async function fetchIncludedEntities() {
|
|||
return curatedNetworks;
|
||||
}
|
||||
|
||||
async function fetchReleaseEntities(baseReleases) {
|
||||
const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
|
||||
|
||||
const entitySlugs = Array.from(new Set(
|
||||
baseReleasesWithoutEntity
|
||||
.map(baseRelease => urlToSiteSlug(baseRelease.url))
|
||||
.filter(Boolean),
|
||||
));
|
||||
|
||||
async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
|
||||
const entities = await knex.raw(`
|
||||
WITH RECURSIVE entity_tree as (
|
||||
SELECT to_jsonb(entities) as entity,
|
||||
|
@ -197,8 +189,8 @@ async function fetchReleaseEntities(baseReleases) {
|
|||
LEFT JOIN entities AS children ON children.parent_id = (entity->>'id')::int
|
||||
WHERE entity_tree.parent_id IS NULL
|
||||
GROUP BY entity_tree.entity
|
||||
ORDER BY entity->'type' ASC;
|
||||
`, { entitySlugs });
|
||||
ORDER BY entity->'type' :sort;
|
||||
`, { entitySlugs, sort: knex.raw(sort) });
|
||||
|
||||
// channel entity will overwrite network entity
|
||||
const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({
|
||||
|
@ -209,6 +201,18 @@ async function fetchReleaseEntities(baseReleases) {
|
|||
return entitiesBySlug;
|
||||
}
|
||||
|
||||
/**
 * Resolve the entities for base releases that carry a URL but no site or
 * entity yet.
 *
 * Each such URL is converted with urlToSiteSlug; falsy slugs are dropped and
 * duplicates collapsed (first occurrence order is kept) before the lookup is
 * delegated to fetchEntitiesBySlug.
 *
 * @param {Array<Object>} baseReleases - releases with optional `url`, `site` and `entity`.
 * @returns {Promise<*>} whatever fetchEntitiesBySlug resolves to for the collected slugs.
 */
async function fetchReleaseEntities(baseReleases) {
  const uniqueSlugs = new Set();

  for (const release of baseReleases) {
    const lacksEntity = !release.site && !release.entity;

    if (release.url && lacksEntity) {
      const slug = urlToSiteSlug(release.url);

      if (slug) {
        uniqueSlugs.add(slug);
      }
    }
  }

  return fetchEntitiesBySlug([...uniqueSlugs]);
}
|
||||
|
||||
async function fetchEntity(entityId, type) {
|
||||
const entity = await knex('entities')
|
||||
.select(knex.raw(`
|
||||
|
@ -361,6 +365,7 @@ module.exports = {
|
|||
curateEntities,
|
||||
fetchIncludedEntities,
|
||||
fetchReleaseEntities,
|
||||
fetchEntitiesBySlug,
|
||||
fetchEntity,
|
||||
fetchEntities,
|
||||
searchEntities,
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'babes'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'babes');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
|
@ -1,212 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
/**
 * Build base release objects from a list of release cards.
 *
 * A card is kept only when its '.icon-upcoming.active' marker agrees with the
 * `upcoming` flag: upcoming cards when `upcoming` is truthy, regular cards
 * otherwise.
 *
 * @param {Array<{ query: Object }>} items - initialised cards exposing a `query` helper.
 * @param {*} channel - accepted for signature compatibility; not read here.
 * @param {boolean} [upcoming] - whether to collect upcoming instead of released scenes.
 * @returns {Array<Object>} one release object per retained card, in input order.
 */
function scrapeAll(items, channel, upcoming) {
  const releases = [];

  for (const { query } of items) {
    const isUpcoming = query.exists('.icon-upcoming.active');
    const wrongKind = upcoming ? !isUpcoming : isUpcoming;

    if (!wrongKind) {
      const pathname = query.url('a');

      releases.push({
        url: `https://www.brazzers.com${pathname}`,
        // the numeric ID follows either /view/id/ or /episode/ in the card link
        entryId: pathname.match(/(\/view\/id\/|\/episode\/)(\d+)/)[2],
        title: query.q('a', 'title'),
        date: query.date('time', 'MMMM DD, YYYY'),
        actors: query.all('.model-names a', 'title'),
        likes: query.number('.label-rating .like-amount'),
        dislikes: query.number('.label-rating .dislike-amount'),
        poster: query.img('.card-main-img'),
        photos: query.imgs('.card-overlay .image-under'),
        channel: slugify(query.q('.collection', 'title'), ''),
      });
    }
  }

  return releases;
}
|
||||
|
||||
/**
 * Extract the embedded player configuration from a scene page.
 *
 * Takes the text from 'window.videoUiOptions' up to the next '};', then
 * parses the JSON object that starts at '{"stream_info"' and ends at the last
 * '},' within that text.
 *
 * @param {string} html - raw page markup.
 * @returns {Object|null} the parsed object, or null when anything fails
 *   (marker missing, malformed JSON, non-string input).
 */
function getVideoData(html) {
  let parsed = null;

  try {
    const optionsStart = html.indexOf('window.videoUiOptions');
    const optionsEnd = html.indexOf('};', optionsStart);
    const optionsScript = html.slice(optionsStart, optionsEnd);

    const jsonStart = optionsScript.indexOf('{"stream_info"');
    const jsonEnd = optionsScript.lastIndexOf('},') + 1;

    parsed = JSON.parse(optionsScript.slice(jsonStart, jsonEnd));
  } catch (error) {
    // best effort: callers fall back to other sources when no data is found
    parsed = null;
  }

  return parsed;
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * Compared to the previous version this no longer throws when
 *  - a featured-model image URL contains no `/models/<id>` segment (the image
 *    is skipped),
 *  - a stream key contains no run of three or more digits (quality is null),
 *  - the embedded video data has no `stream_info.http.paths` (no trailer set).
 *
 * @param {{ query: Object, html: string }} page - query helper and raw markup of the scene page.
 * @param {string} url - scene URL; must contain `/view/id/<n>` or `/episode/<n>`.
 * @param {*} _site - unused.
 * @returns {Promise<Object>} the scraped release.
 * @throws {TypeError} when the URL path carries no entry ID.
 */
async function scrapeScene({ query, html }, url, _site) {
  const release = {};

  release.entryId = new URL(url).pathname.match(/(\/view\/id\/|\/episode\/)(\d+)/)[2];

  release.title = query.q('.scene-title[itemprop="name"]', true);
  release.description = query.text('#scene-description p[itemprop="description"]');

  release.date = query.date('.more-scene-info .scene-date', 'MMMM DD, YYYY');
  release.duration = query.number('#trailer-player-container', 'data-duration') // more accurate
    || query.number('.scene-length[itemprop="duration"]', 'content') * 60; // fallback

  // actor cards have avatar, but truncated name
  const actorImagesByActorId = {};

  for (const img of query.imgs('.featured-model .card-image img')) {
    const actorId = img.match(/\/models\/(\d+)/)?.[1];

    if (actorId) {
      actorImagesByActorId[actorId] = [
        img.replace('medium', 'large'),
        img,
      ];
    }
  }

  release.actors = query.all('.related-model a').map((actorEl) => {
    const name = query.q(actorEl, null, 'title');
    const avatar = actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]];

    return { name, avatar };
  });

  release.likes = query.number('.label-rating .like');
  release.dislikes = query.number('.label-rating .dislike');

  const tags = query.all('.tag-card-container a', true);
  const categories = query.all('.timeline a[href*="/categories"]', 'title');

  release.tags = tags.concat(categories);
  release.channel = slugify(query.q('.scene-site .label-text', true) || query.q('.niche-site-logo', 'title'), '');

  const videoData = getVideoData(html);
  const poster = videoData?.poster || query.meta('itemprop="thumbnailUrl"') || query.q('#trailer-player-container', 'data-player-img');

  release.poster = qu.prefixUrl(poster);
  release.photos = query.urls('.carousel-thumb a');

  const streamPaths = videoData?.stream_info?.http?.paths;

  if (streamPaths) {
    release.trailer = Object.entries(streamPaths).map(([quality, path]) => {
      // the key is expected to embed the vertical resolution, e.g. a 3+ digit number
      const resolution = quality.match(/\d{3,}/);

      return {
        src: qu.prefixUrl(path),
        quality: resolution ? Number(resolution[0]) : null,
      };
    });
  }

  return release;
}
|
||||
|
||||
/**
 * Collect an actor's releases across paginated listing pages.
 *
 * Scrapes the '.release-card.scene' cards of the current page, then follows
 * the '.pagination .next a' link (prefixed with https://www.brazzers.com)
 * until there is no next link or a page request is not ok.
 *
 * @param {{ query: Object }} page - query helper for the first page.
 * @param {Array<Object>} [accReleases=[]] - releases gathered so far; new ones are appended to a copy.
 * @returns {Promise<Array<Object>>} accumulated releases from all visited pages.
 */
async function fetchActorReleases({ query }, accReleases = []) {
  let pageQuery = query;
  let collected = accReleases;

  for (;;) {
    const pageReleases = scrapeAll(qu.initAll(pageQuery.all('.release-card.scene')));
    const nextPath = pageQuery.url('.pagination .next a');

    collected = collected.concat(pageReleases);

    if (!nextPath) {
      return collected;
    }

    const res = await qu.get(`https://www.brazzers.com${nextPath}`);

    if (!res.ok) {
      return collected;
    }

    ({ query: pageQuery } = res.item);
  }
}
|
||||
|
||||
/**
 * Scrape an actor profile page into a profile object.
 *
 * Bio labels ('.profile-spec-list label') and values ('.profile-spec-list var')
 * are paired by position. Fixes over the previous version:
 *  - `include` may be omitted; releases are then simply not fetched
 *    (previously `include.releases` threw a TypeError),
 *  - a Weight value without digits is ignored instead of throwing,
 *  - the bio map is built in one pass instead of re-spreading per key.
 *
 * @param {{ query: Object }} page - query helper for the profile page.
 * @param {string} url - profile URL; not read here.
 * @param {string} actorName - name stored on the profile.
 * @param {{ releases?: boolean }} [include] - set `releases` to also fetch the actor's releases.
 * @returns {Promise<Object>} the scraped profile.
 */
async function scrapeProfile({ query }, url, actorName, include) {
  // collapse line breaks and runs of whitespace left over from the markup
  const tidy = text => text.replace(/\n+|\s{2,}/g, '').trim();

  const bioKeys = query.all('.profile-spec-list label', true).map(key => tidy(key));
  const bioValues = query.all('.profile-spec-list var', true).map(value => tidy(value));

  const bio = Object.fromEntries(bioKeys.map((key, index) => [key, bioValues[index]]));

  const profile = {
    name: actorName,
  };

  profile.description = query.q('.model-profile-specs p', true);

  if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
  if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
  if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = qu.extractDate(bio['Date of Birth'], 'MMMM DD, YYYY');
  if (bio['Birth Location']) profile.birthPlace = bio['Birth Location'];
  if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase();

  if (bio.Height) profile.height = heightToCm(bio.Height);

  // only convert when the value actually contains a number
  const weightMatch = bio.Weight ? bio.Weight.match(/\d+/) : null;
  if (weightMatch) profile.weight = lbsToKg(weightMatch[0]);

  if (bio['Hair Color']) profile.hair = bio['Hair Color'].toLowerCase();

  if (bio['Tits Type'] && bio['Tits Type'].match('Natural')) profile.naturalBoobs = true;
  if (bio['Tits Type'] && bio['Tits Type'].match('Enhanced')) profile.naturalBoobs = false;

  if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
  if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;

  const avatarEl = query.q('.big-pic-model-container img');
  if (avatarEl) profile.avatar = `https:${avatarEl.src}`;

  if (include?.releases) {
    profile.releases = await fetchActorReleases({ query });
  }

  return profile;
}
|
||||
|
||||
/**
 * Fetch one page of a channel's listing and scrape its released scenes.
 *
 * @param {{ url: string }} channel - channel whose `url` is the listing base.
 * @param {number} [page=1] - page number inserted into `<url>/page/<page>/`.
 * @returns {Promise<Array<Object>|*>} scraped releases, or the response status when the request is not ok.
 */
async function fetchLatest(channel, page = 1) {
  const listingUrl = `${channel.url}/page/${page}/`;
  const res = await qu.getAll(listingUrl, '.release-card.scene');

  return res.ok
    ? scrapeAll(res.items, channel, false)
    : res.status;
}
|
||||
|
||||
/**
 * Fetch the first listing page of a channel and scrape only the cards
 * flagged as upcoming.
 *
 * @param {{ url: string }} channel - channel whose `url` is the listing base.
 * @returns {Promise<Array<Object>|*>} scraped upcoming releases, or the response status when the request is not ok.
 */
async function fetchUpcoming(channel) {
  const firstPageUrl = `${channel.url}/page/1`;
  const res = await qu.getAll(firstPageUrl, '.release-card.scene');

  return res.ok
    ? scrapeAll(res.items, channel, true)
    : res.status;
}
|
||||
|
||||
/**
 * Request a scene page and scrape it.
 *
 * @param {string} url - scene URL to request.
 * @param {*} site - passed through to scrapeScene.
 * @returns {Promise<Object|*>} the scraped release, or the response status when the request is not ok.
 */
async function fetchScene(url, site) {
  const res = await qu.get(url);

  return res.ok
    ? scrapeScene(res.item, url, site)
    : res.status;
}
|
||||
|
||||
/**
 * Find an actor through the brazzers.com pornstar search and scrape the
 * matching profile page.
 *
 * The search page is requested with a `textSearch` cookie carrying the
 * URI-encoded name, and the result link is selected by a case-insensitive
 * match on its title attribute. Double quotes and backslashes in the name are
 * now backslash-escaped before being embedded in that quoted attribute
 * selector; previously such a name produced a malformed selector.
 *
 * @param {{ name: string }} actor - actor to look up; only `name` is read.
 * @param {*} context - unused.
 * @param {Object} [include] - passed through to scrapeProfile.
 * @returns {Promise<Object|null>} the scraped profile, or null when the actor
 *   is not found or a request is not ok.
 */
async function fetchProfile({ name: actorName }, context, include) {
  // escape characters that would terminate or corrupt the quoted CSS string
  const selectorName = actorName.replace(/["\\]/g, '\\$&');

  const searchRes = await qu.get('https://brazzers.com/pornstars-search/', `a[title="${selectorName}" i]`, {
    Cookie: `textSearch=${encodeURIComponent(actorName)};`,
  });

  const actorLink = searchRes.ok && searchRes.item.qu.url(null);

  if (!actorLink) {
    return null;
  }

  const url = `https://brazzers.com${actorLink}`;
  const res = await qu.get(url);

  if (res.ok) {
    return scrapeProfile(res.item, url, actorName, include);
  }

  return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the network slug 'digitalplayground' and the actor path
 * 'modelprofile'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'digitalplayground', 'modelprofile');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the network slug 'fakehub' and the actor path
 * 'modelprofile'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'fakehub', 'modelprofile');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
|
@ -10,6 +10,7 @@ const {
|
|||
fetchApiProfile,
|
||||
scrapeAll,
|
||||
} = require('./gamma');
|
||||
|
||||
const { get } = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'iconmale'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'iconmale');
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the network slug 'men' and the actor path 'modelprofile'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'men', 'modelprofile');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'devianthardcore'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'devianthardcore');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'milehighmedia'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'milehighmedia');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
|
@ -172,14 +172,16 @@ function scrapeProfile(data, html, releases = [], networkName) {
|
|||
aliases: data.aliases,
|
||||
};
|
||||
|
||||
const [bust, waist, hip] = data.measurements.split('-');
|
||||
|
||||
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
|
||||
|
||||
if (profile.gender === 'female') {
|
||||
if (bust) profile.bust = bust.toUpperCase();
|
||||
if (waist) profile.waist = waist;
|
||||
if (hip) profile.hip = hip;
|
||||
if (data.measurements) {
|
||||
const [bust, waist, hip] = data.measurements.split('-');
|
||||
|
||||
if (profile.gender === 'female') {
|
||||
if (bust) profile.bust = bust.toUpperCase();
|
||||
if (waist) profile.waist = waist;
|
||||
if (hip) profile.hip = hip;
|
||||
}
|
||||
}
|
||||
|
||||
if (data.birthPlace) profile.birthPlace = data.birthPlace;
|
||||
|
@ -197,6 +199,14 @@ function scrapeProfile(data, html, releases = [], networkName) {
|
|||
const birthdate = query.all('li').find(el => /Date of Birth/.test(el.textContent));
|
||||
if (birthdate) profile.birthdate = query.date(birthdate, 'span', 'MMMM Do, YYYY');
|
||||
|
||||
if (data.tags.some(tag => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) {
|
||||
profile.naturalBoobs = true;
|
||||
}
|
||||
|
||||
if (data.tags.some(tag => /boob type/i.test(tag.category) && /enhanced/i.test(tag.name))) {
|
||||
profile.naturalBoobs = false;
|
||||
}
|
||||
|
||||
profile.releases = releases.map(release => scrapeScene(release, null, null, networkName));
|
||||
|
||||
return profile;
|
||||
|
@ -276,7 +286,7 @@ async function fetchScene(url, site, baseScene) {
|
|||
return null;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath = 'model') {
|
||||
async function fetchProfile({ name: actorName }, networkOrNetworkSlug) {
|
||||
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
||||
const { session, instanceToken } = await getSession(networkOrNetworkSlug);
|
||||
|
||||
|
@ -291,7 +301,7 @@ async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath
|
|||
const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());
|
||||
|
||||
if (actorData) {
|
||||
const actorUrl = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com/${actorPath}/${actorData.id}/`;
|
||||
const actorUrl = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com/${networkOrNetworkSlug?.parameters?.actorPath || 'model'}/${actorData.id}/`;
|
||||
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
||||
|
||||
const [actorRes, actorReleasesRes] = await Promise.all([
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'mofos'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'mofos');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
|
@ -1,53 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
const http = require('../utils/http');
|
||||
|
||||
const {
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
} = require('./mindgeek');
|
||||
|
||||
/**
 * Scrape releases from a page that embeds its data as an `initialState`
 * object inside a script tag.
 *
 * The JSON is cut from the opening brace of 'initialState = {' up to and
 * including the brace of the first '};' that follows, then every entry of
 * `entities.releases` is passed through scrapeLatestX.
 *
 * @param {string} html - raw page markup.
 * @param {*} site - passed through to scrapeLatestX.
 * @returns {Array<*>} one scrapeLatestX result per embedded release.
 */
function scrapeLatestClassic(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const stateScript = $('script:contains("initialState")').html();

  const opener = 'initialState = {';
  const closer = '};';
  const openerIndex = stateScript.indexOf(opener);

  // keep the opening brace of the assignment and the closing brace before ';'
  const jsonStart = openerIndex + opener.length - 1;
  const jsonEnd = stateScript.indexOf(closer, openerIndex) + closer.length - 1;

  const state = JSON.parse(stateScript.slice(jsonStart, jsonEnd));
  const scenes = [];

  for (const scene of Object.values(state.entities.releases)) {
    scenes.push(scrapeLatestX(scene, site));
  }

  return scenes;
}
|
||||
|
||||
/**
 * Request a page of `<site.url>/scenes?page=<page>` and scrape it with
 * scrapeLatestClassic.
 *
 * @param {{ url: string }} site - site whose `url` is the base of the request.
 * @param {number} page - page number for the query string.
 * @returns {Promise<Array<*>|null>} scraped releases, or null unless the status code is exactly 200.
 */
async function fetchClassic(site, page) {
  const { statusCode, body } = await http.get(`${site.url}/scenes?page=${page}`);

  return statusCode === 200
    ? scrapeLatestClassic(body.toString(), site)
    : null;
}
|
||||
|
||||
/**
 * Pick the listing fetcher for a site: fetchClassic when
 * `site.parameters.classic` is truthy, otherwise fetchLatest from ./mindgeek.
 *
 * @param {{ parameters?: { classic?: boolean } }} site - site to fetch for.
 * @param {number} [page=1] - page number passed to the chosen fetcher.
 * @returns {Promise<*>} the result of the chosen fetcher.
 */
async function fetchLatestWrap(site, page = 1) {
  const fetchPage = site.parameters?.classic ? fetchClassic : fetchLatest;

  return fetchPage(site, page);
}
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'realitykings'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'realitykings');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
|
@ -5,7 +5,6 @@ const assylum = require('./assylum');
|
|||
const aziani = require('./aziani');
|
||||
const amateurallure = require('./amateurallure');
|
||||
const americanpornstar = require('./americanpornstar');
|
||||
const babes = require('./babes');
|
||||
const bamvisions = require('./bamvisions');
|
||||
const bang = require('./bang');
|
||||
const bangbros = require('./bangbros');
|
||||
|
@ -13,11 +12,9 @@ const blowpass = require('./blowpass');
|
|||
const cherrypimps = require('./cherrypimps');
|
||||
const czechav = require('./czechav');
|
||||
const ddfnetwork = require('./ddfnetwork');
|
||||
const digitalplayground = require('./digitalplayground');
|
||||
const dogfart = require('./dogfart');
|
||||
const dorcel = require('./dorcel');
|
||||
const elegantangel = require('./elegantangel');
|
||||
const fakehub = require('./fakehub');
|
||||
const famedigital = require('./famedigital');
|
||||
const firstanalquest = require('./firstanalquest');
|
||||
const fcuk = require('./fcuk');
|
||||
|
@ -26,7 +23,6 @@ const gamma = require('./gamma');
|
|||
const hitzefrei = require('./hitzefrei');
|
||||
const hookuphotshot = require('./hookuphotshot');
|
||||
const hush = require('./hush');
|
||||
const iconmale = require('./iconmale');
|
||||
const insex = require('./insex');
|
||||
const inthecrack = require('./inthecrack');
|
||||
const jayrock = require('./jayrock');
|
||||
|
@ -39,12 +35,8 @@ const kink = require('./kink');
|
|||
const legalporno = require('./legalporno');
|
||||
const littlecapricedreams = require('./littlecapricedreams');
|
||||
const porndoe = require('./porndoe');
|
||||
const men = require('./men');
|
||||
const metrohd = require('./metrohd');
|
||||
const mikeadriano = require('./mikeadriano');
|
||||
const milehighmedia = require('./milehighmedia');
|
||||
const mindgeek = require('./mindgeek');
|
||||
const mofos = require('./mofos');
|
||||
const naughtyamerica = require('./naughtyamerica');
|
||||
const newsensations = require('./newsensations');
|
||||
const nubiles = require('./nubiles');
|
||||
|
@ -56,14 +48,11 @@ const pascalssubsluts = require('./pascalssubsluts'); // reserved keyword
|
|||
const pierrewoodman = require('./pierrewoodman');
|
||||
const pinkyxxx = require('./pinkyxxx');
|
||||
const privateNetwork = require('./private'); // reserved keyword
|
||||
const realitykings = require('./realitykings');
|
||||
const score = require('./score');
|
||||
const teamskeet = require('./teamskeet');
|
||||
const teencoreclub = require('./teencoreclub');
|
||||
const topwebmodels = require('./topwebmodels');
|
||||
const transangels = require('./transangels');
|
||||
const traxxx = require('./traxxx');
|
||||
const twistys = require('./twistys');
|
||||
const vivid = require('./vivid');
|
||||
const vixen = require('./vixen');
|
||||
const vogov = require('./vogov');
|
||||
|
@ -73,7 +62,6 @@ const xempire = require('./xempire');
|
|||
// profiles
|
||||
const boobpedia = require('./boobpedia');
|
||||
const freeones = require('./freeones');
|
||||
// const freeoneslegacy = require('./freeones_legacy');
|
||||
|
||||
const scrapers = {
|
||||
releases: {
|
||||
|
@ -83,7 +71,6 @@ const scrapers = {
|
|||
amateureuro: porndoe,
|
||||
assylum,
|
||||
aziani,
|
||||
babes,
|
||||
bamvisions,
|
||||
bang,
|
||||
bangbros,
|
||||
|
@ -92,12 +79,10 @@ const scrapers = {
|
|||
cherrypimps,
|
||||
czechav,
|
||||
pornworld: ddfnetwork,
|
||||
digitalplayground,
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
dorcel,
|
||||
elegantangel,
|
||||
fakehub,
|
||||
famedigital,
|
||||
fcuk,
|
||||
firstanalquest,
|
||||
|
@ -124,12 +109,8 @@ const scrapers = {
|
|||
letsdoeit: porndoe,
|
||||
littlecapricedreams,
|
||||
mamacitaz: porndoe,
|
||||
men,
|
||||
metrohd,
|
||||
mikeadriano,
|
||||
milehighmedia,
|
||||
mindgeek,
|
||||
mofos,
|
||||
naughtyamerica,
|
||||
newsensations,
|
||||
nubiles,
|
||||
|
@ -142,7 +123,6 @@ const scrapers = {
|
|||
porncz,
|
||||
pornpros: whalemember,
|
||||
private: privateNetwork,
|
||||
realitykings,
|
||||
score,
|
||||
sexyhub: mindgeek,
|
||||
swallowsalon: julesjordan,
|
||||
|
@ -151,7 +131,6 @@ const scrapers = {
|
|||
topwebmodels,
|
||||
transbella: porndoe,
|
||||
traxxx,
|
||||
twistys,
|
||||
vipsexvault: porndoe,
|
||||
vivid,
|
||||
vixen,
|
||||
|
@ -169,7 +148,7 @@ const scrapers = {
|
|||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
aziani,
|
||||
babes,
|
||||
babes: mindgeek,
|
||||
baddaddypov: fullpornnetwork,
|
||||
bamvisions,
|
||||
bang,
|
||||
|
@ -186,7 +165,7 @@ const scrapers = {
|
|||
deeper: vixen,
|
||||
deeplush: nubiles,
|
||||
devilsfilm: famedigital,
|
||||
digitalplayground,
|
||||
digitalplayground: mindgeek,
|
||||
dtfsluts: fullpornnetwork,
|
||||
dogfartnetwork: dogfart,
|
||||
dorcelclub: dorcel,
|
||||
|
@ -194,7 +173,7 @@ const scrapers = {
|
|||
elegantangel,
|
||||
evilangel: gamma,
|
||||
eyeontheguy: hush,
|
||||
fakehub,
|
||||
fakehub: mindgeek,
|
||||
exploitedcollegegirls: fcuk,
|
||||
firstanalquest,
|
||||
forbondage: porndoe,
|
||||
|
@ -210,7 +189,7 @@ const scrapers = {
|
|||
hotcrazymess: nubiles,
|
||||
hushpass: hush,
|
||||
hussiepass: hush,
|
||||
iconmale,
|
||||
iconmale: mindgeek,
|
||||
interracialpass: hush,
|
||||
interracialpovs: hush,
|
||||
inthecrack,
|
||||
|
@ -222,10 +201,10 @@ const scrapers = {
|
|||
kink,
|
||||
legalporno,
|
||||
littlecapricedreams,
|
||||
men,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
mofos,
|
||||
men: mindgeek,
|
||||
metrohd: mindgeek,
|
||||
milehighmedia: mindgeek,
|
||||
mofos: mindgeek,
|
||||
mugfucked: fullpornnetwork,
|
||||
naughtyamerica,
|
||||
nfbusty: nubiles,
|
||||
|
@ -247,7 +226,7 @@ const scrapers = {
|
|||
povperverts: fullpornnetwork,
|
||||
povpornstars: hush,
|
||||
private: privateNetwork,
|
||||
realitykings,
|
||||
realitykings: mindgeek,
|
||||
roccosiffredi: famedigital,
|
||||
score,
|
||||
seehimfuck: hush,
|
||||
|
@ -259,12 +238,12 @@ const scrapers = {
|
|||
teamskeet,
|
||||
teencoreclub,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
transangels: mindgeek,
|
||||
transbella: porndoe,
|
||||
trueanal: mikeadriano,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
twistys: mindgeek,
|
||||
vipsexvault: porndoe,
|
||||
vixen,
|
||||
wicked: gamma,
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'transangels'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'transangels');
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
|
@ -1,13 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile by delegating to fetchProfile (imported from
 * ./mindgeek) with the fixed network slug 'twistys'.
 *
 * @param {{ name: string }} actor - only `name` is forwarded.
 * @returns {Promise<*>} the result of the delegated fetchProfile call.
 */
async function networkFetchProfile(actor) {
  const { name } = actor;

  return fetchProfile({ name }, 'twistys');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
|
@ -1,146 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const util = require('util');
|
||||
const stream = require('stream');
|
||||
const config = require('config');
|
||||
const tunnel = require('tunnel');
|
||||
const bhttp = require('@thependulum/bhttp');
|
||||
const taskQueue = require('promise-task-queue');
|
||||
|
||||
const pipeline = util.promisify(stream.pipeline);
|
||||
const logger = require('../logger')(__filename);
|
||||
|
||||
const defaultHeaders = {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||
};
|
||||
|
||||
const defaultOptions = {
|
||||
responseTimeout: 30000,
|
||||
};
|
||||
|
||||
const proxyAgent = tunnel.httpsOverHttp({
|
||||
proxy: {
|
||||
host: config.proxy.host,
|
||||
port: config.proxy.port,
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Decide whether a request to the given URL should go through the proxy.
 *
 * @param {string} url - absolute URL of the request.
 * @returns {boolean} true only when `config.proxy.enable` is truthy and the
 *   URL's hostname is listed in `config.proxy.hostnames`.
 */
function useProxy(url) {
  if (config.proxy.enable) {
    const target = new URL(url);

    return config.proxy.hostnames.includes(target.hostname);
  }

  // the URL is deliberately not parsed while the proxy is disabled
  return false;
}
|
||||
|
||||
const queue = taskQueue();
|
||||
const defaultQueueMethod = '20p';
|
||||
|
||||
/**
 * Execute one queued HTTP request through bhttp (or a supplied session) and
 * normalise the response.
 *
 * Request options are layered as: default headers (unless
 * `options.defaultHeaders === false`) and caller headers, then defaultOptions,
 * then the caller's options, with `options.timeout` finally mapped onto
 * `responseTimeout`. The proxy agent is attached when useProxy(url) is true.
 *
 * Fix over the previous version: `pipe` on the returned object is bound to
 * the original response. It used to be the bare method reference, so calling
 * `result.pipe(dest)` ran with `this` set to the plain result object.
 *
 * @param {Object} task
 * @param {string} task.url - request URL.
 * @param {string} [task.method='GET'] - HTTP method, case-insensitive.
 * @param {*} [task.body] - payload; only sent for POST, PUT and PATCH.
 * @param {Object} [task.headers={}] - headers merged over the defaults.
 * @param {Object} [task.options={}] - bhttp options plus `queueMethod`,
 *   `useSession`, `timeout`, `stream`, `destination` and `transforms`.
 * @returns {Promise<Object>} the response's own properties plus `originalRes`,
 *   `html` (string for Buffer bodies, else null), `json` (body for non-Buffer
 *   bodies, else null), `pipe`, `ok` (2xx), `code` and `status`.
 */
async function handler({
  url,
  method = 'GET',
  body,
  headers = {},
  options = {},
}) {
  const verb = method.toUpperCase();
  const queueMethod = options.queueMethod || defaultQueueMethod;

  if (body) {
    logger.silly(`${verb} ${url} with ${JSON.stringify(body)} ${queueMethod}`);
  } else {
    logger.silly(`${verb} ${url} ${queueMethod}`);
  }

  const reqOptions = {
    headers: {
      ...(options.defaultHeaders !== false && defaultHeaders),
      ...headers,
    },
    ...defaultOptions,
    ...options,
    ...(options.timeout && { responseTimeout: options.timeout }),
  };

  if (useProxy(url)) {
    reqOptions.agent = proxyAgent;
  }

  const client = options.useSession || bhttp;
  const clientMethod = method.toLowerCase();

  // invoke through the client so the method keeps its receiver
  const res = ['POST', 'PUT', 'PATCH'].includes(verb)
    ? await client[clientMethod](url, body, reqOptions)
    : await client[clientMethod](url, reqOptions);

  if (options.stream && options.destination) {
    await pipeline(res, ...(options.transforms || []), options.destination);
  }

  const hasBufferBody = Buffer.isBuffer(res.body);

  return {
    ...res,
    originalRes: res,
    html: hasBufferBody ? res.body.toString() : null,
    json: hasBufferBody ? null : res.body,
    // spreading does not copy prototype methods, so pipe must stay attached to res
    pipe: typeof res.pipe === 'function' ? res.pipe.bind(res) : res.pipe,
    ok: res.statusCode >= 200 && res.statusCode <= 299,
    code: res.statusCode,
    status: res.statusCode,
  };
}
|
||||
|
||||
queue.on('concurrencyReached:http', () => {
|
||||
logger.silly('Queueing requests');
|
||||
});
|
||||
|
||||
queue.define('20p', handler, {
|
||||
concurrency: 20,
|
||||
});
|
||||
|
||||
queue.define('1s', handler, {
|
||||
interval: 1,
|
||||
});
|
||||
|
||||
queue.define('5s', handler, {
|
||||
interval: 5,
|
||||
});
|
||||
|
||||
/**
 * Queue a GET request.
 *
 * @param {string} url - request URL.
 * @param {Object} [headers] - request headers.
 * @param {Object} [options] - request options; `queueMethod` selects the queue (defaults to defaultQueueMethod).
 * @returns {Promise<*>} the result of queue.push for this request.
 */
async function get(url, headers, options) {
  const queueMethod = options?.queueMethod || defaultQueueMethod;
  const request = {
    method: 'GET',
    url,
    headers,
    options,
  };

  return queue.push(queueMethod, request);
}
|
||||
|
||||
/**
 * Queue a HEAD request.
 *
 * @param {string} url - request URL.
 * @param {Object} [headers] - request headers.
 * @param {Object} [options] - request options; `queueMethod` selects the queue (defaults to defaultQueueMethod).
 * @returns {Promise<*>} the result of queue.push for this request.
 */
async function head(url, headers, options) {
  const queueMethod = options?.queueMethod || defaultQueueMethod;
  const request = {
    method: 'HEAD',
    url,
    headers,
    options,
  };

  return queue.push(queueMethod, request);
}
|
||||
|
||||
/**
 * Queue a POST request.
 *
 * @param {string} url - request URL.
 * @param {*} body - request payload.
 * @param {Object} [headers] - request headers.
 * @param {Object} [options] - request options; `queueMethod` selects the queue (defaults to defaultQueueMethod).
 * @returns {Promise<*>} the result of queue.push for this request.
 */
async function post(url, body, headers, options) {
  const queueMethod = options?.queueMethod || defaultQueueMethod;
  const request = {
    method: 'POST',
    url,
    body,
    headers,
    options,
  };

  return queue.push(queueMethod, request);
}
|
||||
|
||||
/**
 * Create a bhttp session.
 *
 * @param {Object} [headers] - passed to bhttp.session under the `headers` key.
 * @param {Object} [options] - passed to bhttp.session under the `options` key.
 * @returns {*} whatever bhttp.session returns.
 */
function session(headers, options) {
  const sessionConfig = { headers, options };

  return bhttp.session(sessionConfig);
}
|
||||
|
||||
module.exports = {
|
||||
get,
|
||||
post,
|
||||
head,
|
||||
session,
|
||||
};
|
Loading…
Reference in New Issue