forked from DebaucheryLibrarian/traxxx
Re-wrote broken Perv City scraper, added profile scraping.
This commit is contained in:
@@ -1,140 +1,113 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
async function getTrailer(entryId) {
|
||||
const trailerRes = await bhttp.post('https://www.pervcity.com/gettoken.php', {
|
||||
setId: entryId,
|
||||
/**
 * Convert the video tiles of a listing page into release objects.
 *
 * @param {Array<{query: Object}>} scenes - Items whose `query` object provides the
 *   `url`, `q`, `date`, `dur`, `all` and `img` helpers called below.
 * @returns {Object[]} One release per item, in input order, carrying url, entryId,
 *   title, description, date, duration, actors and poster.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.videoPic a');

    // The set ID is read from the thumbnail's id attribute (set-target-<digits>).
    // Fall back to null instead of throwing, so a single tile without a matching
    // id cannot abort scraping of the whole page.
    release.entryId = query.q('.videoPic img', 'id')?.match(/set-target-(\d+)/)?.[1] ?? null;

    release.title = query.q('h3 a', true);
    release.description = query.q('.runtime + p', true);

    release.date = query.date('.date', 'MM-DD-YYYY');
    release.duration = query.dur('.runtime');

    release.actors = query.all('.tour_update_models a', true);

    release.poster = query.img('.videoPic img');

    return release;
  });
}
|
||||
|
||||
if (trailerRes.statusCode === 200) {
|
||||
return {
|
||||
poster: trailerRes.body.TrailerImg,
|
||||
trailer: trailerRes.body.TrailerPath || trailerRes.body.Trailerfallback,
|
||||
/**
 * Build a release from a scene page.
 *
 * @param {{query: Object}} context - Object whose `query` provides the `q`, `all`,
 *   `img` and `imgs` helpers called below.
 * @returns {Object} Release with entryId, title, description, actors, poster, photos
 *   and, when a trailer path is found in the script element, a `trailer.src` URL.
 */
function scrapeScene({ query }) {
  const release = {};

  // The set ID is read from the trailer image's id attribute (set-target-<digits>);
  // null when the image or the pattern is missing, rather than a TypeError.
  release.entryId = query.q('.trailerLeft img', 'id')?.match(/set-target-(\d+)/)?.[1] ?? null;

  release.title = query.q('.infoHeader h1', true);
  release.description = query.q('.infoBox p', true);

  release.actors = query.all('.tour_update_models a', true);

  release.poster = query.img('.posterimg');
  release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality

  // the trailer path is taken from the text of the element matched by 'script'
  const trailer = query.q('script')?.textContent.match(/\/trailers\/.+\.mp4/)?.[0];

  if (trailer) {
    release.trailer = {
      src: `https://pervcity.com${trailer}`,
    };
  }

  // return the assembled release; returning null here discarded everything scraped above
  return release;
}
|
||||
|
||||
function scrapeLatestScene(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const entryId = $('li').attr('id');
|
||||
const sceneLinkElement = $('#scene_title_border a');
|
||||
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
|
||||
|
||||
const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas
|
||||
const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();
|
||||
|
||||
const poster = $('a:nth-child(2) > img').attr('src');
|
||||
const photos = $('.sample-picker img').map((index, element) => $(element).attr('src').replace('tourpics', 'trailer')).toArray();
|
||||
|
||||
const stars = $('img[src*="/star.png"]')
|
||||
.toArray()
|
||||
.map(element => $(element).attr('src'))
|
||||
.length || 0;
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const release = { url, site };
|
||||
|
||||
release.entryId = document.querySelector('input#set_ID').value;
|
||||
|
||||
release.title = document.querySelector('title').textContent;
|
||||
release.description = document.querySelector('.player_data').textContent.trim();
|
||||
|
||||
const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
|
||||
const [minutes, seconds] = durationString.match(/\d+/g);
|
||||
|
||||
release.duration = Number(minutes) * 60 + Number(seconds);
|
||||
release.tags = document.querySelector('meta[name="keywords"]').content.split(',');
|
||||
|
||||
const { poster, trailer } = await getTrailer(release.entryId);
|
||||
|
||||
release.poster = poster;
|
||||
release.trailer = { src: trailer };
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeFallbackLanding(html) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
function scrapeProfile({ query }) {
|
||||
const profile = {};
|
||||
|
||||
return document.querySelector('input#set_ID').value;
|
||||
const bio = query.all('.moreInfo li').reduce((acc, el) => ({
|
||||
...acc,
|
||||
[slugify(query.q(el, 'span', true), '_')]: query.text(el),
|
||||
}), {});
|
||||
|
||||
profile.description = query.q('.aboutModel p', true);
|
||||
profile.dateOfBirth = qu.extractDate(bio.date_of_birth, ['MMMM D, YYYY', 'DD-MMM-YY']);
|
||||
|
||||
profile.birthPlace = bio.birth_location;
|
||||
profile.ethnicity = bio.ethnicity;
|
||||
|
||||
profile.height = feetInchesToCm(bio.height);
|
||||
profile.weight = lbsToKg(bio.weight);
|
||||
|
||||
profile.eyes = bio.eye_color;
|
||||
profile.hairColor = bio.hair_color;
|
||||
|
||||
profile.avatar = query.img('.starPic img');
|
||||
profile.releases = scrapeAll(qu.initAll(query.all('.aboutScenes .videoBlock')));
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function scrapeFallbackScene(html, entryId, url, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { url, entryId, site };
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `https://pervcity.com/search.php?site[]=${channel.parameters.siteId}&page=${page}`;
|
||||
const res = await qu.getAll(url, '.videoBlock');
|
||||
|
||||
release.title = document.querySelector('.popup_data_set_head label').textContent;
|
||||
release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
|
||||
release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
|
||||
release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);
|
||||
|
||||
const { poster, trailer } = await getTrailer(release.entryId);
|
||||
|
||||
release.poster = poster;
|
||||
release.trailer = { src: trailer };
|
||||
|
||||
release.channel = document.querySelector('.popup_left_top div img').alt;
|
||||
|
||||
return release;
|
||||
return res.ok ? scrapeAll(res.items, channel) : res.status;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = page === 1
|
||||
? await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`)
|
||||
: await bhttp.get(`${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
|
||||
const elements = JSON.parse(res.body.toString());
|
||||
async function fetchScene(url, entity) {
|
||||
const res = await qu.get(url, '.trailerArea');
|
||||
|
||||
const latest = Object.values(elements.total_arr).map(html => scrapeLatestScene(html, site)); // total_arr is a key-value object for final_load_latestupdate_grid_view.php
|
||||
|
||||
return latest;
|
||||
return res.ok ? scrapeScene(res.item, entity) : res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
async function fetchProfile(actorName) {
|
||||
const url = `https://pervcity.com/models/${slugify(actorName)}.html`;
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
if (site.isNetwork) {
|
||||
const entryId = scrapeFallbackLanding(res.body.toString(), url);
|
||||
|
||||
const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
|
||||
setId: entryId,
|
||||
});
|
||||
|
||||
return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
|
||||
}
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item);
|
||||
}
|
||||
|
||||
return null;
|
||||
const url2 = `https://pervcity.com/models/${slugify(actorName, '')}.html`;
|
||||
const res2 = await qu.get(url2);
|
||||
|
||||
if (res2.ok) {
|
||||
return scrapeProfile(res2.item);
|
||||
}
|
||||
|
||||
return res2.status;
|
||||
}
|
||||
|
||||
// Functions exposed by this scraper module: latest-updates listing, single scene, actor profile.
module.exports = { fetchLatest, fetchScene, fetchProfile };
|
||||
|
||||
144
src/scrapers/pervcity_legacy.js
Normal file
144
src/scrapers/pervcity_legacy.js
Normal file
@@ -0,0 +1,144 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Ask the Perv City token endpoint for the poster and trailer of a scene.
 *
 * @param {string} entryId - Set ID of the scene, sent as `setId` in the POST body.
 * @returns {Promise<Object|null>} `{ poster, trailer }` taken from the response body's
 *   `TrailerImg` and `TrailerPath` (or, when that is falsy, `Trailerfallback`) fields,
 *   or `null` when the response status is not 200.
 */
async function getTrailer(entryId) {
  const payload = { setId: entryId };
  const response = await bhttp.post('https://www.pervcity.com/gettoken.php', payload);

  if (response.statusCode !== 200) {
    return null;
  }

  const { TrailerImg, TrailerPath, Trailerfallback } = response.body;

  return {
    poster: TrailerImg,
    trailer: TrailerPath || Trailerfallback,
  };
}
|
||||
|
||||
/**
 * Build a release from one HTML fragment of the latest-updates response.
 *
 * @param {string} html - Markup of a single listing entry.
 * @param {Object} site - Entity whose `url` prefixes the scene link; attached to the result as `site`.
 * @returns {Object} Release with url, entryId, title, actors, date, poster, photos, rating and site.
 */
function scrapeLatestScene(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  const sceneLink = $('#scene_title_border a');
  const rawTitle = sceneLink.attr('title');

  // drop the first comma that is followed by a regular, non-breaking or narrow non-breaking space
  const actors = $('.home_model_name a')
    .toArray()
    .map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas

  // sample thumbnails live under 'tourpics'; the same file name is requested under 'trailer' instead
  const photos = $('.sample-picker img')
    .map((index, element) => $(element).attr('src').replace('tourpics', 'trailer'))
    .toArray();

  // the rating is the number of star.png images in the fragment
  const starSources = $('img[src*="/star.png"]')
    .toArray()
    .map(element => $(element).attr('src'));

  return {
    url: `${site.url}/${sceneLink.attr('href')}`,
    entryId: $('li').attr('id'),
    title: rawTitle.replace(/\u00E2\u0080\u0099/g, '\''), // replace weird apostrophes
    actors,
    // NOTE(review): parsed day-first here, while the popup scraper in this file parses MM-DD-YYYY — confirm
    date: moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate(),
    poster: $('a:nth-child(2) > img').attr('src'),
    photos,
    rating: {
      stars: starSources.length || 0,
    },
    site,
  };
}
|
||||
|
||||
/**
 * Scrape a release from a scene page and add the poster and trailer reported by getTrailer.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL the page was fetched from; copied onto the release.
 * @param {Object} site - Entity the scene belongs to; copied onto the release.
 * @returns {Promise<Object>} Release with url, site, entryId, title, description, tags,
 *   duration (seconds, when it could be parsed) and poster/trailer (when getTrailer succeeded).
 */
async function scrapeScene(html, url, site) {
  const { document } = new JSDOM(html).window;

  const release = { url, site };

  release.entryId = document.querySelector('input#set_ID').value;

  release.title = document.querySelector('title').textContent;
  release.description = document.querySelector('.player_data').textContent.trim();

  const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
  // match() yields null when the text holds no digits; treat that as "no duration" instead of throwing
  const [minutes, seconds] = durationString.match(/\d+/g) || [];

  if (minutes !== undefined && seconds !== undefined) {
    release.duration = Number(minutes) * 60 + Number(seconds);
  }

  release.tags = document.querySelector('meta[name="keywords"]').content.split(',');

  // getTrailer resolves to null on a non-200 response, so it must not be destructured blindly
  const media = await getTrailer(release.entryId);

  if (media) {
    release.poster = media.poster;
    release.trailer = { src: media.trailer };
  }

  return release;
}
|
||||
|
||||
/**
 * Read the set ID from a landing page.
 *
 * @param {string} html - Markup of the landing page.
 * @returns {string} Value of the `input#set_ID` element.
 */
function scrapeFallbackLanding(html) {
  const dom = new JSDOM(html);
  const setIdInput = dom.window.document.querySelector('input#set_ID');

  return setIdInput.value;
}
|
||||
|
||||
/**
 * Scrape a release from the popup markup returned for a set ID.
 *
 * @param {string} html - Popup markup.
 * @param {string} entryId - Set ID the popup was requested for; copied onto the release.
 * @param {string} url - URL of the scene; copied onto the release.
 * @param {Object} site - Entity the scene was requested through; copied onto the release.
 * @returns {Promise<Object>} Release with url, entryId, site, title, description, date, actors,
 *   channel and poster/trailer (when getTrailer succeeded).
 */
async function scrapeFallbackScene(html, entryId, url, site) {
  const { document } = new JSDOM(html).window;
  const release = { url, entryId, site };

  release.title = document.querySelector('.popup_data_set_head label').textContent;
  release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
  release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
  release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);

  // getTrailer resolves to null on a non-200 response, so it must not be destructured blindly
  const media = await getTrailer(release.entryId);

  if (media) {
    release.poster = media.poster;
    release.trailer = { src: media.trailer };
  }

  // the channel is taken from the alt text of the image in the popup header
  release.channel = document.querySelector('.popup_left_top div img').alt;

  return release;
}
|
||||
|
||||
/**
 * Fetch one page of latest updates for a channel and scrape each entry.
 *
 * @param {Object} channel - Entity providing `url` and `parameters.tourId`.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<Object[]>} Releases scraped from every fragment in the response's `total_arr`.
 */
async function fetchLatest(channel, page = 1) {
  const { tourId } = channel.parameters;

  // The endpoint's parameter is `websiteid`, as sent by the original scraper; the
  // `webchannelid` spelling came from the site -> channel variable rename and is not
  // a parameter the request previously used.
  const url = page === 1
    ? `${channel.url}/final_latestupdateview.php?limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${tourId}`
    : `${channel.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${tourId}`;

  const res = await bhttp.get(url);
  const elements = JSON.parse(res.body.toString());

  // total_arr is a key-value object for final_load_latestupdate_grid_view.php
  return Object.values(elements.total_arr).map(html => scrapeLatestScene(html, channel));
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene.
 *
 * When `site.isNetwork` is set, the page is only used to read the set ID and the scene
 * details are scraped from the popup endpoint instead.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Entity the scene is requested through.
 * @returns {Promise<Object|null>} The scraped release, or `null` when the page request or
 *   the popup request does not return status 200.
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  if (res.statusCode !== 200) {
    return null;
  }

  if (!site.isNetwork) {
    return scrapeScene(res.body.toString(), url, site);
  }

  const entryId = scrapeFallbackLanding(res.body.toString());

  const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
    setId: entryId,
  });

  // do not hand an error page to the popup scraper; its selectors would not match
  if (fallbackRes.statusCode !== 200) {
    return null;
  }

  return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
}
|
||||
|
||||
// Functions exposed by the legacy scraper module: latest-updates listing and single scene.
module.exports = { fetchLatest, fetchScene };
|
||||
@@ -193,6 +193,7 @@ module.exports = {
|
||||
nubilesporn: nubiles,
|
||||
nympho: mikeadriano,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervcity,
|
||||
pervertgallery: fullpornnetwork,
|
||||
peternorth: famedigital,
|
||||
pimpxxx: cherrypimps,
|
||||
|
||||
Reference in New Issue
Block a user