Separated Filthy Kings into its channels, upgraded Gamma scraper to accommodate.
This commit is contained in:
@@ -2,12 +2,10 @@
|
||||
|
||||
const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const argv = require('../argv');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
@@ -90,44 +88,23 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeUpcoming(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('#coming_soon_carousel').find('.table').toArray();
|
||||
|
||||
return scenesElements.map((element) => {
|
||||
function scrapeUpcoming(scenes, channel) {
|
||||
return scenes.map(({ query, html }) => {
|
||||
const release = {};
|
||||
|
||||
release.entryId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];
|
||||
release.title = query.text('.overlay-text', { join: false })?.[0];
|
||||
release.date = query.date('.overlay-text', 'MM/DD/YYYY');
|
||||
|
||||
const details = $(element).find('.update_details_comingsoon')
|
||||
.eq(1)
|
||||
.children()
|
||||
.remove();
|
||||
release.actors = query.all('.update_models a').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null),
|
||||
}));
|
||||
|
||||
release.title = details
|
||||
.end()
|
||||
.text()
|
||||
.trim();
|
||||
release.poster = query.img('img') || query.img('img', { attribute: 'src0_1x' });
|
||||
|
||||
release.actors = details
|
||||
.text()
|
||||
.trim()
|
||||
.split(', ');
|
||||
release.entryId = channel.parameters?.entryIdFromTitle ? slugify(release.title) : getEntryId(html);
|
||||
|
||||
release.date = moment
|
||||
.utc($(element).find('.update_date_comingsoon').text().slice(7), 'MM/DD/YYYY')
|
||||
.toDate();
|
||||
|
||||
const photoElement = $(element).find('a img.thumbs');
|
||||
const posterPath = photoElement.attr('src');
|
||||
release.poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||
|
||||
const videoClass = $(element).find('.update_thumbnail div').attr('class');
|
||||
const videoScript = $(element).find(`script:contains(${videoClass})`).html();
|
||||
|
||||
if (videoScript) {
|
||||
release.teaser = videoScript.slice(videoScript.indexOf('https://'), videoScript.indexOf('.mp4') + 4);
|
||||
}
|
||||
// TODO: teaser
|
||||
|
||||
return release;
|
||||
});
|
||||
@@ -230,8 +207,11 @@ async function scrapeScene({ html, query }, context) {
|
||||
}
|
||||
|
||||
// release.photos = async () => await getPhotos(release.entryId, context.entity); // probably no longer works on any site
|
||||
// release.photos = query.imgs('#images img');
|
||||
release.photos = getPhotos(query, release, context);
|
||||
if (argv.jjFullPhotos) {
|
||||
release.photos = getPhotos(query, release, context);
|
||||
} else {
|
||||
release.photos = query.imgs('#images img');
|
||||
}
|
||||
|
||||
if (query.exists('.update_dvds a')) {
|
||||
release.movie = {
|
||||
@@ -271,55 +251,6 @@ function scrapeMovie({ el, query }, url, site) {
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
function scrapeProfile(html, url, actorName, entity) {
|
||||
const bio = document.querySelector('.model_bio').textContent;
|
||||
const avatarEl = document.querySelector('.model_bio_pic img, .model_bio_thumb');
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
const heightString = bio.match(/\d+ feet \d+ inches/);
|
||||
const ageString = bio.match(/Age:\s*(\d{2})/);
|
||||
const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
|
||||
const measurementsString = bio.match(/\w+-\d+-\d+/);
|
||||
|
||||
if (birthDateString) profile.birthdate = qu.parseDate(birthDateString[1], 'MMMM D, YYYY');
|
||||
if (ageString) profile.age = Number(ageString[1]);
|
||||
|
||||
if (heightString) profile.height = heightToCm(heightString[0]);
|
||||
|
||||
if (measurementsString) {
|
||||
const [bust, waist, hip] = measurementsString[0].split('-');
|
||||
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (avatarEl) {
|
||||
const avatarSources = [
|
||||
avatarEl.getAttribute('src0_3x'),
|
||||
avatarEl.getAttribute('src0_2x'),
|
||||
avatarEl.getAttribute('src0_1x'),
|
||||
avatarEl.getAttribute('src0'),
|
||||
avatarEl.getAttribute('src'),
|
||||
]
|
||||
.filter((avatar) => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
|
||||
.map((avatar) => qu.prefixUrl(avatar, entity.url));
|
||||
|
||||
if (avatarSources.length) profile.avatar = avatarSources;
|
||||
}
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), (el) => el.href);
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
*/
|
||||
|
||||
function scrapeProfile({ query }, url, name, entity) {
|
||||
const profile = { url };
|
||||
|
||||
@@ -368,13 +299,13 @@ async function fetchUpcoming(site) {
|
||||
if (site.parameters?.upcoming === false) return null;
|
||||
|
||||
const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
|
||||
const res = await http.get(url);
|
||||
const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/parent::div' });
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeUpcoming(res.body.toString(), site);
|
||||
if (res.ok) {
|
||||
return scrapeUpcoming(res.context, site);
|
||||
}
|
||||
|
||||
return res.statusCode;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchMovie(url, site) {
|
||||
|
||||
Reference in New Issue
Block a user