|
|
|
|
@@ -1,90 +1,165 @@
|
|
|
|
|
'use strict';
|
|
|
|
|
|
|
|
|
|
const { geta, ed } = require('../utils/q');
|
|
|
|
|
const unprint = require('unprint');
|
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
|
|
|
|
|
|
/**
 * Scrape release summaries from block-style listing tiles.
 *
 * @param {Array<{ html: *, query: object }>} scenes - one entry per tile; `html` is handed to
 *   unprint.extractDate and `query` is the selector helper scoped to that tile.
 * @returns {object[]} one release per tile with title, url, date, actors, entryId, poster and teaser.
 */
function scrapeLatestBlock(scenes) {
  return scenes.map(({ html, query }) => {
    const release = {};

    release.title = query.content('h4 a');
    release.url = query.url('h4 a');
    // the date has no dedicated element here, so it is matched against the tile markup as a whole
    release.date = unprint.extractDate(html, 'MM/DD/YYYY', { match: /\d{2}\/\d{2}\/\d{4}/ });

    release.actors = query.contents('.tour_update_models a');

    // native videothumb entry ID does not occur on scene page, date is not available on all sites,
    // so the ID is built from the sorted actor names plus the title; sort a copy so that
    // release.actors keeps the order found on the page
    const sortedActors = [...release.actors].sort();
    release.entryId = slugify([...sortedActors, release.title]);

    release.poster = query.dataset('.video_placeholder', 'src');
    release.teaser = query.video();

    return release;
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scrape release summaries from classic-style listing entries.
 *
 * @param {Array<{ query: object }>} scenes - one entry per listing item; `query` is the selector
 *   helper scoped to that item.
 * @returns {object[]} one release per item with url, title, description, date, duration, actors,
 *   entryId, poster and photos.
 */
function scrapeLatestClassic(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a');

    release.title = query.content('.update_title_small') || query.content('a:nth-child(2)');
    release.description = query.attribute('a', 'title');

    release.date = query.date('.date_small, .update_date', 'MM/DD/YYYY');

    // the listing value is multiplied by 60 before being stored
    release.duration = query.number('.update_counts') * 60;

    const linkedActors = query.contents('.update_models a');

    if (linkedActors.length > 0) {
      release.actors = linkedActors;
    } else {
      // no actor links: fall back to the comma-separated text, tolerating a missing element
      const actorLine = query.content('.update_models');

      release.actors = actorLine ? actorLine.split(/,\s*/) : [];
    }

    // native videothumb entry ID does not occur on scene page, date is not available on all sites,
    // so the ID is built from the sorted actor names plus the title; sort a copy so that
    // release.actors keeps the order found on the page
    const sortedActors = [...release.actors].sort();
    release.entryId = slugify([...sortedActors, release.title]);

    const photoCount = query.number('.update_thumb', { attribute: 'cnt' });

    // thumbnails are exposed as src{index}_{n}x attributes; take the largest variant present,
    // the first one becomes the poster and the rest the photos
    [release.poster, ...release.photos] = Array.from({ length: photoCount })
      .map((value, index) => query.attribute('.update_thumb', `src${index}_3x`)
        || query.attribute('.update_thumb', `src${index}_2x`)
        || query.attribute('.update_thumb', `src${index}_1x`));

    return release;
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scrape a single scene page that uses the block-style layout.
 *
 * @param {{ query: object }} context - scraping context; `query` is the selector helper for the page.
 * @returns {object} release with title, description, date, actors ({ name, url }), duration,
 *   entryId, stars, poster and trailer.
 */
function scrapeSceneBlock({ query }) {
  const release = {};

  release.title = query.content('.indScene h2');
  release.description = query.content('.description');
  release.date = query.date('.sceneDateP span', 'MM/DD/YYYY');

  release.actors = query.all('.sceneTextLink .tour_update_models a').map((actorEl) => ({
    name: unprint.query.content(actorEl),
    url: unprint.query.url(actorEl, null),
  }));

  // the "<n> min" figure is picked out of the date paragraph and multiplied by 60
  release.duration = query.number('.sceneDateP', { match: /(\d+)\s+min/i, matchIndex: 1 }) * 60;

  // ID is derived from the sorted actor names plus the title, the same recipe the listing scrapers use
  release.entryId = slugify([...release.actors.map((actor) => actor.name).sort(), release.title]);

  release.stars = query.number('.sceneRating');

  release.poster = query.img('#trailer_thumb img[src*=content]');
  release.trailer = query.video('#trailerVideo source');

  return release;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a gallery page and collect its image sources.
 *
 * @param {string} url - address of the gallery page.
 * @returns {Promise<Array>} result of querying '.grid-gallery img' on the fetched page, or an
 *   empty array when the response is not ok.
 */
async function fetchPhotos(url) {
  const response = await unprint.get(url);

  // a failed request yields no photos rather than an error
  if (!response.ok) {
    return [];
  }

  const { query: galleryQuery } = response.context;

  return galleryQuery.imgs('.grid-gallery img');
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scrape a single scene page that uses the classic layout.
 *
 * @param {{ query: object }} sceneContext - scraping context; `query` is the selector helper for the page.
 * @param {*} context - not used by this scraper; kept so the signature stays unchanged.
 * @param {{ includePhotos?: boolean }} [options] - when `includePhotos` is truthy and a gallery
 *   link is present, the gallery page is fetched and its images stored as `release.photos`.
 * @returns {Promise<object>} release with title, description, date, actors ({ name, url }),
 *   entryId, tags, stars, poster, trailer and optionally photos.
 */
async function scrapeSceneClassic({ query }, context, options) {
  const release = {};

  release.title = query.content('.update_title');
  release.description = query.content('.update_description');
  release.date = query.date('.update_date', 'MM/DD/YYYY');

  release.actors = query.all('.update_models a').map((actorEl) => ({
    name: unprint.query.content(actorEl),
    url: unprint.query.url(actorEl, null),
  }));

  // ID is derived from the sorted actor names plus the title, the same recipe the listing scrapers use
  release.entryId = slugify([...release.actors.map((actor) => actor.name).sort(), release.title]);

  release.tags = query.contents('.update_tags a');
  release.stars = query.number('.gallery_info', { match: /average\s+rating:\s+(\d+\.\d+)/i, matchIndex: 1 });

  release.poster = query.img('#vidplayer', { attribute: 'poster' });
  release.trailer = query.video('#vidplayer source');

  // the gallery is linked through an anchor wrapping the gallery.gif image
  const gallery = query.url('//a[img[contains(@src, "gallery.gif")]]');

  // options may be omitted; the extra gallery request is only made when photos were asked for
  if (gallery && options && options.includePhotos) {
    release.photos = await fetchPhotos(gallery);
  }

  return release;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Request one page of the movie listing for a site.
 *
 * @param {object} site - must provide `url` and `parameters.siteId`, both used to build the listing address.
 * @param {number} [page=1] - listing page number placed in the address.
 * @returns {Promise<object>} the unprint response, returned as-is for the caller to inspect.
 */
async function fetchLatest(site, page = 1) {
  const listingUrl = `${site.url}/tour_${site.parameters.siteId}/categories/movies_${page}_d.html`;
  const requestOptions = { selectAll: '.movieBlock, .videoBlock, .update_details, .update_details' };

  const response = await unprint.get(listingUrl, requestOptions);

  return response;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a listing page and scrape it with the classic-layout scraper.
 *
 * @param {object} site - site to fetch; sites without `parameters` are skipped.
 * @param {number} [page] - listing page number, forwarded to fetchLatest.
 * @returns {Promise<object[]|number|null>} scraped releases when the response is ok, the
 *   response status otherwise, or null when the site has no parameters.
 */
async function fetchLatestClassic(site, page) {
  // the listing address is built from site.parameters, so nothing can be fetched without it
  if (!site.parameters) {
    return null;
  }

  const res = await fetchLatest(site, page);

  if (res.ok) {
    return scrapeLatestClassic(res.context, site);
  }

  return res.status;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a listing page and scrape it with the block-layout scraper.
 *
 * @param {object} site - site to fetch; sites without `parameters` are skipped.
 * @param {number} [page] - listing page number, forwarded to fetchLatest.
 * @returns {Promise<object[]|number|null>} scraped releases when the response is ok, the
 *   response status otherwise, or null when the site has no parameters.
 */
async function fetchLatestBlock(site, page) {
  const hasParameters = Boolean(site.parameters);

  if (!hasParameters) {
    return null;
  }

  const response = await fetchLatest(site, page);

  // hand the parsed context to the scraper on success, surface the status otherwise
  return response.ok
    ? scrapeLatestBlock(response.context, site)
    : response.status;
}
|
|
|
|
|
|
|
|
|
|
module.exports = {
|
|
|
|
|
fetchLatest,
|
|
|
|
|
scrapeScene,
|
|
|
|
|
fetchLatest: fetchLatestClassic,
|
|
|
|
|
scrapeScene: scrapeSceneClassic,
|
|
|
|
|
useUnprint: true,
|
|
|
|
|
block: {
|
|
|
|
|
scrapeScene: scrapeSceneBlock,
|
|
|
|
|
fetchLatest: fetchLatestBlock,
|
|
|
|
|
useUnprint: true,
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
|