Changed the q utility's get and geta APIs to include the HTTP status and refactored the scrapers accordingly. Showing front and back covers on movie tiles and the release page (fix). Removed icons from the main navigation. Returning scenes from the Jules Jordan movie scraper.

This commit is contained in:
ThePendulum 2020-03-08 04:23:10 +01:00
parent b45bb0cfbc
commit acad99bdfe
15 changed files with 222 additions and 119 deletions
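The core of the change: get and geta from the q utility now always resolve to a result object — { item, items, res, ok, status } — instead of a DOM context or null, so scrapers can surface the HTTP status on failure. A minimal sketch of the new calling convention (the URL and selectors below are placeholders, not from this commit):

const { get, geta } = require('./src/utils/q');

async function example() {
  // geta extracts one q context per matched element
  const res = await geta('https://example.com/updates', '.scene-tile');

  // on success, map over the contexts; on failure, return the HTTP status instead of null
  return res.ok ? res.items.map(({ q }) => q('.title', true)) : res.status;
}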

View File

@@ -202,5 +202,9 @@ export default {
   .nav-item {
     flex-grow: 1;
   }
+
+  .logo {
+    display: none;
+  }
 }
 </style>

View File

@ -74,6 +74,19 @@
class="thumbnail"
>
<span
v-else-if="release.covers && release.covers.length > 0"
class="covers"
>
<img
v-for="cover in release.covers"
:key="cover.id"
:src="`/${release.batch === 'dummy' ? 'img' : 'media'}/${cover.thumbnail}`"
:alt="release.title"
class="thumbnail cover"
>
</span>
<img
v-else-if="release.covers && release.covers.length > 0"
:src="`/${release.batch === 'dummy' ? 'img' : 'media'}/${release.covers[0].thumbnail}`"
@ -176,6 +189,15 @@ export default {
margin: 0 0 .5rem 0;
}
.covers {
background: $profile;
display: flex;
.cover {
width: 50%;
}
}
.thumbnail {
width: 100%;
height: 14rem;

View File

@@ -182,6 +182,7 @@ const releaseFragment = `
   ${releaseTagsFragment}
   ${releasePosterFragment}
   ${releasePhotosFragment}
+  ${releaseCoversFragment}
   ${releaseTrailerFragment}
   ${releaseTeaserFragment}
   ${siteFragment}

View File

@@ -43,6 +43,7 @@ function initReleasesActions(store, _router) {
           slug
           date
           url
+          type
           isNew
           site {
             id
@@ -72,6 +73,13 @@ function initReleasesActions(store, _router) {
           }
           poster: releasesPosterByReleaseId {
             media {
               id
               thumbnail
             }
           }
+          covers: releasesCovers {
+            media {
+              id
+              thumbnail
+            }
+          }

View File

@@ -92,9 +92,8 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight
   const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type }));

-  if (argv.scene && argv.inspect) {
+  if ((argv.scene || argv.movie) && argv.inspect) {
     // only show when fetching from URL
     console.log(curatedReleases);
   }

   if (argv.save) {
@@ -110,6 +109,9 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight
   */
   const { releases: storedReleases } = await storeReleases(curatedReleases);

+  const movieScenes = storedReleases.map(movie => movie.scenes).flat();
+  console.log(movieScenes);
+
   if (storedReleases) {
     logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));

View File

@@ -91,11 +91,11 @@ function extractModels({ el }, site) {
 async function fetchModels(site, page = 1, accModels = []) {
   const url = `${site.url}/?models/${page}`;
-  const qModels = await get(url);
+  const res = await get(url);

-  if (qModels) {
-    const models = extractModels(qModels, site);
-    const nextPage = qModels.qa('.pagenumbers', true)
+  if (res.ok) {
+    const models = extractModels(res.item, site);
+    const nextPage = res.item.qa('.pagenumbers', true)
       .map(pageX => Number(pageX))
       .filter(Boolean) // remove << and >>
       .includes(page + 1);
@@ -112,16 +112,16 @@ async function fetchModels(site, page = 1, accModels = []) {
 async function fetchLatest(site, page = 1, models) {
   const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
-  const qLatest = await geta(url, '.item');
+  const res = await geta(url, '.item');

-  return qLatest && scrapeLatest(qLatest, site, models);
+  return res.ok ? scrapeLatest(res.items, site, models) : res.status;
 }

 async function fetchScene(url, site, release, beforeFetchLatest) {
   const models = beforeFetchLatest || await fetchModels(site);
-  const qScene = await get(url);
+  const res = await get(url);

-  return qScene && scrapeScene(qScene, url, site, models);
+  return res.ok ? scrapeScene(res.item, url, site, models) : res.status;
 }

 module.exports = {

View File

@@ -109,15 +109,15 @@ async function fetchLatest(site, page = 1) {
   const url = site.parameters?.extract
     ? `https://cherrypimps.com/categories/movies_${page}.html`
     : `${site.url}/categories/movies_${page}.html`;
-  const qLatest = await geta(url, 'div.video-thumb');
+  const res = await geta(url, 'div.video-thumb');

-  return qLatest && scrapeAll(qLatest, site);
+  return res.ok ? scrapeAll(res.items, site) : res.status;
 }

 async function fetchScene(url, site, release) {
-  const qScene = await get(url);
+  const res = await get(url);

-  return qScene && scrapeScene(qScene, url, site, release);
+  return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
 }

 async function fetchProfile(actorName, scraperSlug) {
@@ -128,9 +128,11 @@ async function fetchProfile(actorName, scraperSlug) {
     ? [`https://${scraperSlug}.com/models/${actorSlug}.html`, `https://${scraperSlug}.com/models/${actorSlug2}.html`]
     : [`https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug}.html`, `https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug2}.html`];
-  const qActor = await get(url) || await get(url2);
+  const res = await get(url);
+  if (res.ok) return scrapeProfile(res.item);

-  return qActor && scrapeProfile(qActor);
+  const res2 = await get(url2);
+  return res2.ok ? scrapeProfile(res2.item) : res2.status;
 }

 module.exports = {

View File

@@ -67,15 +67,15 @@ function scrapeProfile({ el, q, qtx }) {
 async function fetchLatest(site, page = 1) {
   const url = `${site.url}/categories/movies_${page}_d.html`;
-  const qLatest = await geta(url, '.latest-updates .update');
+  const res = await geta(url, '.latest-updates .update');

-  return qLatest && scrapeAll(qLatest, site);
+  return res.ok ? scrapeAll(res.items, site) : res.status;
 }

 async function fetchScene(url, site) {
-  const qScene = await get(url, '.content-wrapper');
+  const res = await get(url, '.content-wrapper');

-  return qScene && scrapeScene(qScene, url, site);
+  return res.ok ? scrapeScene(res.item, url, site) : res.status;
 }

 async function fetchProfile(actorName, scraperSlug) {
@@ -84,9 +84,9 @@ async function fetchProfile(actorName, scraperSlug) {
     ? `https://povperverts.net/models/${actorSlug}.html`
     : `https://${scraperSlug}.com/models/${actorSlug}.html`;
-  const qProfile = await get(url);
+  const res = await get(url);

-  return qProfile && scrapeProfile(qProfile, actorName);
+  return res.ok ? scrapeProfile(res.item, actorName) : res.status;
 }

 module.exports = {

View File

@@ -361,25 +361,24 @@ async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases)
     || (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
     || `${site.url}/categories/movies_${page}_d.html`;
-  const qLatest = await geta(url, '.modelfeature, .item-video, .updateItem');
+  const res = await geta(url, '.modelfeature, .item-video, .updateItem');

-  if (!qLatest) return null;
-  if (site.parameters?.t1) return scrapeAllT1(qLatest, site, accSiteReleases);
-  if (site.parameters?.tour) return scrapeAllTour(qLatest, site, accSiteReleases);
+  if (!res.ok) return res.status;
+  if (site.parameters?.t1) return scrapeAllT1(res.items, site, accSiteReleases);
+  if (site.parameters?.tour) return scrapeAllTour(res.items, site, accSiteReleases);

-  return scrapeAll(qLatest, site, accSiteReleases);
+  return scrapeAll(res.items, site, accSiteReleases);
 }

 async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
   const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
-  const qScene = await get(url);
+  const res = await get(url);

-  if (!qScene) return null;
-  if (site.parameters?.t1) return scrapeSceneT1(qScene, site, url, baseRelease, channelRegExp);
-  if (site.parameters?.tour) return scrapeSceneTour(qScene, site, url, baseRelease);
-  return scrapeScene(qScene, site, url, baseRelease);
+  if (!res.ok) return res.status;
+  if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
+  if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
+  return scrapeScene(res.item, site, url, baseRelease);
 }

 async function fetchProfile(actorName, scraperSlug, site) {
@@ -387,14 +386,21 @@ async function fetchProfile(actorName, scraperSlug, site) {
   const actorSlugB = slugify(actorName);
   const t1 = site.parameters?.t1 ? 't1/' : '';
-  const qProfile = site.parameters?.profile
-    ? (await get(util.format(site.parameters.profile, actorSlugA)) || await get(site.parameters.profile, actorSlugB))
-    : (await get(`${site.url}/${t1}models/${actorSlugA}.html`) || await get(`${site.url}/${t1}models/${actorSlugB}.html`));
-
-  if (site.parameters?.t1) return qProfile && scrapeProfileT1(qProfile, site);
-  if (site.parameters?.tour) return qProfile && scrapeProfileTour(qProfile, site);
-
-  return qProfile && scrapeProfile(qProfile, site);
+  const res1 = site.parameters?.profile
+    ? await get(util.format(site.parameters.profile, actorSlugA))
+    : await get(`${site.url}/${t1}models/${actorSlugA}.html`);
+
+  const res = (res1.ok && res1)
+    || (site.parameters?.profile
+      ? await get(util.format(site.parameters.profile, actorSlugB))
+      : await get(`${site.url}/${t1}models/${actorSlugB}.html`));
+
+  if (!res.ok) return res.status;
+  if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
+  if (site.parameters?.tour) return scrapeProfileTour(res.item, site);
+  return scrapeProfile(res.item, site);
 }

 module.exports = {

View File

@@ -8,6 +8,7 @@ const { JSDOM } = require('jsdom');
 const moment = require('moment');

 const logger = require('../logger')(__filename);
+const { get, geta, ctxa } = require('../utils/q');
 const { heightToCm } = require('../utils/convert');
 const slugify = require('../utils/slugify');
@@ -117,41 +118,33 @@ async function getPhotos(entryId, site, type = 'highres', page = 1) {
   return getPhotosLegacy(entryId, site, 'highres', 1);
 }

-function scrapeLatest(html, site) {
-  const $ = cheerio.load(html, { normalizeWhitespace: true });
-  const scenesElements = $('.update_details').toArray();
-
-  return scenesElements.map((element) => {
+function scrapeAll(scenes, site) {
+  return scenes.map(({ el, q, qa, qh, qu, qd, qi, qis }) => {
     const release = {};

-    const sceneLinkElement = $(element).find('a[title], .update_title a');
-    release.url = sceneLinkElement.attr('href');
-    release.title = sceneLinkElement.text()?.trim() || sceneLinkElement.attr('alt')?.trim();
-
-    release.entryId = $(element).attr('data-setid');
-
-    release.date = moment
-      .utc($(element).find('.update_date').text(), 'MM/DD/YYYY')
-      .toDate();
-
-    release.actors = $(element).find('.update_models a')
-      .map((actorIndex, actorElement) => $(actorElement).text())
-      .toArray();
-
-    const photoElement = $(element).find('a img.thumbs');
-    const photoCount = Number(photoElement.attr('cnt')) || 1;
-
-    [release.poster, ...release.photos] = Array.from({ length: photoCount }, (value, index) => {
-      const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`) || photoElement.attr('src');
-
-      return src ? {
-        src: /^http/.test(src) ? src : `${site.url}${src}`,
-        referer: site.url,
-      } : null;
-    }).filter(photoUrl => photoUrl);
+    release.entryId = el.dataset.setid || q('.rating_box')?.dataset.id;
+
+    release.url = qu('.update_title, .dvd_info > a, a ~ a');
+    release.title = q('.update_title, .dvd_info > a, a ~ a', true);
+    release.date = qd('.update_date', 'MM/DD/YYYY');
+
+    release.actors = qa('.update_models a', true);
+
+    const dvdPhotos = qis('.dvd_preview_thumb');
+    const photoCount = Number(q('a img.thumbs', 'cnt')) || 1;
+
+    [release.poster, ...release.photos] = dvdPhotos.length
+      ? dvdPhotos
+      : Array.from({ length: photoCount }).map((value, index) => {
+        const src = qi('a img.thumbs', `src${index}_1x`) || qi('a img.thumbs', `src${index}`) || qi('a img.thumbs');
+        if (!src) return null;
+
+        return {
+          src: /^http/.test(src) ? src : `${site.url}${src}`,
+          referer: site.url,
+        };
+      }).filter(Boolean);

-    const teaserScript = $(element).find('script').html();
+    const teaserScript = qh('script');
     if (teaserScript) {
       const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
       if (src) release.teaser = { src };
@@ -294,18 +287,28 @@ async function scrapeScene(html, url, site) {
   return release;
 }

-function scrapeMovie(html, url, site) {
-  const { document } = new JSDOM(html).window;
+function scrapeMovie({ el, q, qus }, url, site) {
   const movie = { url, site };

-  movie.entryId = document.querySelector('.dvd_details_overview .rating_box').dataset.id;
-  movie.title = document.querySelector('.title_bar span').textContent;
-  movie.covers = Array.from(document.querySelectorAll('#dvd-cover-flip > a'), el => el.href);
-  movie.channel = document.querySelector('.update_date a').textContent;
-  movie.date = new Date();
-  movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
-
-  return movie;
+  movie.entryId = q('.dvd_details_overview .rating_box').dataset.id;
+  movie.title = q('.title_bar span', true);
+  movie.covers = qus('#dvd-cover-flip > a');
+  movie.channel = q('.update_date a', true);
+  // movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
+
+  const sceneQs = ctxa(el, '.dvd_details');
+  const scenes = scrapeAll(sceneQs, site);
+
+  const curatedScenes = scenes
+    .map(scene => ({ ...scene, movie }))
+    .sort((sceneA, sceneB) => sceneA.date - sceneB.date);
+
+  movie.date = curatedScenes[0].date;
+
+  return {
+    ...movie,
+    scenes: curatedScenes,
+  };
 }

 function scrapeProfile(html, url, actorName) {
@@ -344,14 +347,14 @@ function scrapeProfile(html, url, actorName) {
 }

 async function fetchLatest(site, page = 1) {
-  const url = site.parameters?.latest ? util.format(site.parameters.latest, page) : `${site.url}/trial/categories/movies_${page}_d.html`;
-  const res = await bhttp.get(url);
+  const url = site.parameters?.latest
+    ? util.format(site.parameters.latest, page)
+    : `${site.url}/trial/categories/movies_${page}_d.html`;

-  if (res.statusCode === 200) {
-    return scrapeLatest(res.body.toString(), site);
-  }
+  // const res = await bhttp.get(url);
+  const res = await geta(url, '.update_details');

-  return res.statusCode;
+  return res.ok ? scrapeAll(res.items, site) : res.status;
 }

 async function fetchUpcoming(site) {
@@ -374,9 +377,9 @@ async function fetchScene(url, site) {
 }

 async function fetchMovie(url, site) {
-  const res = await bhttp.get(url);
+  const res = await get(url);

-  return scrapeMovie(res.body.toString(), url, site);
+  return res.ok ? scrapeMovie(res.item, url, site) : res.status;
 }

 async function fetchProfile(actorName) {
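Taken together, fetchMovie for Jules Jordan now resolves to the movie with its scenes attached (each scene carrying a movie back-reference, sorted oldest-first, and the movie dated to its earliest scene), or the HTTP status code on failure. A hypothetical consumer, with a placeholder URL:

const result = await fetchMovie('https://www.julesjordan.com/trial/dvds/example.html', site);

if (typeof result === 'number') {
  console.log(`fetch failed with HTTP ${result}`);
} else {
  console.log(result.title, result.scenes.length); // scenes sorted by date, oldest first
}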

View File

@@ -60,13 +60,13 @@ async function fetchLatest(site, page = 1) {
   }

   const url = `${site.url}/tour_${site.parameters.siteId}/categories/movies_${page}_d.html`;
-  const qLatest = await geta(url, '.updatesBlock .movieBlock, .updatesBlock .videoBlock, .latest_updates_block .update_details, .category_listing_block .update_details');
+  const res = await geta(url, '.updatesBlock .movieBlock, .updatesBlock .videoBlock, .latest_updates_block .update_details, .category_listing_block .update_details');

-  if (qLatest && site.parameters.block) {
-    return scrapeBlockLatest(qLatest, site);
+  if (res.ok && site.parameters.block) {
+    return scrapeBlockLatest(res.items, site);
   }

-  return qLatest && scrapeClassicLatest(qLatest, site);
+  return res.ok ? scrapeClassicLatest(res.items, site) : res.status;
 }

 module.exports = {

View File

@@ -10,10 +10,10 @@ const slugUrlMap = {
 };

 async function getPhotos(albumUrl) {
-  const thumbnails = await geta(albumUrl, '.photo-thumb');
+  const res = await geta(albumUrl, '.photo-thumb');

-  return thumbnails
-    ? thumbnails.map(({ q }) => q('source').srcset)
+  return res.ok
+    ? res.items.map(({ q }) => q('source').srcset)
     : [];
 }
@@ -114,26 +114,26 @@ function scrapeProfile({ q, qa, qi, qu }, _actorName, origin) {
 async function fetchLatest(site, page = 1) {
   const url = `${site.url}/video/gallery/${(page - 1) * 12}`;
-  const qLatest = await geta(url, '.content-grid-item');
+  const res = await geta(url, '.content-grid-item');

-  return qLatest && scrapeAll(qLatest, site);
+  return res.ok ? scrapeAll(res.items, site) : res.status;
 }

 async function fetchUpcoming(site) {
   if (site.parameters?.upcoming) {
     const url = `${site.url}/video/upcoming`;
-    const qUpcoming = await geta(url, '.content-grid-item');
+    const res = await geta(url, '.content-grid-item');

-    return qUpcoming && scrapeAll(qUpcoming, site);
+    return res.ok ? scrapeAll(res.items, site) : res.status;
   }

   return [];
 }

 async function fetchScene(url, site) {
-  const qScene = await get(url);
+  const res = await get(url);

-  return qScene && scrapeScene(qScene, url, site);
+  return res.ok ? scrapeScene(res.item, url, site) : res.status;
 }

 async function fetchProfile(actorName, siteSlug) {
@@ -141,15 +141,17 @@ async function fetchProfile(actorName, siteSlug) {
   const origin = slugUrlMap[siteSlug] || `https://www.${siteSlug}.com`;
   const url = `${origin}/model/alpha/${firstLetter}`;
-  const { qa } = await get(url);
+  const resModels = await get(url);

-  const modelPath = qa('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));
+  if (!resModels.ok) return resModels.status;
+
+  const modelPath = resModels.item.qa('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));

   if (modelPath) {
     const modelUrl = `${origin}${modelPath}`;
-    const qModel = await get(modelUrl);
+    const resModel = await get(modelUrl);

-    if (qModel) return scrapeProfile(qModel, actorName, origin);
+    return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status;
   }

   return null;

View File

@@ -34,15 +34,15 @@ function scrapeScene({ q }, _site) {
 async function fetchLatest(site, page = 1) {
   const url = `${site.url}/${page}`;
-  const qLatest = await geta(url, '.selector');
+  const res = await geta(url, '.selector');

-  return qLatest && scrapeLatest(qLatest, site);
+  return res.ok ? scrapeLatest(res.items, site) : res.status;
 }

 async function fetchScene(url, site) {
-  const qScene = await get(url);
+  const res = await get(url);

-  return qScene && scrapeScene(qScene, site);
+  return res.ok ? scrapeScene(res.item, site) : res.status;
 }

 module.exports = {

View File

@@ -64,6 +64,12 @@ function qall(context, selector, attrArg, applyTrim = true) {
   return Array.from(context.querySelectorAll(selector));
 }

+function qhtml(context, selector) {
+  const el = q(context, selector, null, true);
+
+  return el && el.innerHTML;
+}
+
 function qtexts(context, selector, applyTrim = true, filter = true) {
   const el = q(context, selector, null, applyTrim);
   if (!el) return null;
@@ -160,34 +166,36 @@ function qlength(context, selector, match, attr = 'textContent') {
 const funcs = {
   q,
+  qa: qall,
   qall,
+  qd: qdate,
   qdate,
+  qh: qhtml,
+  qhtml,
+  qi: qimage,
   qimage,
   qimages,
-  qposter,
+  qis: qimages,
+  ql: qlength,
   qlength,
+  qm: qmeta,
   qmeta,
+  qp: qposter,
+  qposter,
+  qs: qall,
+  qt: qtrailer,
   qtext,
   qtexts,
   qtrailer,
   qtrailers,
-  qurl,
-  qurls,
-  qa: qall,
-  qs: qall,
-  qd: qdate,
-  qi: qimage,
-  qis: qimages,
-  qp: qposter,
-  ql: qlength,
-  qm: qmeta,
-  qt: qtrailer,
   qts: qtrailers,
   qtx: qtext,
-  qtxt: qtext,
   qtxs: qtexts,
+  qtxt: qtext,
+  qtxts: qtexts,
   qu: qurl,
+  qurl,
+  qurls,
   qus: qurls,
 };
@@ -246,12 +254,26 @@ async function get(url, selector, headers, all = false) {
   });

   if (res.statusCode === 200) {
-    return all
+    const item = all
       ? extractAll(res.body.toString(), selector)
       : extract(res.body.toString(), selector);
+
+    return {
+      item,
+      items: all ? item : [item],
+      res,
+      ok: true,
+      status: res.statusCode,
+    };
   }

-  return null;
+  return {
+    item: null,
+    items: [],
+    res,
+    ok: false,
+    status: res.statusCode,
+  };
 }

 async function getAll(url, selector, headers) {

src/utils/timeout.js Normal file (+31)
View File

@@ -0,0 +1,31 @@
+'use strict';
+
+const bhttp = require('bhttp');
+
+const sleep = 5000;
+const timeout = 1000;
+
+async function init() {
+  try {
+    const res = await bhttp.get(`https://httpstat.us/200?sleep=${sleep}`, {
+      responseTimeout: timeout,
+    });
+
+    console.log(res.statusCode);
+  } catch (error) {
+    console.log(error);
+  }
+}
+
+/*
+/home/pendulum/projectx/node_modules/bhttp/lib/bhttp.js:159
+      err.response = response;
+                   ^
+
+TypeError: Cannot assign to read only property 'response' of object '[object Object]'
+    at addErrorData (/home/pendulum/projectx/node_modules/bhttp/lib/bhttp.js:159:16)
+    at Timeout.timeoutHandler [as _onTimeout] (/home/pendulum/projectx/node_modules/bhttp/lib/bhttp.js:525:27)
+*/
+
+init();
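The script above reproduces a bhttp crash: when responseTimeout fires, bhttp's timeout handler throws while assigning to a read-only property on the error object. One possible workaround — a sketch, not part of this commit — is to race the request against a manual timer instead of relying on responseTimeout:

'use strict';

const bhttp = require('bhttp');

// Reject after ms milliseconds instead of using bhttp's responseTimeout option
function withTimeout(promise, ms) {
  let timer;

  const timeout = new Promise((resolve, reject) => {
    timer = setTimeout(() => reject(new Error(`Request timed out after ${ms}ms`)), ms);
  });

  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}

async function init() {
  try {
    const res = await withTimeout(bhttp.get('https://httpstat.us/200?sleep=5000'), 1000);
    console.log(res.statusCode);
  } catch (error) {
    console.log(error.message); // 'Request timed out after 1000ms'
  }
}

init();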