forked from DebaucheryLibrarian/traxxx
Major API change for 'q', renamed to 'qu', refactored modules. Fixed Gamma URL entry ID regex.
This commit is contained in:
parent
7d71cf3a8c
commit
6cbb7f9c1e
|
@ -3,21 +3,6 @@
|
||||||
class="banner"
|
class="banner"
|
||||||
@wheel.prevent="scrollBanner"
|
@wheel.prevent="scrollBanner"
|
||||||
>
|
>
|
||||||
<template v-if="release.covers && release.covers.length > 0">
|
|
||||||
<a
|
|
||||||
v-for="cover in release.covers"
|
|
||||||
:key="`cover-${cover.id}`"
|
|
||||||
:href="`/media/${cover.path}`"
|
|
||||||
target="_blank"
|
|
||||||
rel="noopener noreferrer"
|
|
||||||
>
|
|
||||||
<img
|
|
||||||
:src="`/media/${cover.thumbnail}`"
|
|
||||||
class="cover"
|
|
||||||
>
|
|
||||||
</a>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<div class="trailer">
|
<div class="trailer">
|
||||||
<video
|
<video
|
||||||
v-if="release.trailer"
|
v-if="release.trailer"
|
||||||
|
@ -58,6 +43,21 @@
|
||||||
><Icon icon="image" /></a>
|
><Icon icon="image" /></a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<template v-if="release.covers && release.covers.length > 0">
|
||||||
|
<a
|
||||||
|
v-for="cover in release.covers"
|
||||||
|
:key="`cover-${cover.id}`"
|
||||||
|
:href="`/media/${cover.path}`"
|
||||||
|
target="_blank"
|
||||||
|
rel="noopener noreferrer"
|
||||||
|
>
|
||||||
|
<img
|
||||||
|
:src="`/media/${cover.thumbnail}`"
|
||||||
|
class="item cover"
|
||||||
|
>
|
||||||
|
</a>
|
||||||
|
</template>
|
||||||
|
|
||||||
<a
|
<a
|
||||||
v-for="photo in photos"
|
v-for="photo in photos"
|
||||||
:key="`banner-${photo.index}`"
|
:key="`banner-${photo.index}`"
|
||||||
|
|
|
@ -2391,56 +2391,56 @@ const sites = [
|
||||||
{
|
{
|
||||||
slug: 'sexuallybroken',
|
slug: 'sexuallybroken',
|
||||||
name: 'Sexually Broken',
|
name: 'Sexually Broken',
|
||||||
url: 'http://www.sexuallybroken.com',
|
url: 'https://www.sexuallybroken.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'infernalrestraints',
|
slug: 'infernalrestraints',
|
||||||
name: 'Infernal Restraints',
|
name: 'Infernal Restraints',
|
||||||
url: 'http://www.infernalrestraints.com',
|
url: 'https://www.infernalrestraints.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'hardtied',
|
slug: 'hardtied',
|
||||||
name: 'Hardtied',
|
name: 'Hardtied',
|
||||||
url: 'http://www.hardtied.com',
|
url: 'https://www.hardtied.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'realtimebondage',
|
slug: 'realtimebondage',
|
||||||
name: 'Real Time Bondage',
|
name: 'Real Time Bondage',
|
||||||
url: 'http://www.realtimebondage.com',
|
url: 'https://www.realtimebondage.com',
|
||||||
tags: ['bdsm', 'live'],
|
tags: ['bdsm', 'live'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'topgrl',
|
slug: 'topgrl',
|
||||||
name: 'TopGrl',
|
name: 'TopGrl',
|
||||||
url: 'http://www.topgrl.com',
|
url: 'https://www.topgrl.com',
|
||||||
tags: ['bdsm', 'femdom'],
|
tags: ['bdsm', 'femdom'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'paintoy',
|
slug: 'paintoy',
|
||||||
name: 'Paintoy',
|
name: 'Paintoy',
|
||||||
url: 'http://www.paintoy.com',
|
url: 'https://www.paintoy.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'aganmedon',
|
slug: 'aganmedon',
|
||||||
name: 'Agan Medon',
|
name: 'Agan Medon',
|
||||||
url: 'http://www.aganmedon.com',
|
url: 'https://www.aganmedon.com',
|
||||||
tags: ['bdsm', 'animated'],
|
tags: ['bdsm', 'animated'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'sensualpain',
|
slug: 'sensualpain',
|
||||||
name: 'Sensual Pain',
|
name: 'Sensual Pain',
|
||||||
url: 'http://www.sensualpain.com',
|
url: 'https://www.sensualpain.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
network: 'insex',
|
network: 'insex',
|
||||||
},
|
},
|
||||||
|
|
|
@ -111,7 +111,7 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight
|
||||||
const { releases: storedReleases } = await storeReleases(curatedReleases);
|
const { releases: storedReleases } = await storeReleases(curatedReleases);
|
||||||
const movieScenes = storedReleases.map(movie => movie.scenes).flat();
|
const movieScenes = storedReleases.map(movie => movie.scenes).flat();
|
||||||
|
|
||||||
console.log(movieScenes);
|
// console.log(movieScenes);
|
||||||
|
|
||||||
if (storedReleases) {
|
if (storedReleases) {
|
||||||
logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
|
logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
|
||||||
|
@ -120,7 +120,7 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight
|
||||||
}
|
}
|
||||||
|
|
||||||
async function deepFetchReleases(baseReleases, beforeFetchLatest) {
|
async function deepFetchReleases(baseReleases, beforeFetchLatest) {
|
||||||
return Promise.map(baseReleases, async (release) => {
|
const deepReleases = await Promise.map(baseReleases, async (release) => {
|
||||||
if (release.url || (release.path && release.site)) {
|
if (release.url || (release.path && release.site)) {
|
||||||
try {
|
try {
|
||||||
const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest);
|
const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest);
|
||||||
|
@ -150,6 +150,10 @@ async function deepFetchReleases(baseReleases, beforeFetchLatest) {
|
||||||
}, {
|
}, {
|
||||||
concurrency: 2,
|
concurrency: 2,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// console.log(deepReleases);
|
||||||
|
|
||||||
|
return deepReleases;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
@ -18,23 +18,23 @@ function matchActors(actorString, models) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeLatest(scenes, site, models) {
|
function scrapeLatest(scenes, site, models) {
|
||||||
return scenes.map(({ q, qd, qu, qi }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const pathname = qu('a.itemimg').slice(1);
|
const pathname = qu.url('a.itemimg').slice(1);
|
||||||
[release.entryId] = pathname.split('/').slice(-1);
|
[release.entryId] = pathname.split('/').slice(-1);
|
||||||
release.url = `${site.url}${pathname}`;
|
release.url = `${site.url}${pathname}`;
|
||||||
|
|
||||||
release.title = q('.itemimg img', 'alt') || q('h4 a', true);
|
release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true);
|
||||||
release.description = q('.mas_longdescription', true);
|
release.description = qu.q('.mas_longdescription', true);
|
||||||
release.date = qd('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);
|
release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);
|
||||||
|
|
||||||
const actorString = q('.mas_description', true);
|
const actorString = qu.q('.mas_description', true);
|
||||||
const actors = matchActors(actorString, models);
|
const actors = matchActors(actorString, models);
|
||||||
if (actors.length > 0) release.actors = actors;
|
if (actors.length > 0) release.actors = actors;
|
||||||
else release.actors = extractActors(actorString);
|
else release.actors = extractActors(actorString);
|
||||||
|
|
||||||
const posterPath = qi('.itemimg img');
|
const posterPath = qu.img('.itemimg img');
|
||||||
release.poster = `${site.url}/${posterPath}`;
|
release.poster = `${site.url}/${posterPath}`;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
|
@ -72,17 +72,17 @@ function scrapeScene({ html, q, qa, qd, qis }, url, site, models) {
|
||||||
function extractModels({ el }, site) {
|
function extractModels({ el }, site) {
|
||||||
const models = ctxa(el, '.item');
|
const models = ctxa(el, '.item');
|
||||||
|
|
||||||
return models.map(({ q, qu }) => {
|
return models.map(({ qu }) => {
|
||||||
const actor = { gender: 'female' };
|
const actor = { gender: 'female' };
|
||||||
|
|
||||||
const avatar = q('.itemimg img');
|
const avatar = qu.q('.itemimg img');
|
||||||
actor.avatar = `${site.url}/${avatar.src}`;
|
actor.avatar = `${site.url}/${avatar.src}`;
|
||||||
actor.name = avatar.alt
|
actor.name = avatar.alt
|
||||||
.split(':').slice(-1)[0]
|
.split(':').slice(-1)[0]
|
||||||
.replace(/xtreme girl|nurse/ig, '')
|
.replace(/xtreme girl|nurse/ig, '')
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
const actorPath = qu('.itemimg');
|
const actorPath = qu.url('.itemimg');
|
||||||
actor.url = `${site.url}${actorPath.slice(1)}`;
|
actor.url = `${site.url}${actorPath.slice(1)}`;
|
||||||
|
|
||||||
return actor;
|
return actor;
|
||||||
|
|
|
@ -79,32 +79,32 @@ function scrapeUpcoming(html, site) {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
function scrapeScene(html, url, _site) {
|
function scrapeScene(html, url, _site) {
|
||||||
const { q, qa, qu, qi, qt } = ex(html, '.playerSection');
|
const { qu } = ex(html, '.playerSection');
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
[release.shootId] = q('.vdoTags + .vdoCast', true).match(/\w+$/);
|
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
|
||||||
[release.entryId] = url.split('/')[3].match(/\d+$/);
|
[release.entryId] = url.split('/')[3].match(/\d+$/);
|
||||||
release.title = q('.ps-vdoHdd h1', true);
|
release.title = qu.q('.ps-vdoHdd h1', true);
|
||||||
release.description = q('.vdoDesc', true);
|
release.description = qu.q('.vdoDesc', true);
|
||||||
|
|
||||||
release.actors = qa('a[href*="/model"]', true);
|
release.actors = qu.all('a[href*="/model"]', true);
|
||||||
release.tags = qa('.vdoTags a', true);
|
release.tags = qu.all('.vdoTags a', true);
|
||||||
|
|
||||||
release.stars = Number(q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
||||||
|
|
||||||
const poster = qi('img#player-overlay-image');
|
const poster = qu.img('img#player-overlay-image');
|
||||||
release.poster = [
|
release.poster = [
|
||||||
poster,
|
poster,
|
||||||
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
||||||
];
|
];
|
||||||
|
|
||||||
release.trailer = { src: qt() };
|
release.trailer = { src: qu.trailer() };
|
||||||
|
|
||||||
// all scenes seem to have 12 album photos available, not always included on the page
|
// all scenes seem to have 12 album photos available, not always included on the page
|
||||||
const firstPhotoUrl = ex(html).qi('img[data-slider-index="1"]');
|
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
|
||||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||||
|
|
||||||
const [channel] = qu('a[href*="/websites"]').match(/\w+$/);
|
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||||
release.channel = channel === 'bangcasting' ? 'bangbroscasting' : channel;
|
release.channel = channel === 'bangcasting' ? 'bangbroscasting' : channel;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
|
|
|
@ -127,13 +127,15 @@ function scrapeActorSearch(html, url, actorName) {
|
||||||
|
|
||||||
async function fetchActorReleases({ qu, html }, accReleases = []) {
|
async function fetchActorReleases({ qu, html }, accReleases = []) {
|
||||||
const releases = scrapeAll(html);
|
const releases = scrapeAll(html);
|
||||||
const next = qu('.pagination .next a');
|
const next = qu.url('.pagination .next a');
|
||||||
|
|
||||||
if (next) {
|
if (next) {
|
||||||
const url = `https://www.brazzers.com${next}`;
|
const url = `https://www.brazzers.com${next}`;
|
||||||
const qNext = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
return fetchActorReleases(qNext, accReleases.concat(releases));
|
if (res.ok) {
|
||||||
|
return fetchActorReleases(res.item, accReleases.concat(releases));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return accReleases.concat(releases);
|
return accReleases.concat(releases);
|
||||||
|
|
|
@ -4,12 +4,12 @@ const { get, geta, ctxa, ed } = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
function scrapeAll(scenes, site) {
|
function scrapeAll(scenes, site) {
|
||||||
return scenes.map(({ q, qa, qu, qd, ql, qi, qt }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const url = qu('.text-thumb a');
|
const url = qu.url('.text-thumb a');
|
||||||
const { pathname } = new URL(url);
|
const { pathname } = new URL(url);
|
||||||
const channelUrl = qu('.badge');
|
const channelUrl = qu.url('.badge');
|
||||||
|
|
||||||
if (site?.parameters?.extract && q('.badge', true) !== site.name) {
|
if (site?.parameters?.extract && qu.q('.badge', true) !== site.name) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,15 +17,15 @@ function scrapeAll(scenes, site) {
|
||||||
|
|
||||||
release.url = channelUrl ? `${channelUrl}${pathname}` : url;
|
release.url = channelUrl ? `${channelUrl}${pathname}` : url;
|
||||||
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
|
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
|
||||||
release.title = q('.text-thumb a', true);
|
release.title = qu.q('.text-thumb a', true);
|
||||||
|
|
||||||
release.date = qd('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
release.date = qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
||||||
release.duration = ql('.date', /(\d{2}:)?\d{2}:\d{2}/);
|
release.duration = qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
|
||||||
|
|
||||||
release.actors = qa('.category a', true);
|
release.actors = qu.all('.category a', true);
|
||||||
|
|
||||||
release.poster = qi('img.video_placeholder, .video-images img');
|
release.poster = qu.img('img.video_placeholder, .video-images img');
|
||||||
release.teaser = { src: qt() };
|
release.teaser = { src: qu.trailer() };
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
|
@ -62,20 +62,24 @@ async function fetchClassicProfile(actorName, siteSlug) {
|
||||||
const actorSlug = slugify(actorName);
|
const actorSlug = slugify(actorName);
|
||||||
|
|
||||||
const url = `https://${siteSlug}.com/en/pornstars`;
|
const url = `https://${siteSlug}.com/en/pornstars`;
|
||||||
const { qa } = await get(url);
|
const pornstarsRes = await get(url);
|
||||||
|
|
||||||
const actorPath = qa('option[value*="/pornstar"]')
|
if (!pornstarsRes.ok) return null;
|
||||||
|
|
||||||
|
const actorPath = pornstarsRes.item.qa('option[value*="/pornstar"]')
|
||||||
.find(el => slugify(el.textContent) === actorSlug)
|
.find(el => slugify(el.textContent) === actorSlug)
|
||||||
?.value;
|
?.value;
|
||||||
|
|
||||||
if (actorPath) {
|
if (actorPath) {
|
||||||
const actorUrl = `https://${siteSlug}.com${actorPath}`;
|
const actorUrl = `https://${siteSlug}.com${actorPath}`;
|
||||||
const { html } = await get(actorUrl);
|
const res = await get(actorUrl);
|
||||||
|
|
||||||
const releases = scrapeAll(html, null, `https://www.${siteSlug}.com`, false);
|
if (res.ok) {
|
||||||
|
const releases = scrapeAll(res.item, null, `https://www.${siteSlug}.com`, false);
|
||||||
|
|
||||||
return { releases };
|
return { releases };
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,21 +4,21 @@ const { get, geta, ctxa } = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
function scrapeAll(scenes) {
|
function scrapeAll(scenes) {
|
||||||
return scenes.map(({ el, q, qa, qd, qu, ql }) => {
|
return scenes.map(({ el, qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = el.dataset.setid || q('.update_thumb', 'id').match(/\w+-\w+-(\d+)-\d+/)[1];
|
release.entryId = el.dataset.setid || qu.q('.update_thumb', 'id').match(/\w+-\w+-(\d+)-\d+/)[1];
|
||||||
release.url = qu('.title');
|
release.url = qu.url('.title');
|
||||||
|
|
||||||
release.title = q('.title', true);
|
release.title = qu.q('.title', true);
|
||||||
release.description = q('.title', 'title');
|
release.description = qu.q('.title', 'title');
|
||||||
|
|
||||||
release.date = qd('.video-data > span:last-child', 'YYYY-MM-DD');
|
release.date = qu.date('.video-data > span:last-child', 'YYYY-MM-DD');
|
||||||
release.duration = ql('.video-data > span');
|
release.duration = qu.dur('.video-data > span');
|
||||||
|
|
||||||
release.actors = qa('.update_models a', true);
|
release.actors = qu.all('.update_models a', true);
|
||||||
|
|
||||||
const poster = q('.update_thumb', 'src0_1x');
|
const poster = qu.q('.update_thumb', 'src0_1x');
|
||||||
release.poster = [
|
release.poster = [
|
||||||
poster.replace('-1x', '-2x'),
|
poster.replace('-1x', '-2x'),
|
||||||
poster,
|
poster,
|
||||||
|
|
|
@ -198,7 +198,7 @@ async function scrapeScene(html, url, site, baseRelease, mobileHtml) {
|
||||||
const [data, data2] = json ? JSON.parse(json) : [];
|
const [data, data2] = json ? JSON.parse(json) : [];
|
||||||
const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
|
const videoData = videoJson && JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
|
||||||
|
|
||||||
release.entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})\//)[1];
|
release.entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})(\/|$)/)?.[1];
|
||||||
release.title = videoData?.playerOptions?.sceneInfos.sceneTitle || data?.name;
|
release.title = videoData?.playerOptions?.sceneInfos.sceneTitle || data?.name;
|
||||||
|
|
||||||
// date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
|
// date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
|
||||||
|
@ -298,10 +298,12 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
|
||||||
const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
|
const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
|
||||||
|
|
||||||
const url = getActorReleasesUrl(profilePath, page);
|
const url = getActorReleasesUrl(profilePath, page);
|
||||||
const { html, qu } = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
const releases = scrapeAll(html, null, origin);
|
if (!res.ok) return [];
|
||||||
const nextPage = qu('.Gamma_Paginator a.next');
|
|
||||||
|
const releases = scrapeAll(res.html, null, origin);
|
||||||
|
const nextPage = res.item.qu.url('.Gamma_Paginator a.next');
|
||||||
|
|
||||||
if (nextPage) {
|
if (nextPage) {
|
||||||
return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases));
|
return fetchActorReleases(profileUrl, getActorReleasesUrl, page + 1, accReleases.concat(releases));
|
||||||
|
|
|
@ -60,18 +60,18 @@ function getImageWithFallbacks(q, selector, site, el) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAll(scenes, site) {
|
function scrapeAll(scenes, site) {
|
||||||
return scenes.map(({ q, qu, qd, ql }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.title = q('h3 a', 'title') || q('h3 a', true);
|
release.title = qu.q('h3 a', 'title') || qu.q('h3 a', true);
|
||||||
release.url = qu('h3 a');
|
release.url = qu.url('h3 a');
|
||||||
|
|
||||||
release.date = qd('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
release.date = qu.date('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
||||||
release.duration = ql('.modeldata p');
|
release.duration = qu.dur('.modeldata p');
|
||||||
|
|
||||||
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
|
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||||
|
|
||||||
release.poster = getImageWithFallbacks(q, '.modelimg img', site);
|
release.poster = getImageWithFallbacks(qu.q, '.modelimg img', site);
|
||||||
|
|
||||||
// release.entryId = q('.modelimg img', 'id').match(/set-target-(\d+)/)[1];
|
// release.entryId = q('.modelimg img', 'id').match(/set-target-(\d+)/)[1];
|
||||||
release.entryId = deriveEntryId(release);
|
release.entryId = deriveEntryId(release);
|
||||||
|
@ -81,18 +81,18 @@ function scrapeAll(scenes, site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAllT1(scenes, site, accSiteReleases) {
|
function scrapeAllT1(scenes, site, accSiteReleases) {
|
||||||
return scenes.map(({ q, qi, qd, ql, qu }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.title = q('h4 a', 'title') || q('h4 a', true);
|
release.title = qu.q('h4 a', 'title') || qu.q('h4 a', true);
|
||||||
release.url = qu('h4 a');
|
release.url = qu.url('h4 a');
|
||||||
|
|
||||||
release.date = qd('.more-info-div', 'MMM D, YYYY');
|
release.date = qu.date('.more-info-div', 'MMM D, YYYY');
|
||||||
release.duration = ql('.more-info-div');
|
release.duration = qu.dur('.more-info-div');
|
||||||
|
|
||||||
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
|
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||||
|
|
||||||
const posterPath = q('.img-div img', 'src0_1x') || qi('img.video_placeholder');
|
const posterPath = qu.q('.img-div img', 'src0_1x') || qu.img('img.video_placeholder');
|
||||||
|
|
||||||
if (posterPath) {
|
if (posterPath) {
|
||||||
const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`;
|
const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`;
|
||||||
|
@ -117,16 +117,16 @@ function scrapeAllT1(scenes, site, accSiteReleases) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAllTour(scenes) {
|
function scrapeAllTour(scenes) {
|
||||||
return scenes.map(({ q, qa, qu, qd, qi }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.title = q('h4 a', true);
|
release.title = qu.q('h4 a', true);
|
||||||
release.url = qu('a');
|
release.url = qu.url('a');
|
||||||
release.date = qd('.tour_update_models + span', 'YYYY-MM-DD');
|
release.date = qu.date('.tour_update_models + span', 'YYYY-MM-DD');
|
||||||
|
|
||||||
release.actors = qa('.tour_update_models a', true);
|
release.actors = qu.all('.tour_update_models a', true);
|
||||||
|
|
||||||
release.poster = qi('a img');
|
release.poster = qu.img('a img');
|
||||||
|
|
||||||
release.entryId = deriveEntryId(release);
|
release.entryId = deriveEntryId(release);
|
||||||
|
|
||||||
|
@ -343,7 +343,7 @@ function scrapeProfileTour({ el, q, qtxs }, site) {
|
||||||
|
|
||||||
const qReleases = ctxa(el, '.update_block');
|
const qReleases = ctxa(el, '.update_block');
|
||||||
profile.releases = qReleases.map((qRelease) => {
|
profile.releases = qReleases.map((qRelease) => {
|
||||||
const url = qRelease.qu('.update_image a[href]');
|
const url = qRelease.qu.url('.update_image a[href]');
|
||||||
const release = scrapeSceneTour(qRelease, site);
|
const release = scrapeSceneTour(qRelease, site);
|
||||||
|
|
||||||
if (!/\/(signup|join)/i.test(url)) release.url = url;
|
if (!/\/(signup|join)/i.test(url)) release.url = url;
|
||||||
|
|
|
@ -8,15 +8,15 @@ function scrapeLatest(html, site) {
|
||||||
? exa(html, '#articleTable table[cellspacing="2"]')
|
? exa(html, '#articleTable table[cellspacing="2"]')
|
||||||
: exa(html, 'body > table');
|
: exa(html, 'body > table');
|
||||||
|
|
||||||
return scenes.map(({ q, qd, qi, qu, ql }) => {
|
return scenes.map(({ qu }) => {
|
||||||
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const titleEl = q('.galleryTitleText, .articleTitleText');
|
const titleEl = qu.q('.galleryTitleText, .articleTitleText');
|
||||||
const [title, ...actors] = titleEl.textContent.split('|');
|
const [title, ...actors] = titleEl.textContent.split('|');
|
||||||
const date = qd('.articlePostDateText td', 'MMM D, YYYY');
|
const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');
|
||||||
|
|
||||||
const url = qu(titleEl, 'a');
|
const url = qu.url(titleEl, 'a');
|
||||||
[release.entryId] = url.split('/').slice(-2);
|
[release.entryId] = url.split('/').slice(-2);
|
||||||
release.url = `${site.url}${url}`;
|
release.url = `${site.url}${url}`;
|
||||||
|
|
||||||
|
@ -31,15 +31,15 @@ function scrapeLatest(html, site) {
|
||||||
|
|
||||||
release.actors = actors.map(actor => actor.trim());
|
release.actors = actors.map(actor => actor.trim());
|
||||||
|
|
||||||
const description = q('.articleCopyText', true);
|
const description = qu.q('.articleCopyText', true);
|
||||||
if (description) release.description = description.slice(0, description.lastIndexOf('('));
|
if (description) release.description = description.slice(0, description.lastIndexOf('('));
|
||||||
|
|
||||||
const duration = ql('.articleCopyText a:nth-child(2)');
|
const duration = qu.dur('.articleCopyText a:nth-child(2)');
|
||||||
if (duration) release.duration = duration;
|
if (duration) release.duration = duration;
|
||||||
|
|
||||||
release.likes = parseInt(q('.articlePostDateText td:nth-child(3)', true), 10);
|
release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
|
||||||
|
|
||||||
const cover = qi('a img');
|
const cover = qu.img('a img');
|
||||||
release.covers = [[
|
release.covers = [[
|
||||||
cover.replace('_thumbnail', ''),
|
cover.replace('_thumbnail', ''),
|
||||||
cover,
|
cover,
|
||||||
|
@ -49,31 +49,31 @@ function scrapeLatest(html, site) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ q, qd, ql, qu, qis, qp, qt }, site) {
|
function scrapeScene({ qu }, site) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const titleEl = q('.articleTitleText');
|
const titleEl = qu.q('.articleTitleText');
|
||||||
const [title, ...actors] = titleEl.textContent.split('|');
|
const [title, ...actors] = titleEl.textContent.split('|');
|
||||||
|
|
||||||
const url = qu(titleEl, 'a');
|
const url = qu.url(titleEl, 'a');
|
||||||
[release.entryId] = url.split('/').slice(-2);
|
[release.entryId] = url.split('/').slice(-2);
|
||||||
release.url = `${site.url}${url}`;
|
release.url = `${site.url}${url}`;
|
||||||
|
|
||||||
release.title = title.trim();
|
release.title = title.trim();
|
||||||
release.description = q('.articleCopyText', true);
|
release.description = qu.q('.articleCopyText', true);
|
||||||
|
|
||||||
release.actors = actors.map(actor => actor.trim());
|
release.actors = actors.map(actor => actor.trim());
|
||||||
release.date = qd('.articlePostDateText', 'MMMM D, YYYY');
|
release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
|
||||||
release.duration = ql('.articlePostDateText a:nth-child(2)');
|
release.duration = qu.dur('.articlePostDateText a:nth-child(2)');
|
||||||
|
|
||||||
const [cover, ...photos] = qis('img[src*="images"]');
|
const [cover, ...photos] = qu.imgs('img[src*="images"]');
|
||||||
release.covers = [cover];
|
release.covers = [cover];
|
||||||
release.photos = photos;
|
release.photos = photos;
|
||||||
|
|
||||||
release.poster = qp();
|
release.poster = qu.poster();
|
||||||
|
|
||||||
const trailer = qt();
|
const trailer = qu.trailer();
|
||||||
release.trailer = { src: trailer };
|
if (trailer) release.trailer = { src: trailer };
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
@ -96,9 +96,9 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const qScene = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
return qScene && scrapeScene(qScene, site);
|
return res.ok ? scrapeScene(res.item, site) : res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
@ -118,25 +118,41 @@ async function getPhotos(entryId, site, type = 'highres', page = 1) {
|
||||||
return getPhotosLegacy(entryId, site, 'highres', 1);
|
return getPhotosLegacy(entryId, site, 'highres', 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extract the numeric entry ID from a scene page's raw HTML.
 *
 * Tries the argument of a `showtagform(<digits>)` call first, then falls back
 * to the first run of digits between a `setid:"` marker and the next comma.
 *
 * @param {string} html - Raw page markup to search.
 * @returns {string|null} The entry ID digits, or null when neither pattern yields one.
 */
function getEntryId(html) {
	const tagFormMatch = html.match(/showtagform\((\d+)\)/);

	if (tagFormMatch) {
		return tagFormMatch[1];
	}

	const setIdIndex = html.indexOf('setid:"');

	// indexOf reports "not found" as -1 (truthy) and a hit at the very start as 0 (falsy),
	// so compare against -1 explicitly instead of testing truthiness
	if (setIdIndex !== -1) {
		const commaIndex = html.indexOf(',', setIdIndex);

		// without a trailing comma, read to the end of the string rather than dropping the last character
		const setIdSegment = commaIndex === -1
			? html.slice(setIdIndex)
			: html.slice(setIdIndex, commaIndex);

		const digits = setIdSegment.match(/\d+/);

		// the marker may be present without any digits; report "no ID" instead of throwing
		return digits ? digits[0] : null;
	}

	return null;
}
|
||||||
|
|
||||||
function scrapeAll(scenes, site) {
|
function scrapeAll(scenes, site) {
|
||||||
return scenes.map(({ el, q, qa, qh, qu, qd, qi, qis }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = el.dataset.setid || q('.rating_box')?.dataset.id;
|
release.entryId = qu.el.dataset.setid || qu.q('.rating_box')?.dataset.id;
|
||||||
|
|
||||||
release.url = qu('.update_title, .dvd_info > a, a ~ a');
|
release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
|
||||||
release.title = q('.update_title, .dvd_info > a, a ~ a', true);
|
release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
|
||||||
release.date = qd('.update_date', 'MM/DD/YYYY');
|
release.date = qu.date('.update_date', 'MM/DD/YYYY');
|
||||||
|
|
||||||
release.actors = qa('.update_models a', true);
|
release.actors = qu.all('.update_models a', true);
|
||||||
|
|
||||||
const dvdPhotos = qis('.dvd_preview_thumb');
|
const dvdPhotos = qu.imgs('.dvd_preview_thumb');
|
||||||
const photoCount = Number(q('a img.thumbs', 'cnt')) || 1;
|
const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;
|
||||||
|
|
||||||
[release.poster, ...release.photos] = dvdPhotos.length
|
[release.poster, ...release.photos] = dvdPhotos.length
|
||||||
? dvdPhotos
|
? dvdPhotos
|
||||||
: Array.from({ length: photoCount }).map((value, index) => {
|
: Array.from({ length: photoCount }).map((value, index) => {
|
||||||
const src = qi('a img.thumbs', `src${index}_1x`) || qi('a img.thumbs', `src${index}`) || qi('a img.thumbs');
|
const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');
|
||||||
|
|
||||||
return src ? {
|
return src ? {
|
||||||
src: /^http/.test(src) ? src : `${site.url}${src}`,
|
src: /^http/.test(src) ? src : `${site.url}${src}`,
|
||||||
|
@ -144,7 +160,7 @@ function scrapeAll(scenes, site) {
|
||||||
} : null;
|
} : null;
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
const teaserScript = qh('script');
|
const teaserScript = qu.content('script');
|
||||||
if (teaserScript) {
|
if (teaserScript) {
|
||||||
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||||
if (src) release.teaser = { src };
|
if (src) release.teaser = { src };
|
||||||
|
@ -204,50 +220,17 @@ function scrapeUpcoming(html, site) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(html, url, site) {
|
async function scrapeScene({ qu }, url, site) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
|
||||||
|
|
||||||
const release = { url, site };
|
const release = { url, site };
|
||||||
|
|
||||||
release.title = $('.title_bar_hilite').text().trim();
|
release.entryId = getEntryId(qu.html);
|
||||||
|
release.title = qu.q('.title_bar_hilite', true);
|
||||||
|
release.description = qu.q('.update_description', true);
|
||||||
|
|
||||||
const entryId = html.match(/showtagform\((\d+)\)/);
|
release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
||||||
|
release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
||||||
|
|
||||||
if (entryId) release.entryId = entryId[1];
|
const posterPath = qu.html.match(/useimage = "(.*)"/)?.[1];
|
||||||
else {
|
|
||||||
const setIdIndex = html.indexOf('setid:"');
|
|
||||||
if (setIdIndex) release.entryId = html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/)[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
const dateElement = $('.update_date').text().trim();
|
|
||||||
const dateComment = $('*')
|
|
||||||
.contents()
|
|
||||||
.toArray()
|
|
||||||
.find(({ type, data }) => type === 'comment' && data.match('Date OFF'));
|
|
||||||
|
|
||||||
if (dateElement) {
|
|
||||||
release.date = moment
|
|
||||||
.utc($('.update_date').text(), 'MM/DD/YYYY')
|
|
||||||
.toDate();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dateComment) {
|
|
||||||
release.date = moment
|
|
||||||
.utc(dateComment.nodeValue.match(/\d{2}\/\d{2}\/\d{4}/), 'MM/DD/YYYY')
|
|
||||||
.toDate();
|
|
||||||
}
|
|
||||||
|
|
||||||
release.description = $('.update_description').text().trim();
|
|
||||||
|
|
||||||
release.actors = $('.backgroundcolor_info > .update_models a, .item .update_models a')
|
|
||||||
.map((_actorIndex, actorElement) => $(actorElement).text())
|
|
||||||
.toArray();
|
|
||||||
|
|
||||||
const infoLines = $('script:contains("useimage")')
|
|
||||||
.html()
|
|
||||||
.split('\n');
|
|
||||||
|
|
||||||
const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
|
||||||
|
|
||||||
if (posterPath) {
|
if (posterPath) {
|
||||||
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
|
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
|
||||||
|
@ -261,7 +244,7 @@ async function scrapeScene(html, url, site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (site.slug !== 'manuelferrara') {
|
if (site.slug !== 'manuelferrara') {
|
||||||
const trailerLines = infoLines.filter(line => /movie\["Trailer\w*"\]\[/.test(line));
|
const trailerLines = qu.html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));
|
||||||
|
|
||||||
if (trailerLines.length) {
|
if (trailerLines.length) {
|
||||||
release.trailer = trailerLines.map((trailerLine) => {
|
release.trailer = trailerLines.map((trailerLine) => {
|
||||||
|
@ -270,19 +253,24 @@ async function scrapeScene(html, url, site) {
|
||||||
|
|
||||||
return src && {
|
return src && {
|
||||||
src: /^http/.test(src) ? src : `${site.url}${src}`,
|
src: /^http/.test(src) ? src : `${site.url}${src}`,
|
||||||
quality: quality && Number(quality),
|
quality: quality && Number(quality.replace('558', '540')),
|
||||||
};
|
};
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
release.photos = await getPhotos(release.entryId, site);
|
release.photos = await getPhotos(release.entryId, site);
|
||||||
release.tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
release.tags = qu.all('.update_tags a', true);
|
||||||
|
|
||||||
const movie = $('.update_dvds a').attr('href');
|
if (qu.exists('.update_dvds a')) {
|
||||||
if (movie) release.movie = movie;
|
release.movie = {
|
||||||
|
url: qu.url('.update_dvds a'),
|
||||||
|
title: qu.q('.update_dvds a', true),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
release.stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
|
const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
|
||||||
|
if (stars) release.stars = stars;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
@ -371,9 +359,9 @@ async function fetchUpcoming(site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
return scrapeScene(res.body.toString(), url, site);
|
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchMovie(url, site) {
|
async function fetchMovie(url, site) {
|
||||||
|
|
|
@ -3,52 +3,55 @@
|
||||||
const { geta, ed } = require('../utils/q');
|
const { geta, ed } = require('../utils/q');
|
||||||
|
|
||||||
function scrapeBlockLatest(scenes) {
|
function scrapeBlockLatest(scenes) {
|
||||||
return scenes.map(({ html, q, qa, qu, qt }) => {
|
return scenes.map(({ html, qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const entryId = q('div[class*="videothumb"]', 'class').match(/videothumb_(\d+)/)
|
const entryId = qu.q('div[class*="videothumb"]', 'class').match(/videothumb_(\d+)/)
|
||||||
|| q('div[id*="videothumb"]', 'id').match(/videothumb_(\d+)/);
|
|| qu.q('div[id*="videothumb"]', 'id').match(/videothumb_(\d+)/);
|
||||||
|
|
||||||
release.entryId = entryId[1];
|
release.entryId = entryId[1];
|
||||||
|
|
||||||
release.title = q('h4 a', true);
|
release.title = qu.q('h4 a', true);
|
||||||
release.url = qu('h4 a');
|
release.url = qu.url('h4 a');
|
||||||
release.date = ed(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
|
release.date = ed(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
|
||||||
|
|
||||||
release.actors = qa('.tour_update_models a', true);
|
release.actors = qu.all('.tour_update_models a', true);
|
||||||
|
|
||||||
release.poster = q('div img').dataset.src;
|
release.poster = qu.q('div img').dataset.src;
|
||||||
release.photos = [q('div img', 'src0_4x') || q('div img', 'src0_3x') || q('div img', 'src0_2x')];
|
release.photos = [qu.q('div img', 'src0_4x') || qu.q('div img', 'src0_3x') || qu.q('div img', 'src0_2x')];
|
||||||
|
|
||||||
release.teaser = qt();
|
release.teaser = qu.video();
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeClassicLatest(scenes) {
|
function scrapeClassicLatest(scenes) {
|
||||||
return scenes.map(({ el, q, qa, qd, qu }) => {
|
return scenes.map(({ el, qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = el.dataset.setid;
|
release.entryId = el.dataset.setid;
|
||||||
release.url = qu('a');
|
release.url = qu.url('a');
|
||||||
|
|
||||||
release.title = q('.update_title_small', true) || q('a:nth-child(2)', true);
|
release.title = qu.q('.update_title_small', true) || qu.q('a:nth-child(2)', true);
|
||||||
|
|
||||||
const description = q('a', 'title');
|
const description = qu.q('a', 'title');
|
||||||
if (description) release.description = description;
|
if (description) release.description = description;
|
||||||
|
|
||||||
const date = qd('.date_small, .update_date', 'MM/DD/YYYY');
|
const date = qu.date('.date_small, .update_date', 'MM/DD/YYYY');
|
||||||
if (date) release.date = date;
|
if (date) release.date = date;
|
||||||
|
|
||||||
const durationLine = q('.update_counts', true);
|
const durationLine = qu.q('.update_counts', true);
|
||||||
if (durationLine) release.duration = Number(durationLine.match(/(\d+) min/i)[1]) * 60;
|
if (durationLine) release.duration = Number(durationLine.match(/(\d+) min/i)[1]) * 60;
|
||||||
|
|
||||||
const actors = qa('.update_models a', true);
|
const actors = qu.all('.update_models a', true);
|
||||||
release.actors = actors.length > 0 ? actors : q('.update_models', true).split(/,\s*/);
|
release.actors = actors.length > 0 ? actors : qu.q('.update_models', true).split(/,\s*/);
|
||||||
|
|
||||||
const photoCount = q('.update_thumb', 'cnt');
|
const photoCount = qu.q('.update_thumb', 'cnt');
|
||||||
[release.poster, ...release.photos] = Array.from({ length: photoCount }).map((value, index) => q('.update_thumb', `src${index}_3x`) || q('.update_thumb', `src${index}_2x`) || q('.update_thumb', `src${index}_1x`));
|
[release.poster, ...release.photos] = Array.from({ length: photoCount })
|
||||||
|
.map((value, index) => qu.q('.update_thumb', `src${index}_3x`)
|
||||||
|
|| qu.q('.update_thumb', `src${index}_2x`)
|
||||||
|
|| qu.q('.update_thumb', `src${index}_1x`));
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
|
|
|
@ -18,13 +18,13 @@ async function getPhotos(albumUrl) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAll(scenes, site, origin) {
|
function scrapeAll(scenes, site, origin) {
|
||||||
return scenes.map(({ q, qa, qu, qd }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.title = q('.title a', true);
|
release.title = qu.q('.title a', true);
|
||||||
|
|
||||||
const url = qu('.title a').split('?')[0];
|
const url = qu.url('.title a').split('?')[0];
|
||||||
const channelUrl = qu('.site-link');
|
const channelUrl = qu.url('.site-link');
|
||||||
|
|
||||||
if (/^http/.test(url)) {
|
if (/^http/.test(url)) {
|
||||||
const { pathname } = new URL(url);
|
const { pathname } = new URL(url);
|
||||||
|
@ -39,74 +39,74 @@ function scrapeAll(scenes, site, origin) {
|
||||||
else if (site?.url) release.url = `${site.url}${url}`;
|
else if (site?.url) release.url = `${site.url}${url}`;
|
||||||
else if (origin) release.url = `${origin}${url}`;
|
else if (origin) release.url = `${origin}${url}`;
|
||||||
} else {
|
} else {
|
||||||
release.entryId = q('a img', 'tube_tour_thumb_id');
|
release.entryId = qu.q('a img', 'tube_tour_thumb_id');
|
||||||
}
|
}
|
||||||
|
|
||||||
release.date = qd('.date', 'MMM D, YYYY');
|
release.date = qu.date('.date', 'MMM D, YYYY');
|
||||||
release.actors = qa('.models a.model', true);
|
release.actors = qu.all('.models a.model', true);
|
||||||
|
|
||||||
const poster = q('img').dataset.original;
|
const poster = qu.q('img').dataset.original;
|
||||||
release.poster = [
|
release.poster = [
|
||||||
poster.replace('_640', '_1280'),
|
poster.replace('_640', '_1280'),
|
||||||
poster,
|
poster,
|
||||||
];
|
];
|
||||||
|
|
||||||
release.stars = Number(q('.rating', true));
|
release.stars = Number(qu.q('.rating', true));
|
||||||
release.likes = Number(q('.likes', true));
|
release.likes = Number(qu.q('.likes', true));
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene({ q, qa, qd, qp, qu, qi }, url, site) {
|
async function scrapeScene(qu, url, site) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const { origin, pathname } = new URL(url);
|
const { origin, pathname } = new URL(url);
|
||||||
release.url = `${origin}${pathname}`;
|
release.url = `${origin}${pathname}`;
|
||||||
|
|
||||||
release.entryId = new URL(url).pathname.split('/')[3];
|
release.entryId = new URL(url).pathname.split('/')[3];
|
||||||
release.title = q('.content-pane-title h2', true);
|
release.title = qu.q('.content-pane-title h2', true);
|
||||||
release.description = q('.content-pane-column div', true);
|
release.description = qu.q('.content-pane-column div', true);
|
||||||
|
|
||||||
release.date = qd('.date', 'MMM D, YYYY');
|
release.date = qu.q('.date', 'MMM D, YYYY');
|
||||||
|
|
||||||
release.actors = qa('.content-pane-performers .model', true);
|
release.actors = qu.all('.content-pane-performers .model', true);
|
||||||
release.tags = qa('.categories a', true);
|
release.tags = qu.all('.categories a', true);
|
||||||
|
|
||||||
release.poster = qp() || qi('.fake-video-player img');
|
release.poster = qu.poster() || qu.img('.fake-video-player img');
|
||||||
release.trailer = qa('source').map(source => ({
|
release.trailer = qu.all('source').map(source => ({
|
||||||
src: source.src,
|
src: source.src,
|
||||||
quality: Number(source.getAttribute('res')),
|
quality: Number(source.getAttribute('res')),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.stars = Number(q('.score', true));
|
release.stars = Number(qu.q('.score', true));
|
||||||
release.likes = Number(q('#likecount', true));
|
release.likes = Number(qu.q('#likecount', true));
|
||||||
|
|
||||||
const albumLink = qu('.content-pane-related-links a[href*="gallery"]');
|
const albumLink = qu.url('.content-pane-related-links a[href*="gallery"]');
|
||||||
if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`);
|
if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`);
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile({ q, qa, qi, qu }, _actorName, origin) {
|
function scrapeProfile({ qu }, _actorName, origin) {
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
const keys = qa('.model-profile h5', true);
|
const keys = qu.all('.model-profile h5', true);
|
||||||
const values = qa('.model-profile h5 + p', true);
|
const values = qu.all('.model-profile h5 + p', true);
|
||||||
|
|
||||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, { delimiter: '_' })]: values[index] }), {});
|
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, { delimiter: '_' })]: values[index] }), {});
|
||||||
|
|
||||||
profile.age = Number(bio.age);
|
profile.age = Number(bio.age);
|
||||||
profile.description = q('.model-bio', true);
|
profile.description = qu.q('.model-bio', true);
|
||||||
|
|
||||||
profile.residencePlace = bio.location;
|
profile.residencePlace = bio.location;
|
||||||
|
|
||||||
profile.height = heightToCm(bio.height);
|
profile.height = heightToCm(bio.height);
|
||||||
[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v);
|
[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v);
|
||||||
|
|
||||||
profile.avatar = qi('.model-profile img');
|
profile.avatar = qu.img('.model-profile img');
|
||||||
|
|
||||||
const releases = qa('.content-grid-item').filter(el => /video\//.test(qu(el, '.img-wrapper a'))); // filter out photos
|
const releases = qu.all('.content-grid-item').filter(el => /video\//.test(qu.url(el, '.img-wrapper a'))); // filter out photos
|
||||||
profile.releases = scrapeAll(ctxa(releases), null, origin);
|
profile.releases = scrapeAll(ctxa(releases), null, origin);
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
|
@ -145,7 +145,7 @@ async function fetchProfile(actorName, siteSlug) {
|
||||||
|
|
||||||
if (!resModels.ok) return resModels.status;
|
if (!resModels.ok) return resModels.status;
|
||||||
|
|
||||||
const modelPath = resModels.item.qa('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));
|
const modelPath = resModels.item.qu.all('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));
|
||||||
|
|
||||||
if (modelPath) {
|
if (modelPath) {
|
||||||
const modelUrl = `${origin}${modelPath}`;
|
const modelUrl = `${origin}${modelPath}`;
|
||||||
|
|
|
@ -178,16 +178,16 @@ async function fetchScene(url, site) {
|
||||||
async function fetchProfile(actorName) {
|
async function fetchProfile(actorName) {
|
||||||
const actorSearchSlug = slugify(actorName, { delimiter: '+' });
|
const actorSearchSlug = slugify(actorName, { delimiter: '+' });
|
||||||
const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
|
const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
|
||||||
const modelLinks = await geta(url, '.model h3 a');
|
const modelRes = await geta(url, '.model h3 a');
|
||||||
|
|
||||||
if (modelLinks) {
|
if (modelRes.ok) {
|
||||||
const actorSlug = slugify(actorName);
|
const actorSlug = slugify(actorName);
|
||||||
const model = modelLinks.find(({ text }) => slugify(text) === actorSlug);
|
const model = modelRes.items.find(({ text }) => slugify(text) === actorSlug);
|
||||||
|
|
||||||
if (model) {
|
if (model) {
|
||||||
const qProfile = await get(model.el.href);
|
const res = await get(model.el.href);
|
||||||
|
|
||||||
return qProfile && scrapeProfile(qProfile);
|
return res.ok ? scrapeProfile(res.item) : res.status;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -65,49 +65,49 @@ function scrapeAll(html, site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(html, url, site) {
|
async function scrapeScene(html, url, site) {
|
||||||
const { q, qa, qtext, qi, qd, ql, qu, qis, qp } = ex(html, '#videos-page, #content');
|
const { qu } = ex(html, '#videos-page, #content');
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
[release.entryId] = new URL(url).pathname.split('/').slice(-2);
|
[release.entryId] = new URL(url).pathname.split('/').slice(-2);
|
||||||
|
|
||||||
release.title = q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
|
release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
|
||||||
|| q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();
|
|| qu.q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();
|
||||||
release.description = qtext('.p-desc, .desc');
|
release.description = qu.text('.p-desc, .desc');
|
||||||
|
|
||||||
release.actors = qa('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);
|
release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);
|
||||||
|
|
||||||
if (release.actors.length === 0) {
|
if (release.actors.length === 0) {
|
||||||
const actorEl = qa('.stat').find(stat => /Featuring/.test(stat.textContent));
|
const actorEl = qu.all('.stat').find(stat => /Featuring/.test(stat.textContent));
|
||||||
const actorString = qtext(actorEl);
|
const actorString = qu.text(actorEl);
|
||||||
|
|
||||||
release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
|
release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors;
|
if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors;
|
||||||
|
|
||||||
release.tags = qa('a[href*=tag]', true);
|
release.tags = qu.all('a[href*=tag]', true);
|
||||||
|
|
||||||
const dateEl = qa('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
|
const dateEl = qu.all('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
|
||||||
release.date = qd(dateEl, null, 'MMMM Do, YYYY')
|
release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
|
||||||
|| qd('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
|
|| qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
|
||||||
|| qd('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
|
|| qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
|
||||||
|
|
||||||
const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
const durationEl = qu.all('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
||||||
release.duration = ql(durationEl);
|
release.duration = qu.dur(durationEl);
|
||||||
|
|
||||||
release.poster = qp('video') || qi('.flowplayer img') || qi('img'); // _800.jpg is larger than _xl.jpg in landscape
|
release.poster = qu.poster('video') || qu.img('.flowplayer img') || qu.img('img'); // _800.jpg is larger than _xl.jpg in landscape
|
||||||
const photosUrl = qu('.stat a[href*=photos]');
|
const photosUrl = qu.url('.stat a[href*=photos]');
|
||||||
|
|
||||||
if (photosUrl) {
|
if (photosUrl) {
|
||||||
release.photos = await fetchPhotos(photosUrl);
|
release.photos = await fetchPhotos(photosUrl);
|
||||||
} else {
|
} else {
|
||||||
release.photos = qis('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
|
release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
|
||||||
photo.replace('_tn', ''),
|
photo.replace('_tn', ''),
|
||||||
photo,
|
photo,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
const trailers = qa('a[href*=Trailers]');
|
const trailers = qu.all('a[href*=Trailers]');
|
||||||
|
|
||||||
if (trailers) {
|
if (trailers) {
|
||||||
release.trailer = trailers.map((trailer) => {
|
release.trailer = trailers.map((trailer) => {
|
||||||
|
@ -119,7 +119,7 @@ async function scrapeScene(html, url, site) {
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
}
|
}
|
||||||
|
|
||||||
const stars = q('.rate-box').dataset.score;
|
const stars = qu.q('.rate-box').dataset.score;
|
||||||
if (stars) release.rating = { stars };
|
if (stars) release.rating = { stars };
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
|
@ -133,11 +133,11 @@ function scrapeModels(html, actorName) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchActorReleases(url, accReleases = []) {
|
async function fetchActorReleases(url, accReleases = []) {
|
||||||
const { document, qu } = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
if (document) {
|
if (res.ok) {
|
||||||
const releases = accReleases.concat(scrapeAll(document.body.outerHTML));
|
const releases = accReleases.concat(scrapeAll(res.item.document.body.outerHTML));
|
||||||
const nextPage = qu('.next-pg');
|
const nextPage = res.item.qu.url('.next-pg');
|
||||||
|
|
||||||
if (nextPage && new URL(nextPage).searchParams.has('page')) { // last page has 'next' button linking to join page
|
if (nextPage && new URL(nextPage).searchParams.has('page')) { // last page has 'next' button linking to join page
|
||||||
return fetchActorReleases(nextPage, releases);
|
return fetchActorReleases(nextPage, releases);
|
||||||
|
|
|
@ -3,29 +3,29 @@
|
||||||
const { get, geta } = require('../utils/q');
|
const { get, geta } = require('../utils/q');
|
||||||
|
|
||||||
function scrapeLatest(scenes, site) {
|
function scrapeLatest(scenes, site) {
|
||||||
return scenes.map(({ q, qa, qu, qd }) => {
|
return scenes.map(({ qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.title = q('.title a', true);
|
release.title = qu.q('.title a', true);
|
||||||
|
|
||||||
const pathname = qu('.title a');
|
const pathname = qu.url('.title a');
|
||||||
release.entryId = pathname.split('/')[3];
|
release.entryId = pathname.split('/')[3];
|
||||||
release.url = `${site.url}${pathname}`;
|
release.url = `${site.url}${pathname}`;
|
||||||
|
|
||||||
release.date = qd('.date', 'MMM DD, YYYY');
|
release.date = qu.date('.date', 'MMM DD, YYYY');
|
||||||
release.actors = qa('.models a.model', true);
|
release.actors = qu.all('.models a.model', true);
|
||||||
|
|
||||||
release.poster = q('img').dataset.original;
|
release.poster = qu.q('img').dataset.original;
|
||||||
|
|
||||||
release.stars = Number(q('.rating', true));
|
release.stars = Number(qu.q('.rating', true));
|
||||||
release.likes = Number(q('.likes', true));
|
release.likes = Number(qu.q('.likes', true));
|
||||||
|
|
||||||
console.log(release);
|
console.log(release);
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ q }, _site) {
|
function scrapeScene({ qu }, _site) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
console.log(release);
|
console.log(release);
|
||||||
|
|
|
@ -96,9 +96,9 @@ async function fetchSceneNative(url, site, release) {
|
||||||
return fetchScene(url, site, release);
|
return fetchScene(url, site, release);
|
||||||
}
|
}
|
||||||
|
|
||||||
const qScene = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
return qScene && scrapeSceneNative(qScene, url, site);
|
return res.ok ? scrapeSceneNative(res.item, url, site) : res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchSceneWrapper(url, site, release) {
|
async function fetchSceneWrapper(url, site, release) {
|
||||||
|
|
303
src/utils/q.js
303
src/utils/q.js
|
@ -1,304 +1,5 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const { JSDOM } = require('jsdom');
|
const qu = require('./qu');
|
||||||
const moment = require('moment');
|
|
||||||
const http = require('./http');
|
|
||||||
|
|
||||||
function trim(str) {
|
module.exports = qu;
|
||||||
if (!str) return null;
|
|
||||||
return str.trim().replace(/\s+/g, ' ');
|
|
||||||
}
|
|
||||||
|
|
||||||
function extractDate(dateString, format, match) {
|
|
||||||
if (match) {
|
|
||||||
const dateStamp = trim(dateString).match(match);
|
|
||||||
|
|
||||||
if (dateStamp) {
|
|
||||||
const date = moment.utc(dateStamp[0], format);
|
|
||||||
|
|
||||||
return date.isValid() ? date.toDate() : null;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const date = moment.utc(trim(dateString), format);
|
|
||||||
|
|
||||||
return date.isValid() ? date.toDate() : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
function formatDate(date, format, inputFormat) {
|
|
||||||
if (inputFormat) return moment(date, inputFormat).format(format);
|
|
||||||
|
|
||||||
return moment(date).format(format);
|
|
||||||
}
|
|
||||||
|
|
||||||
function prefixProtocol(url, protocol = 'https') {
|
|
||||||
if (protocol && /^\/\//.test(url)) {
|
|
||||||
return `${protocol}:${url}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
|
|
||||||
function q(context, selector, attrArg, applyTrim = true) {
|
|
||||||
const attr = attrArg === true ? 'textContent' : attrArg;
|
|
||||||
|
|
||||||
if (attr) {
|
|
||||||
const value = selector
|
|
||||||
? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value
|
|
||||||
: context[attr] || context[attr]?.attributes[attr]?.value;
|
|
||||||
|
|
||||||
return applyTrim && value ? trim(value) : value;
|
|
||||||
}
|
|
||||||
|
|
||||||
return selector ? context.querySelector(selector) : context;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qall(context, selector, attrArg, applyTrim = true) {
|
|
||||||
const attr = attrArg === true ? 'textContent' : attrArg;
|
|
||||||
|
|
||||||
if (attr) {
|
|
||||||
return Array.from(context.querySelectorAll(selector), el => (applyTrim && el[attr] ? trim(el[attr]) : el[attr]));
|
|
||||||
}
|
|
||||||
|
|
||||||
return Array.from(context.querySelectorAll(selector));
|
|
||||||
}
|
|
||||||
|
|
||||||
function qhtml(context, selector) {
|
|
||||||
const el = q(context, selector, null, true);
|
|
||||||
|
|
||||||
return el && el.innerHTML;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qtexts(context, selector, applyTrim = true, filter = true) {
|
|
||||||
const el = q(context, selector, null, applyTrim);
|
|
||||||
if (!el) return null;
|
|
||||||
|
|
||||||
const nodes = Array.from(el.childNodes)
|
|
||||||
.filter(node => node.nodeName === '#text')
|
|
||||||
.map(node => (applyTrim ? trim(node.textContent) : node.textContent));
|
|
||||||
|
|
||||||
return filter ? nodes.filter(Boolean) : nodes;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qtext(context, selector, applyTrim = true) {
|
|
||||||
const nodes = qtexts(context, selector, applyTrim, true);
|
|
||||||
if (!nodes) return null;
|
|
||||||
|
|
||||||
const text = nodes.join(' ');
|
|
||||||
|
|
||||||
return applyTrim ? trim(text) : text;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qmeta(context, selector, attrArg = 'content', applyTrim = true) {
|
|
||||||
if (/meta\[.*\]/.test(selector)) {
|
|
||||||
return q(context, selector, attrArg, applyTrim);
|
|
||||||
}
|
|
||||||
|
|
||||||
return q(context, `meta[${selector}]`, attrArg, applyTrim);
|
|
||||||
}
|
|
||||||
|
|
||||||
function qdate(context, selector, format, match, attr = 'textContent') {
|
|
||||||
const dateString = q(context, selector, attr, true);
|
|
||||||
|
|
||||||
if (!dateString) return null;
|
|
||||||
|
|
||||||
return extractDate(dateString, format, match);
|
|
||||||
}
|
|
||||||
|
|
||||||
function qimage(context, selector = 'img', attr = 'src', protocol = 'https') {
|
|
||||||
const image = q(context, selector, attr);
|
|
||||||
|
|
||||||
// no attribute means q output will be HTML element
|
|
||||||
return attr ? prefixProtocol(image, protocol) : image;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qimages(context, selector = 'img', attr = 'src', protocol = 'https') {
|
|
||||||
const images = qall(context, selector, attr);
|
|
||||||
|
|
||||||
return attr ? images.map(image => prefixProtocol(image, protocol)) : images;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qurl(context, selector = 'a', attr = 'href', protocol = 'https') {
|
|
||||||
const url = q(context, selector, attr);
|
|
||||||
|
|
||||||
return attr ? prefixProtocol(url, protocol) : url;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qurls(context, selector = 'a', attr = 'href', protocol = 'https') {
|
|
||||||
const urls = qall(context, selector, attr);
|
|
||||||
|
|
||||||
return attr ? urls.map(url => prefixProtocol(url, protocol)) : urls;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qposter(context, selector = 'video', attr = 'poster', protocol = 'https') {
|
|
||||||
const poster = q(context, selector, attr);
|
|
||||||
|
|
||||||
return attr ? prefixProtocol(poster, protocol) : poster;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https') {
|
|
||||||
const trailer = q(context, selector, attr);
|
|
||||||
|
|
||||||
return attr ? prefixProtocol(trailer, protocol) : trailer;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https') {
|
|
||||||
const trailers = qall(context, selector, attr);
|
|
||||||
|
|
||||||
return attr ? trailers.map(trailer => prefixProtocol(trailer, protocol)) : trailers;
|
|
||||||
}
|
|
||||||
|
|
||||||
function qlength(context, selector, match, attr = 'textContent') {
|
|
||||||
const durationString = q(context, selector, attr);
|
|
||||||
|
|
||||||
if (!durationString) return null;
|
|
||||||
const duration = durationString.match(match || /(\d+:)?\d+:\d+/);
|
|
||||||
|
|
||||||
if (duration) {
|
|
||||||
const segments = ['00'].concat(duration[0].split(':')).slice(-3);
|
|
||||||
|
|
||||||
return moment.duration(segments.join(':')).asSeconds();
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const funcs = {
|
|
||||||
q,
|
|
||||||
qa: qall,
|
|
||||||
qall,
|
|
||||||
qd: qdate,
|
|
||||||
qdate,
|
|
||||||
qh: qhtml,
|
|
||||||
qhtml,
|
|
||||||
qi: qimage,
|
|
||||||
qimage,
|
|
||||||
qimages,
|
|
||||||
qis: qimages,
|
|
||||||
ql: qlength,
|
|
||||||
qlength,
|
|
||||||
qm: qmeta,
|
|
||||||
qmeta,
|
|
||||||
qp: qposter,
|
|
||||||
qposter,
|
|
||||||
qs: qall,
|
|
||||||
qt: qtrailer,
|
|
||||||
qtext,
|
|
||||||
qtexts,
|
|
||||||
qtrailer,
|
|
||||||
qtrailers,
|
|
||||||
qts: qtrailers,
|
|
||||||
qtx: qtext,
|
|
||||||
qtxs: qtexts,
|
|
||||||
qtxt: qtext,
|
|
||||||
qtxts: qtexts,
|
|
||||||
qu: qurl,
|
|
||||||
qurl,
|
|
||||||
qurls,
|
|
||||||
qus: qurls,
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
 * Wrap a node in a query context whose helpers default to that node.
 *
 * @param element - node to wrap; a falsy value yields null
 * @param [window] - window owning the node; enables the HTMLElement check and is exposed on the result
 * @returns {object|null} the node, its HTML and trimmed text, optionally window/document, and the bound helpers
 */
function init(element, window) {
	if (!element) return null;

	const contextFuncs = {};

	Object.keys(funcs).forEach((name) => {
		const func = funcs[name];

		contextFuncs[name] = (...args) => {
			// a leading HTMLElement argument replaces the wrapped element as context
			const hasOwnContext = window && args[0] instanceof window.HTMLElement;

			return hasOwnContext ? func(...args) : func(element, ...args);
		};
	});

	const base = {
		element,
		el: element,
		html: element.outerHTML || element.body.outerHTML,
		text: trim(element.textContent),
	};

	const windowProps = window ? { window, document: window.document } : {};

	return { ...base, ...windowProps, ...contextFuncs };
}
|
|
||||||
|
|
||||||
/**
 * Build a query context for each element.
 *
 * @param context - an array of elements, or a node to run `selector` against
 * @param {string} selector - selector for querySelectorAll; unused when `context` is an array
 * @param [window] - forwarded to init()
 * @returns {Array} one init() result per element
 */
function initAll(context, selector, window) {
	const elements = Array.isArray(context)
		? context
		: Array.from(context.querySelectorAll(selector));

	return elements.map(element => init(element, window));
}
|
|
||||||
|
|
||||||
/**
 * Parse an HTML string with JSDOM and wrap the document, or the first match of `selector`, via init().
 *
 * @param {string} html - markup handed to JSDOM
 * @param {string} [selector] - when set, the first matching element is wrapped instead of the document
 * @returns the init() result (null when the selector matches nothing)
 */
function extract(html, selector) {
	const { window } = new JSDOM(html);
	const { document } = window;
	const root = selector ? document.querySelector(selector) : document;

	return init(root, window);
}
|
|
||||||
|
|
||||||
/**
 * Parse an HTML string with JSDOM and wrap every match of `selector` via initAll().
 *
 * @param {string} html - markup handed to JSDOM
 * @param {string} selector - selector forwarded to initAll()
 * @returns {Array} the initAll() result
 */
function extractAll(html, selector) {
	const dom = new JSDOM(html);

	return initAll(dom.window.document, selector, dom.window);
}
|
|
||||||
|
|
||||||
/**
 * Fetch a URL through the http module and wrap the response body in query context(s).
 *
 * @param {string} url - address handed to http.get()
 * @param {string} [selector] - selector forwarded to extract() / extractAll()
 * @param {object} [headers] - request headers handed to http.get()
 * @param {boolean} [all=false] - use extractAll() instead of extract()
 * @returns {Promise<object>} `{ item, items, res, ok, status }`; item is null and items empty unless the status code is 200
 */
async function get(url, selector, headers, all = false) {
	const res = await http.get(url, { headers });
	const status = res.statusCode;

	if (status !== 200) {
		return {
			item: null,
			items: [],
			res,
			ok: false,
			status,
		};
	}

	const body = res.body.toString();
	const item = all ? extractAll(body, selector) : extract(body, selector);

	return {
		item,
		items: all ? item : [item],
		res,
		ok: true,
		status,
	};
}
|
|
||||||
|
|
||||||
/**
 * Shorthand for get() with its `all` flag set to true.
 *
 * @param {string} url - forwarded to get()
 * @param {string} selector - forwarded to get()
 * @param {object} [headers] - forwarded to get()
 * @returns {Promise<object>} the get() result
 */
async function getAll(url, selector, headers) {
	const result = await get(url, selector, headers, true);

	return result;
}
|
|
||||||
|
|
||||||
module.exports = {
|
|
||||||
extractDate,
|
|
||||||
extract,
|
|
||||||
extractAll,
|
|
||||||
init,
|
|
||||||
initAll,
|
|
||||||
formatDate,
|
|
||||||
get,
|
|
||||||
getAll,
|
|
||||||
context: init,
|
|
||||||
contextAll: initAll,
|
|
||||||
ed: extractDate,
|
|
||||||
ex: extract,
|
|
||||||
exa: extractAll,
|
|
||||||
fd: formatDate,
|
|
||||||
ctx: init,
|
|
||||||
ctxa: initAll,
|
|
||||||
geta: getAll,
|
|
||||||
edate: extractDate,
|
|
||||||
fdate: formatDate,
|
|
||||||
...funcs,
|
|
||||||
};
|
|
||||||
|
|
|
@ -0,0 +1,346 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const { JSDOM } = require('jsdom');
|
||||||
|
const moment = require('moment');
|
||||||
|
const http = require('./http');
|
||||||
|
|
||||||
|
/**
 * Strip surrounding whitespace and collapse every inner whitespace run to one space.
 *
 * @param {string} str - text to clean up
 * @returns {string|null} the cleaned text, or null for a falsy input
 */
function trim(str) {
	return str
		? str.trim().replace(/\s+/g, ' ')
		: null;
}
|
||||||
|
|
||||||
|
/**
 * Parse a date out of a string as UTC.
 *
 * @param {string} dateString - text containing the date; whitespace is normalised with trim() first
 * @param {string} [format] - format handed to moment.utc()
 * @param {RegExp} [match] - when set, only the first match within the text is parsed
 * @returns {Date|null} the parsed date, or null when the input is empty, `match` finds nothing,
 *   or moment reports the result as invalid
 */
function extractDate(dateString, format, match) {
	const dateText = trim(dateString);

	// trim() yields null for empty input; bail out instead of calling .match() on null
	if (!dateText) return null;

	const dateStamp = match ? dateText.match(match)?.[0] : dateText;

	if (!dateStamp) return null;

	const dateValue = moment.utc(dateStamp, format);

	return dateValue.isValid() ? dateValue.toDate() : null;
}
|
||||||
|
|
||||||
|
/**
 * Format a date value with moment.
 *
 * @param dateValue - value handed to moment()
 * @param {string} format - output format handed to moment's format()
 * @param {string} [inputFormat] - when set, handed to moment() as the format `dateValue` is parsed with
 * @returns {string} the formatted date
 */
function formatDate(dateValue, format, inputFormat) {
	const parsed = inputFormat
		? moment(dateValue, inputFormat)
		: moment(dateValue);

	return parsed.format(format);
}
|
||||||
|
|
||||||
|
/**
 * Turn a protocol-relative URL (`//host/path`) into an absolute one.
 *
 * @param {string} urlValue - URL to inspect
 * @param {string} [protocol='https'] - protocol to prepend; a falsy value disables prefixing
 * @returns `protocol:urlValue` when the URL starts with `//`, otherwise `urlValue` unchanged
 */
function prefixProtocol(urlValue, protocol = 'https') {
	if (!protocol) return urlValue;

	return /^\/\//.test(urlValue)
		? `${protocol}:${urlValue}`
		: urlValue;
}
|
||||||
|
|
||||||
|
/**
 * Query a single element, or one of its properties/attributes.
 *
 * @param context - node to query; used as the target itself when `selector` is falsy
 * @param {string} [selector] - selector for querySelector
 * @param {string|boolean} [attrArg] - property or attribute name to read; `true` means `textContent`;
 *   falsy returns the element itself
 * @param {boolean} [applyTrim=true] - run truthy values through trim()
 * @returns the element when no attribute is requested, otherwise the property value, falling
 *   back to the attribute of the same name when the property is falsy
 */
function q(context, selector, attrArg, applyTrim = true) {
	const attr = attrArg === true ? 'textContent' : attrArg;
	const el = selector ? context.querySelector(selector) : context;

	if (!attr) return el;

	// look the attribute up on the element itself (not on the property value), and tolerate
	// nodes without an `attributes` map
	const value = el?.[attr] || el?.attributes?.[attr]?.value;

	return applyTrim && value ? trim(value) : value;
}
|
||||||
|
|
||||||
|
/**
 * Query every matching element, or one property/attribute of each.
 *
 * @param context - node to run querySelectorAll on
 * @param {string} selector - selector for querySelectorAll
 * @param {string|boolean} [attrArg] - property or attribute name to read; `true` means `textContent`;
 *   falsy returns the elements themselves
 * @param {boolean} [applyTrim=true] - run truthy values through trim()
 * @returns {Array} the elements, or one value per element
 */
function all(context, selector, attrArg, applyTrim = true) {
	const attr = attrArg === true ? 'textContent' : attrArg;
	const els = Array.from(context.querySelectorAll(selector));

	if (!attr) return els;

	return els.map((el) => {
		const prop = el[attr];
		// same fallback as q(): use the attribute when the property is falsy, but keep the
		// original falsy property value when there is no such attribute either
		const value = prop || (el.attributes?.[attr]?.value ?? prop);

		return applyTrim && value ? trim(value) : value;
	});
}
|
||||||
|
|
||||||
|
/**
 * Tell whether q() finds anything for `selector` inside `context`.
 *
 * @returns {boolean} true when the q() result is truthy
 */
function exists(context, selector) {
	return Boolean(q(context, selector));
}
|
||||||
|
|
||||||
|
/**
 * Read the inner HTML of the element q() resolves for `selector`.
 *
 * @returns the element's innerHTML, or the falsy q() result when nothing was found
 */
function content(context, selector) {
	const node = q(context, selector, null, true);

	if (!node) return node;

	return node.innerHTML;
}
|
||||||
|
|
||||||
|
/**
 * Collect the text of an element's direct text nodes, skipping child elements.
 *
 * @param context - node handed to q()
 * @param {string} [selector] - selector handed to q()
 * @param {boolean} [applyTrim=true] - run each text through trim()
 * @param {boolean} [filter=true] - drop falsy entries from the result
 * @returns {Array|null} one entry per `#text` child, or null when q() finds no element
 */
function texts(context, selector, applyTrim = true, filter = true) {
	const parent = q(context, selector, null, applyTrim);

	if (!parent) return null;

	const textNodes = Array.from(parent.childNodes)
		.filter(({ nodeName }) => nodeName === '#text');

	const values = textNodes
		.map(({ textContent }) => (applyTrim ? trim(textContent) : textContent));

	if (!filter) return values;

	return values.filter(Boolean);
}
|
||||||
|
|
||||||
|
/**
 * Join the non-empty results of texts() into one space-separated string.
 *
 * @param context - forwarded to texts()
 * @param {string} [selector] - forwarded to texts()
 * @param {boolean} [applyTrim=true] - forwarded to texts(), and applied to the joined string
 * @returns {string|null} the joined text, or null when texts() returns nothing
 */
function text(context, selector, applyTrim = true) {
	const parts = texts(context, selector, applyTrim, true);

	if (!parts) return null;

	const joined = parts.join(' ');

	if (applyTrim) return trim(joined);

	return joined;
}
|
||||||
|
|
||||||
|
/**
 * Query a <meta> element through q().
 *
 * @param context - forwarded to q()
 * @param {string} selector - a full `meta[...]` selector, or just the part between the brackets
 * @param {string|boolean} [attrArg='content'] - forwarded to q()
 * @param {boolean} [applyTrim=true] - forwarded to q()
 * @returns the q() result
 */
function meta(context, selector, attrArg = 'content', applyTrim = true) {
	// wrap bare conditions, leave complete meta[...] selectors alone
	const metaSelector = /meta\[.*\]/.test(selector)
		? selector
		: `meta[${selector}]`;

	return q(context, metaSelector, attrArg, applyTrim);
}
|
||||||
|
|
||||||
|
/**
 * Query a date string through q() and parse it with extractDate().
 *
 * @param context - forwarded to q()
 * @param {string} selector - forwarded to q()
 * @param {string} [format] - forwarded to extractDate()
 * @param {RegExp} [match] - forwarded to extractDate()
 * @param {string} [attr='textContent'] - attribute read through q()
 * @returns the extractDate() result, or null when q() yields nothing
 */
function date(context, selector, format, match, attr = 'textContent') {
	const dateString = q(context, selector, attr, true);

	return dateString
		? extractDate(dateString, format, match)
		: null;
}
|
||||||
|
|
||||||
|
/**
 * Query a single image source through q().
 *
 * @param context - forwarded to q()
 * @param {string} [selector='img'] - forwarded to q()
 * @param {string} [attr='src'] - forwarded to q()
 * @param {string} [protocol='https'] - forwarded to prefixProtocol()
 * @returns the attribute value run through prefixProtocol(), or the element when `attr` is falsy
 */
function image(context, selector = 'img', attr = 'src', protocol = 'https') {
	const result = q(context, selector, attr);

	// without an attribute q() hands back the element itself, which has nothing to prefix
	if (!attr) return result;

	return prefixProtocol(result, protocol);
}
|
||||||
|
|
||||||
|
/**
 * Query every image source through all().
 *
 * @param context - forwarded to all()
 * @param {string} [selector='img'] - forwarded to all()
 * @param {string} [attr='src'] - forwarded to all()
 * @param {string} [protocol='https'] - forwarded to prefixProtocol() for each value
 * @returns {Array} the values run through prefixProtocol(), or the elements when `attr` is falsy
 */
function images(context, selector = 'img', attr = 'src', protocol = 'https') {
	const results = all(context, selector, attr);

	if (!attr) return results;

	return results.map(result => prefixProtocol(result, protocol));
}
|
||||||
|
|
||||||
|
/**
 * Query a single link target through q().
 *
 * @param context - forwarded to q()
 * @param {string} [selector='a'] - forwarded to q()
 * @param {string} [attr='href'] - forwarded to q()
 * @param {string} [protocol='https'] - forwarded to prefixProtocol()
 * @returns the attribute value run through prefixProtocol(), or the element when `attr` is falsy
 */
function url(context, selector = 'a', attr = 'href', protocol = 'https') {
	const result = q(context, selector, attr);

	if (!attr) return result;

	return prefixProtocol(result, protocol);
}
|
||||||
|
|
||||||
|
/**
 * Query every link target through all().
 *
 * @param context - forwarded to all()
 * @param {string} [selector='a'] - forwarded to all()
 * @param {string} [attr='href'] - forwarded to all()
 * @param {string} [protocol='https'] - forwarded to prefixProtocol() for each value
 * @returns {Array} the values run through prefixProtocol(), or the elements when `attr` is falsy
 */
function urls(context, selector = 'a', attr = 'href', protocol = 'https') {
	const results = all(context, selector, attr);

	if (!attr) return results;

	return results.map(result => prefixProtocol(result, protocol));
}
|
||||||
|
|
||||||
|
/**
 * Query a video poster through q().
 *
 * @param context - forwarded to q()
 * @param {string} [selector='video'] - forwarded to q()
 * @param {string} [attr='poster'] - forwarded to q()
 * @param {string} [protocol='https'] - forwarded to prefixProtocol()
 * @returns the attribute value run through prefixProtocol(), or the element when `attr` is falsy
 */
function poster(context, selector = 'video', attr = 'poster', protocol = 'https') {
	const result = q(context, selector, attr);

	if (!attr) return result;

	return prefixProtocol(result, protocol);
}
|
||||||
|
|
||||||
|
/**
 * Query a single video source through q().
 *
 * @param context - forwarded to q()
 * @param {string} [selector='source'] - forwarded to q()
 * @param {string} [attr='src'] - forwarded to q()
 * @param {string} [protocol='https'] - forwarded to prefixProtocol()
 * @returns the attribute value run through prefixProtocol(), or the element when `attr` is falsy
 */
function video(context, selector = 'source', attr = 'src', protocol = 'https') {
	const result = q(context, selector, attr);

	if (!attr) return result;

	return prefixProtocol(result, protocol);
}
|
||||||
|
|
||||||
|
/**
 * Query every video source through all().
 *
 * @param context - forwarded to all()
 * @param {string} [selector='source'] - forwarded to all()
 * @param {string} [attr='src'] - forwarded to all()
 * @param {string} [protocol='https'] - forwarded to prefixProtocol() for each value
 * @returns {Array} the values run through prefixProtocol(), or the elements when `attr` is falsy
 */
function videos(context, selector = 'source', attr = 'src', protocol = 'https') {
	const results = all(context, selector, attr);

	if (!attr) return results;

	return results.map(result => prefixProtocol(result, protocol));
}
|
||||||
|
|
||||||
|
/**
 * Query a duration string and convert it to seconds.
 *
 * @param context - forwarded to q()
 * @param {string} selector - forwarded to q()
 * @param {RegExp} [match] - pattern locating the duration; defaults to [h:]m:s
 * @param {string} [attr='textContent'] - attribute read through q()
 * @returns {number|null} seconds, or null when nothing was queried or matched
 */
function duration(context, selector, match, attr = 'textContent') {
	const raw = q(context, selector, attr);

	if (!raw) return null;

	const found = raw.match(match || /(\d+:)?\d+:\d+/);

	if (!found) return null;

	// prepend an hours segment and keep the last three, so m:s is read as 00:m:s
	const stamp = ['00', ...found[0].split(':')].slice(-3).join(':');

	return moment.duration(stamp).asSeconds();
}
|
||||||
|
|
||||||
|
// Query helpers under their old q-prefixed names: spread into module.exports and bound
// per element by init(). There is no `qu` alias for `url` here, because `qu` is the key
// under which the quFuncs namespace is exposed.
const legacyFuncs = {
	q,
	qa: all,
	qall: all,
	qd: date,
	qdate: date,
	qh: content,
	qhtml: content,
	qi: image,
	qimage: image,
	qimages: images,
	qis: images,
	ql: duration,
	qlength: duration,
	qm: meta,
	qmeta: meta,
	qp: poster,
	qposter: poster,
	qs: all,
	qt: video,
	qtext: text,
	qtexts: texts,
	qtrailer: video,
	qtrailers: videos,
	qts: videos,
	qtx: text,
	qtxs: texts,
	qtxt: text,
	qtxts: texts,
	qurl: url,
	qurls: urls,
	qus: urls,
};
|
||||||
|
|
||||||
|
// Query helpers exposed under the `qu` namespace, both on module.exports and, bound to an
// element, on every init() result. Several keys are aliases of the same helper.
const quFuncs = {
	all,
	body: content,
	content,
	date,
	dur: duration,
	duration,
	exists,
	image,
	images,
	img: image,
	imgs: images,
	inner: content,
	length: duration,
	meta,
	poster,
	q,
	text,
	texts,
	trailer: video,
	url,
	urls,
	video,
	videos,
};
|
||||||
|
|
||||||
|
/**
 * Wrap a node in a query context whose helpers default to that node.
 *
 * @param element - node to wrap; a falsy value yields null
 * @param [window] - window owning the node; enables the HTMLElement check and is exposed on the result
 * @returns {object|null} `{ element, el, html, text, [window, document], ...legacy helpers, qu }`,
 *   where the legacy helpers and the members of `qu` are bound to `element`
 */
function init(element, window) {
	if (!element) return null;

	// Bind each helper to `element`, unless the caller passes an HTMLElement of its own as
	// first argument. Shared by the legacy and the `qu` helper sets.
	const bindToContext = helpers => Object.fromEntries(
		Object.entries(helpers).map(([key, func]) => [
			key,
			(...args) => (window && args[0] instanceof window.HTMLElement // allow for different context
				? func(...args)
				: func(element, ...args)),
		]),
	);

	return {
		element,
		el: element,
		// documents expose their markup through body; nodes with neither yield undefined
		html: element.outerHTML || element.body?.outerHTML,
		text: trim(element.textContent),
		...(window && {
			window,
			document: window.document,
		}),
		...bindToContext(legacyFuncs),
		qu: bindToContext(quFuncs),
	};
}
|
||||||
|
|
||||||
|
/**
 * Build a query context for each element.
 *
 * @param context - an array of elements, or a node to run `selector` against
 * @param {string} selector - selector for querySelectorAll; unused when `context` is an array
 * @param [window] - forwarded to init()
 * @returns {Array} one init() result per element
 */
function initAll(context, selector, window) {
	const elements = Array.isArray(context)
		? context
		: Array.from(context.querySelectorAll(selector));

	return elements.map(element => init(element, window));
}
|
||||||
|
|
||||||
|
/**
 * Parse an HTML string with JSDOM and wrap the document, or the first match of `selector`, via init().
 *
 * @param {string} htmlValue - markup handed to JSDOM
 * @param {string} [selector] - when set, the first matching element is wrapped instead of the document
 * @returns the init() result (null when the selector matches nothing)
 */
function extract(htmlValue, selector) {
	const { window } = new JSDOM(htmlValue);
	const { document } = window;
	const root = selector ? document.querySelector(selector) : document;

	return init(root, window);
}
|
||||||
|
|
||||||
|
/**
 * Parse an HTML string with JSDOM and wrap every match of `selector` via initAll().
 *
 * @param {string} htmlValue - markup handed to JSDOM
 * @param {string} selector - selector forwarded to initAll()
 * @returns {Array} the initAll() result
 */
function extractAll(htmlValue, selector) {
	const dom = new JSDOM(htmlValue);

	return initAll(dom.window.document, selector, dom.window);
}
|
||||||
|
|
||||||
|
/**
 * Fetch a URL through the http module and wrap the response body in query context(s).
 *
 * @param {string} urlValue - address handed to http.get()
 * @param {string} [selector] - selector forwarded to extract() / extractAll()
 * @param {object} [headers] - request headers handed to http.get()
 * @param {boolean} [queryAll=false] - use extractAll() instead of extract()
 * @returns {Promise<object>} `{ item, items, res, ok, status }`; `items` is always an array
 *   (the extractAll() result, or the single item wrapped), and item is null and items empty
 *   unless the status code is 200
 */
async function get(urlValue, selector, headers, queryAll = false) {
	const res = await http.get(urlValue, {
		headers,
	});

	if (res.statusCode !== 200) {
		return {
			item: null,
			items: [],
			res,
			ok: false,
			status: res.statusCode,
		};
	}

	const htmlValue = res.body.toString();
	const item = queryAll
		? extractAll(htmlValue, selector)
		: extract(htmlValue, selector);

	return {
		item,
		// must test the flag: a bare `all` would resolve to the module-level query helper,
		// which is always truthy and would leave single results unwrapped
		items: queryAll ? item : [item],
		res,
		ok: true,
		status: res.statusCode,
	};
}
|
||||||
|
|
||||||
|
/**
 * Shorthand for get() with its `queryAll` flag set to true.
 *
 * @param {string} urlValue - forwarded to get()
 * @param {string} selector - forwarded to get()
 * @param {object} [headers] - forwarded to get()
 * @returns {Promise<object>} the get() result
 */
async function getAll(urlValue, selector, headers) {
	const result = await get(urlValue, selector, headers, true);

	return result;
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
extractDate,
|
||||||
|
extract,
|
||||||
|
extractAll,
|
||||||
|
init,
|
||||||
|
initAll,
|
||||||
|
formatDate,
|
||||||
|
get,
|
||||||
|
getAll,
|
||||||
|
context: init,
|
||||||
|
contextAll: initAll,
|
||||||
|
ed: extractDate,
|
||||||
|
ex: extract,
|
||||||
|
exa: extractAll,
|
||||||
|
fd: formatDate,
|
||||||
|
ctx: init,
|
||||||
|
ctxa: initAll,
|
||||||
|
geta: getAll,
|
||||||
|
edate: extractDate,
|
||||||
|
fdate: formatDate,
|
||||||
|
qu: quFuncs,
|
||||||
|
...legacyFuncs,
|
||||||
|
};
|
Loading…
Reference in New Issue