Added tour layout scraper to Hush, enabling Interracial POVs, POV Pornstars and See Him Fuck.
This commit is contained in:
parent
0feac66e94
commit
fd6e90e74c
|
@ -850,10 +850,22 @@ const aliases = [
|
||||||
name: 'anilingus',
|
name: 'anilingus',
|
||||||
for: 'ass-eating',
|
for: 'ass-eating',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'analingus',
|
||||||
|
for: 'ass-eating',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'analingus male',
|
||||||
|
for: 'ass-eating',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'asians',
|
name: 'asians',
|
||||||
for: 'asian',
|
for: 'asian',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'asian female',
|
||||||
|
for: 'asian',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'anal fingering',
|
name: 'anal fingering',
|
||||||
for: 'ass-fingering',
|
for: 'ass-fingering',
|
||||||
|
|
|
@ -2075,6 +2075,9 @@ const sites = [
|
||||||
url: 'https://seehimfuck.com',
|
url: 'https://seehimfuck.com',
|
||||||
tags: ['male-focus'],
|
tags: ['male-focus'],
|
||||||
network: 'hussiepass',
|
network: 'hussiepass',
|
||||||
|
parameters: {
|
||||||
|
tour: true,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'interracialpovs',
|
slug: 'interracialpovs',
|
||||||
|
@ -2082,13 +2085,20 @@ const sites = [
|
||||||
url: 'https://www.interracialpovs.com',
|
url: 'https://www.interracialpovs.com',
|
||||||
tags: ['interracial', 'pov'],
|
tags: ['interracial', 'pov'],
|
||||||
network: 'hussiepass',
|
network: 'hussiepass',
|
||||||
|
parameters: {
|
||||||
|
tour: true,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'povpornstars',
|
slug: 'povpornstars',
|
||||||
name: 'POV Pornstars',
|
name: 'POV Pornstars',
|
||||||
url: 'https://www.povpornstars.com',
|
url: 'http://www.povpornstars.com',
|
||||||
tags: ['pov'],
|
tags: ['pov'],
|
||||||
network: 'hussiepass',
|
network: 'hussiepass',
|
||||||
|
parameters: {
|
||||||
|
latest: 'http://www.povpornstars.com/tour/categories/movies_%d_d.html',
|
||||||
|
tour: true,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
// HUSH PASS
|
// HUSH PASS
|
||||||
{
|
{
|
||||||
|
|
|
@ -423,6 +423,11 @@ async function updateReleasesSearch(releaseIds) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function storeRelease(release, batchId) {
|
async function storeRelease(release, batchId) {
|
||||||
|
if (!release.entryId) {
|
||||||
|
logger.warn(`Missing entry ID, unable to store ${release.url}`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
const existingRelease = await knex('releases')
|
const existingRelease = await knex('releases')
|
||||||
.where({
|
.where({
|
||||||
entry_id: release.entryId,
|
entry_id: release.entryId,
|
||||||
|
@ -473,11 +478,11 @@ async function storeReleases(releases) {
|
||||||
try {
|
try {
|
||||||
const releaseWithChannelSite = await attachChannelSite(release);
|
const releaseWithChannelSite = await attachChannelSite(release);
|
||||||
const releaseWithStudio = await attachStudio(releaseWithChannelSite);
|
const releaseWithStudio = await attachStudio(releaseWithChannelSite);
|
||||||
const { id, slug } = await storeRelease(releaseWithStudio, batchId);
|
const storedRelease = await storeRelease(releaseWithStudio, batchId);
|
||||||
|
|
||||||
return {
|
return storedRelease && {
|
||||||
id,
|
id: storedRelease.id,
|
||||||
slug,
|
slug: storedRelease.slug,
|
||||||
...releaseWithChannelSite,
|
...releaseWithChannelSite,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -487,7 +492,7 @@ async function storeReleases(releases) {
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
concurrency: 10,
|
concurrency: 10,
|
||||||
}).filter(release => release);
|
}).filter(Boolean);
|
||||||
|
|
||||||
logger.info(`Stored ${storedReleases.length} new releases`);
|
logger.info(`Stored ${storedReleases.length} new releases`);
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,10 @@ const knex = require('../knex');
|
||||||
const { get, geta, fd } = require('../utils/q');
|
const { get, geta, fd } = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
|
/**
 * Build a fallback entry ID for a release from its date and title.
 *
 * The result has the form `<slugified date>-<slugified title>`, where the
 * date is first formatted as `YYYY-MM-DD` through the `fd` helper.
 *
 * @param {object} release - Scraped release; `date` and `title` are read.
 * @returns {string} The derived entry ID.
 */
function deriveEntryId(release) {
	// NOTE(review): behaviour for a missing date depends on `fd`; callers should only pass dated releases — confirm.
	const datePart = slugify(fd(release.date, 'YYYY-MM-DD'));
	const titlePart = slugify(release.title);

	return `${datePart}-${titlePart}`;
}
|
||||||
|
|
||||||
function extractPoster(posterPath, site, baseRelease) {
|
function extractPoster(posterPath, site, baseRelease) {
|
||||||
if (posterPath && !/400.jpg/.test(posterPath)) {
|
if (posterPath && !/400.jpg/.test(posterPath)) {
|
||||||
const poster = `${site.parameters?.media || site.url}${posterPath}`;
|
const poster = `${site.parameters?.media || site.url}${posterPath}`;
|
||||||
|
@ -60,9 +64,6 @@ function scrapeLatestT1(scenes, site) {
|
||||||
release.date = qd('.more-info-div', 'MMM D, YYYY');
|
release.date = qd('.more-info-div', 'MMM D, YYYY');
|
||||||
release.duration = ql('.more-info-div');
|
release.duration = ql('.more-info-div');
|
||||||
|
|
||||||
// release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1];
|
|
||||||
release.entryId = `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
|
|
||||||
|
|
||||||
const posterPath = q('.img-div img', 'src0_1x') || qi('img.video_placeholder');
|
const posterPath = q('.img-div img', 'src0_1x') || qi('img.video_placeholder');
|
||||||
|
|
||||||
if (posterPath) {
|
if (posterPath) {
|
||||||
|
@ -75,6 +76,27 @@ function scrapeLatestT1(scenes, site) {
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1];
|
||||||
|
release.entryId = deriveEntryId(release);
|
||||||
|
|
||||||
|
return release;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Scrape the release listing of a site that uses the "tour" layout.
 *
 * @param {Array<object>} scenes - One query context per listed scene, each
 *   exposing the `q`, `qa`, `qu`, `qd` and `qi` query helpers.
 * @returns {Array<object>} One release per scene with `title`, `url`, `date`,
 *   `actors` and `poster`, plus `entryId` when a date could be scraped.
 */
function scrapeLatestTour(scenes) {
	return scenes.map(({ q, qa, qu, qd, qi }) => {
		const release = {};

		release.title = q('h4 a', true);
		release.url = qu('a');
		release.date = qd('.tour_update_models + span', 'YYYY-MM-DD');

		release.actors = qa('.tour_update_models a', true);

		release.poster = qi('a img');

		// Only derive an entry ID when a date was found, mirroring scrapeSceneTour;
		// an ID built from a missing date would not reliably identify the scene.
		if (release.date) release.entryId = deriveEntryId(release);

		return release;
	});
}
|
||||||
|
@ -112,9 +134,6 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
|
||||||
release.date = qd('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
release.date = qd('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||||
release.duration = ql('.update-info-row:nth-child(2)');
|
release.duration = ql('.update-info-row:nth-child(2)');
|
||||||
|
|
||||||
// release.entryId = q('.player-thumb img', 'id')?.match(/set-target-(\d+)/)[1];
|
|
||||||
release.entryId = `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
|
|
||||||
|
|
||||||
release.actors = qa('.models-list-thumbs a').map(el => ({
|
release.actors = qa('.models-list-thumbs a').map(el => ({
|
||||||
name: q(el, 'span', true),
|
name: q(el, 'span', true),
|
||||||
avatar: [
|
avatar: [
|
||||||
|
@ -148,6 +167,35 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// release.entryId = q('.player-thumb img', 'id')?.match(/set-target-(\d+)/)[1];
|
||||||
|
release.entryId = deriveEntryId(release);
|
||||||
|
|
||||||
|
return release;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Scrape a single scene page of a site that uses the "tour" layout.
 *
 * @param {object} context - Query context for the scene page: the raw `html`
 *   string and the `q`, `qd`, `qa` and `qis` query helpers.
 * @param {object} site - Site definition; `url` and the optional
 *   `parameters.media` base are used to resolve relative trailer paths.
 * @param {string} url - URL of the scene page, copied onto the release.
 * @returns {object} The release with `url`, `title`, `description`, `actors`
 *   and `tags`, plus `date`, `entryId`, `poster`, `photos` and `trailer` when
 *   they could be found.
 */
function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
	const release = { url };

	release.title = q('.update_title, .video-title', true);
	release.description = q('.latest_update_description, .video-summary', true);

	const date = qd('.availdate', 'YYYY-MM-DD');
	if (date) release.date = date;

	release.actors = qa('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
	release.tags = qa('.update_tags a, .tour_update_tags a', true);

	// The second image is treated as the poster; the first one is only a fallback poster.
	const [photo, poster, ...photos] = qis('.update_image img:not(.play_icon_overlay)') || [];
	if (poster || photo) release.poster = poster || photo;

	// Don't use the first photo when it is already used as the fallback poster.
	// A rest element is always an array (and therefore truthy), so check its
	// contents instead of the array itself before exposing `photos`.
	const gallery = (poster ? [photo, ...photos] : photos).filter(Boolean);
	if (gallery.length > 0) release.photos = gallery;

	// The entry ID is derived from the date and title, so it needs a date.
	if (release.date) release.entryId = deriveEntryId(release);

	// Prefer the trailer referenced by the player's onclick handler, fall back to any trailer path in the page source.
	const trailerCode = q('.update_image a', 'onclick');
	const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];

	if (trailerPath && /^http/.test(trailerPath)) {
		release.trailer = { src: trailerPath };
	} else if (trailerPath) {
		// Relative path: resolve against the media base when configured, otherwise the site URL.
		release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
	}

	return release;
}
|
||||||
|
|
||||||
|
@ -164,11 +212,13 @@ async function fetchLatest(site, page = 1) {
|
||||||
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|
||||||
|| `${site.url}/categories/movies_${page}_d.html`;
|
|| `${site.url}/categories/movies_${page}_d.html`;
|
||||||
|
|
||||||
const qLatest = await geta(url, '.modelfeature, .item-video');
|
const qLatest = await geta(url, '.modelfeature, .item-video, .updateItem');
|
||||||
|
|
||||||
if (!qLatest) return null;
|
if (!qLatest) return null;
|
||||||
|
if (site.parameters?.t1) return scrapeLatestT1(qLatest, site);
|
||||||
|
if (site.parameters?.tour) return scrapeLatestTour(qLatest, site);
|
||||||
|
|
||||||
return site.parameters?.t1 ? scrapeLatestT1(qLatest, site) : scrapeLatest(qLatest, site);
|
return scrapeLatest(qLatest, site);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
|
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
|
||||||
|
@ -177,7 +227,10 @@ async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
|
||||||
|
|
||||||
if (!qScene) return null;
|
if (!qScene) return null;
|
||||||
|
|
||||||
return site.parameters?.t1 ? scrapeSceneT1(qScene, site, url, baseRelease, channelRegExp) : scrapeScene(qScene, site, url, baseRelease);
|
if (site.parameters?.t1) return scrapeSceneT1(qScene, site, url, baseRelease, channelRegExp);
|
||||||
|
if (site.parameters?.tour) return scrapeSceneTour(qScene, site, url, baseRelease);
|
||||||
|
|
||||||
|
return scrapeScene(qScene, site, url, baseRelease);
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
Loading…
Reference in New Issue