Extracting shoot IDs from title in PornBox scraper.

This commit is contained in:
DebaucheryLibrarian
2026-02-01 01:31:45 +01:00
parent f76341f0dd
commit 762e605bd1
5 changed files with 51 additions and 75 deletions

View File

@@ -2,7 +2,6 @@
const unprint = require('unprint');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
function extractTitle(originalTitle) {
@@ -43,6 +42,25 @@ function scrapeAll(scenes, channel) {
});
}
/**
 * Fetch one page of the latest scenes listed for a channel.
 *
 * The listing URL is derived from the channel's own URL (studios act as
 * channels) and is also sent as the request's Referer header.
 *
 * @param {object} channel - Channel entity; `channel.url` is the base of the listing URL.
 * @param {*} page - Page index interpolated into the listing path.
 * @returns {Promise<*>} The output of `scrapeAll` when the response is OK,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page) {
  // Alternative source: https://www.analvids.com/new-videos/${page} (analvids itself as the channel).
  const latestUrl = `${channel.url}/latest/${page}`;
  const requestOptions = {
    selectAll: '.card-scene',
    headers: { Referer: latestUrl },
  };

  const response = await unprint.get(latestUrl, requestOptions);

  return response.ok
    ? scrapeAll(response.context, channel)
    : response.status;
}
function scrapeScene({ query }, url) {
const release = {};
@@ -76,71 +94,6 @@ function scrapeScene({ query }, url) {
return release;
}
/**
 * Build an actor profile from a parsed profile page.
 *
 * @param {object} context - Parsed page context; only its `query` helper is used.
 * @param {string} url - Profile URL, stored on the result as-is.
 * @param {object} channel - Channel entity forwarded to `scrapeAll` for the scene cards.
 * @returns {object} Profile with `url`, `nationality`, `age`, `avatar` and `scenes`.
 */
function scrapeProfile({ query }, url, channel) {
  // Properties are evaluated top to bottom, matching the page queries' original order.
  return {
    url,
    nationality: query.content('.model__info a[href*="/nationality"]'),
    age: query.number('//td[contains(text(), "Age")]/following-sibling::td'),
    avatar: query.img('.model__left img'),
    scenes: scrapeAll(unprint.initAll(query.all('.card-scene')), channel),
  };
}
/**
 * Fetch one page of the latest scenes listed for a channel.
 *
 * The listing URL is derived from the channel's own URL (studios act as
 * channels) and is also sent as the request's Referer header.
 *
 * @param {object} channel - Channel entity; `channel.url` is the base of the listing URL.
 * @param {*} page - Page index interpolated into the listing path.
 * @returns {Promise<*>} The output of `scrapeAll` when the response is OK,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page) {
  // Alternative source: https://www.analvids.com/new-videos/${page} (analvids itself as the channel).
  const latestUrl = `${channel.url}/latest/${page}`;
  const requestOptions = {
    selectAll: '.card-scene',
    headers: { Referer: latestUrl },
  };

  const response = await unprint.get(latestUrl, requestOptions);

  return response.ok
    ? scrapeAll(response.context, channel)
    : response.status;
}
/*
async function fetchLatest(channel, page) {
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
// const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
const url = `${channel.url}/latest/${page}`; // studios as channels
const { tab } = await http.getBrowserSession('analvids', {
bypass: {
headless: false,
},
});
const res = await tab.goto(url);
const status = res.status();
console.log('STATUS', status);
if (status === 200) {
const html = await tab.content();
const context = unprint.initAll(html, '.card-scene'); // studios as channels
const scenes = scrapeAll(context, channel);
tab.close();
return scenes;
}
return res.status;
}
*/
async function fetchScene(url) {
const res = await unprint.get(url, {
headers: {
@@ -155,6 +108,19 @@ async function fetchScene(url) {
return res.status;
}
/**
 * Build an actor profile from a parsed profile page.
 *
 * @param {object} context - Parsed page context; only its `query` helper is used.
 * @param {string} url - Profile URL, stored on the result as-is.
 * @param {object} channel - Channel entity forwarded to `scrapeAll` for the scene cards.
 * @returns {object} Profile with `url`, `nationality`, `age`, `avatar` and `scenes`.
 */
function scrapeProfile({ query }, url, channel) {
  // Properties are evaluated top to bottom, matching the page queries' original order.
  return {
    url,
    nationality: query.content('.model__info a[href*="/nationality"]'),
    age: query.number('//td[contains(text(), "Age")]/following-sibling::td'),
    avatar: query.img('.model__left img'),
    scenes: scrapeAll(unprint.initAll(query.all('.card-scene')), channel),
  };
}
async function getActorUrl(actor, channel) {
if (actor.url) {
return actor.url;
@@ -162,7 +128,7 @@ async function getActorUrl(actor, channel) {
const searchUrl = `${channel.url}/api/autocomplete/search?q=${slugify(actor.name, '+')}`;
const searchRes = await http.get(searchUrl, {
const searchRes = await unprint.get(searchUrl, {
headers: {
Referer: actor.url,
},

View File

@@ -25,14 +25,24 @@ async function getTrailer(data) {
return null;
}
/**
 * Extract a shoot ID from a scene title.
 *
 * A shoot ID is the first occurrence of 2-3 uppercase letters followed by
 * 3-4 digits and an optional trailing word character (e.g. `GIO1234`,
 * `BZ123a`). The match is upper-cased so a lowercase suffix is normalized.
 *
 * @param {string|null|undefined} title - Scene title to search.
 * @returns {string|null} The upper-cased shoot ID, or `null` when the title
 *   is missing, not a string, or contains no shoot ID.
 */
function extractShootId(title) {
  // Non-string input (null, undefined, numbers, ...) cannot contain an ID.
  if (typeof title !== 'string') {
    return null;
  }

  const match = title.match(/[A-Z]{2,3}\d{3,4}\w?/);

  // Always report "no ID" as null, so callers never have to tell
  // null (no title) apart from undefined (no match).
  return match ? match[0].toUpperCase() : null;
}
async function scrapeScene(data, channel, include) {
const release = {};
const entityUrl = new URL(channel.url).origin;
release.entryId = data.id;
release.title = data.scene_name || data.custom_name;
release.entryId = data.id;
release.url = `${entityUrl}/watch/${data.id}/${slugify(release.title, '_')}`;
release.url = `${entityUrl}/watch/${data.id}/${slugify(release.title, '_') || ''}`;
release.shootId = extractShootId(release.title);
release.date = new Date(data.release_date || data.publish_date);
release.duration = unprint.extractDuration(data.runtime);

View File

@@ -56,7 +56,7 @@ function slugify(strings, delimiter = '-', {
symbolRegex = defaultSymbolRegex,
} = {}) {
if (!strings || (typeof strings !== 'string' && !Array.isArray(strings))) {
return strings;
return '';
}
const string = [].concat(strings).join(' ');