Extracting shoot IDs from title in PornBox scraper.
This commit is contained in:
@@ -2,7 +2,6 @@
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
@@ -43,6 +42,25 @@ function scrapeAll(scenes, channel) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of a channel's latest scenes and scrape every scene card on it.
 *
 * Studios are treated as channels, so the listing URL is built from the channel's own URL.
 * (The site-wide alternative would be `https://www.analvids.com/new-videos/${page}`.)
 *
 * @param {object} channel - Channel entity; its `url` is the base of the listing URL.
 * @param {number} page - Page number appended to the `/latest/` path.
 * @returns {Promise<*>} The result of `scrapeAll` when the response is ok, otherwise `res.status`.
 */
async function fetchLatest(channel, page) {
	const url = `${channel.url}/latest/${page}`;

	// the listing URL doubles as its own Referer header
	const res = await unprint.get(url, { selectAll: '.card-scene', headers: { Referer: url } });

	return res.ok
		? scrapeAll(res.context, channel)
		: res.status;
}
|
||||
|
||||
function scrapeScene({ query }, url) {
|
||||
const release = {};
|
||||
|
||||
@@ -76,71 +94,6 @@ function scrapeScene({ query }, url) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile object from a queried model page.
 *
 * @param {object} context - Page context; only its `query` helper is used.
 * @param {string} url - Profile URL, copied onto the result as-is.
 * @param {object} channel - Channel entity forwarded to `scrapeAll` for the listed scenes.
 * @returns {object} Profile with `url`, `nationality`, `age`, `avatar` and `scenes`.
 */
function scrapeProfile({ query }, url, channel) {
	// properties are evaluated top to bottom, matching the order the page is queried in
	return {
		url,
		nationality: query.content('.model__info a[href*="/nationality"]'),
		age: query.number('//td[contains(text(), "Age")]/following-sibling::td'),
		avatar: query.img('.model__left img'),
		// every scene card on the profile page goes through the shared listing scraper
		scenes: scrapeAll(unprint.initAll(query.all('.card-scene')), channel),
	};
}
|
||||
|
||||
/**
 * Fetch one page of a channel's latest scenes and scrape every scene card on it.
 *
 * Studios are treated as channels, so the listing URL is built from the channel's own URL.
 * (The site-wide alternative would be `https://www.analvids.com/new-videos/${page}`.)
 *
 * @param {object} channel - Channel entity; its `url` is the base of the listing URL.
 * @param {number} page - Page number appended to the `/latest/` path.
 * @returns {Promise<*>} The result of `scrapeAll` when the response is ok, otherwise `res.status`.
 */
async function fetchLatest(channel, page) {
	const url = `${channel.url}/latest/${page}`;

	// the listing URL doubles as its own Referer header
	const res = await unprint.get(url, { selectAll: '.card-scene', headers: { Referer: url } });

	return res.ok
		? scrapeAll(res.context, channel)
		: res.status;
}
|
||||
|
||||
/*
|
||||
async function fetchLatest(channel, page) {
|
||||
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
|
||||
// const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
|
||||
const url = `${channel.url}/latest/${page}`; // studios as channels
|
||||
|
||||
const { tab } = await http.getBrowserSession('analvids', {
|
||||
bypass: {
|
||||
headless: false,
|
||||
},
|
||||
});
|
||||
|
||||
const res = await tab.goto(url);
|
||||
|
||||
const status = res.status();
|
||||
|
||||
console.log('STATUS', status);
|
||||
|
||||
if (status === 200) {
|
||||
const html = await tab.content();
|
||||
const context = unprint.initAll(html, '.card-scene'); // studios as channels
|
||||
|
||||
const scenes = scrapeAll(context, channel);
|
||||
|
||||
tab.close();
|
||||
|
||||
return scenes;
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
*/
|
||||
|
||||
async function fetchScene(url) {
|
||||
const res = await unprint.get(url, {
|
||||
headers: {
|
||||
@@ -155,6 +108,19 @@ async function fetchScene(url) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile object from a queried model page.
 *
 * @param {object} context - Page context; only its `query` helper is used.
 * @param {string} url - Profile URL, copied onto the result as-is.
 * @param {object} channel - Channel entity forwarded to `scrapeAll` for the listed scenes.
 * @returns {object} Profile with `url`, `nationality`, `age`, `avatar` and `scenes`.
 */
function scrapeProfile({ query }, url, channel) {
	// properties are evaluated top to bottom, matching the order the page is queried in
	return {
		url,
		nationality: query.content('.model__info a[href*="/nationality"]'),
		age: query.number('//td[contains(text(), "Age")]/following-sibling::td'),
		avatar: query.img('.model__left img'),
		// every scene card on the profile page goes through the shared listing scraper
		scenes: scrapeAll(unprint.initAll(query.all('.card-scene')), channel),
	};
}
|
||||
|
||||
async function getActorUrl(actor, channel) {
|
||||
if (actor.url) {
|
||||
return actor.url;
|
||||
@@ -162,7 +128,7 @@ async function getActorUrl(actor, channel) {
|
||||
|
||||
const searchUrl = `${channel.url}/api/autocomplete/search?q=${slugify(actor.name, '+')}`;
|
||||
|
||||
const searchRes = await http.get(searchUrl, {
|
||||
const searchRes = await unprint.get(searchUrl, {
|
||||
headers: {
|
||||
Referer: actor.url,
|
||||
},
|
||||
|
||||
@@ -25,14 +25,24 @@ async function getTrailer(data) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Extract a shoot ID (e.g. `GIO1234`) from a scene title.
 *
 * The ID pattern is two or three capital letters, three or four digits and an
 * optional trailing word character. The first occurrence in the title is
 * returned upper-cased.
 *
 * @param {string} [title] - Scene title to search.
 * @returns {string|null} The upper-cased shoot ID, or `null` when the title is
 *   empty, is not a string, or contains no shoot ID.
 */
function extractShootId(title) {
	// a non-string title has no .match(); treat it like a missing title instead of throwing
	if (!title || typeof title !== 'string') {
		return null;
	}

	const match = title.match(/[A-Z]{2,3}\d{3,4}\w?/);

	// always null on a miss (never undefined), so callers see a single "no shoot ID" value
	return match ? match[0].toUpperCase() : null;
}
|
||||
|
||||
async function scrapeScene(data, channel, include) {
|
||||
const release = {};
|
||||
const entityUrl = new URL(channel.url).origin;
|
||||
|
||||
release.entryId = data.id;
|
||||
|
||||
release.title = data.scene_name || data.custom_name;
|
||||
|
||||
release.entryId = data.id;
|
||||
release.url = `${entityUrl}/watch/${data.id}/${slugify(release.title, '_')}`;
|
||||
release.url = `${entityUrl}/watch/${data.id}/${slugify(release.title, '_') || ''}`;
|
||||
release.shootId = extractShootId(release.title);
|
||||
|
||||
release.date = new Date(data.release_date || data.publish_date);
|
||||
release.duration = unprint.extractDuration(data.runtime);
|
||||
|
||||
@@ -56,7 +56,7 @@ function slugify(strings, delimiter = '-', {
|
||||
symbolRegex = defaultSymbolRegex,
|
||||
} = {}) {
|
||||
if (!strings || (typeof strings !== 'string' && !Array.isArray(strings))) {
|
||||
return strings;
|
||||
return '';
|
||||
}
|
||||
|
||||
const string = [].concat(strings).join(' ');
|
||||
|
||||
Reference in New Issue
Block a user