Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.

This commit is contained in:
2020-05-14 04:26:05 +02:00
parent f1eb29c713
commit 11eb66f834
178 changed files with 16594 additions and 16929 deletions

View File

@@ -4,49 +4,49 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio');
// Helpers re-used from the sibling ./mindgeek module. Each name is bound
// exactly once: repeating a name inside a single `const` destructuring
// pattern is a SyntaxError.
const {
	scrapeLatestX,
	fetchLatest,
	fetchScene,
	fetchProfile,
} = require('./mindgeek');
/**
 * Scrape releases from a page that embeds its data as an `initialState`
 * object literal inside an inline <script> tag.
 *
 * @param {string} html - Raw HTML of the listing page.
 * @param {Object} site - Site descriptor; passed through untouched to scrapeLatestX.
 * @returns {Array} One scrapeLatestX result per value of `entities.releases`.
 * @throws {Error} If no script mentioning "initialState" exists, or the
 *   `initialState = {` … `};` markers cannot be located inside it.
 * @throws {SyntaxError} If the text between the markers is not valid JSON.
 */
function scrapeLatestClassic(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const stateTag = $('script:contains("initialState")').html();

	if (!stateTag) {
		throw new Error('Unable to find a script tag containing initialState');
	}

	const prefix = 'initialState = {';
	const suffix = '};';

	const prefixIndex = stateTag.indexOf(prefix);
	// Only look for the terminator after the assignment starts.
	const suffixIndex = prefixIndex === -1 ? -1 : stateTag.indexOf(suffix, prefixIndex);

	if (prefixIndex === -1 || suffixIndex === -1) {
		throw new Error('Unable to locate the initialState object in the script tag');
	}

	// The "- 1" on both ends keeps the opening "{" (last char of the prefix)
	// and the closing "}" (first char of the suffix) while dropping the ";".
	const stateString = stateTag.slice(prefixIndex + prefix.length - 1, suffixIndex + suffix.length - 1);
	const data = JSON.parse(stateString);

	return Object.values(data.entities.releases).map(scene => scrapeLatestX(scene, site));
}
/**
 * Fetch one page of the site's `/scenes` listing and scrape it with
 * scrapeLatestClassic.
 *
 * @param {Object} site - Site descriptor; `site.url` is used as the base URL.
 * @param {number} page - Page number placed in the `page` query parameter.
 * @returns {Promise<Array|null>} The scraped releases, or null when the
 *   response status is anything other than 200.
 */
async function fetchClassic(site, page) {
	const res = await bhttp.get(`${site.url}/scenes?page=${page}`);

	if (res.statusCode === 200) {
		return scrapeLatestClassic(res.body.toString(), site);
	}

	return null;
}
/**
 * Entry point for fetching the latest releases: sites flagged with
 * `parameters.classic` go through fetchClassic, every other site is
 * delegated to the fetchLatest imported from ./mindgeek.
 *
 * @param {Object} site - Site descriptor; `site.parameters` may be absent.
 * @param {number} [page=1] - Page number to fetch.
 * @returns {Promise<*>} Whatever the selected fetcher resolves to.
 */
async function fetchLatestWrap(site, page = 1) {
	if (site.parameters?.classic) {
		return fetchClassic(site, page);
	}

	return fetchLatest(site, page);
}
/**
 * Fetch an actor profile by delegating to the fetchProfile imported from
 * ./mindgeek, always passing 'realitykings' as the second argument.
 * NOTE(review): the second argument is presumably a network identifier —
 * confirm against fetchProfile in ./mindgeek.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<*>} Whatever fetchProfile resolves to.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'realitykings');
}
module.exports = {
fetchLatest: fetchLatestWrap,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest: fetchLatestWrap,
fetchProfile: networkFetchProfile,
fetchScene,
};