Filter out empty or unidentified scenes in the update scraper and log a warning. Improve Jesse Loads Monster Facials reliability.

This commit is contained in:
DebaucheryLibrarian 2020-10-29 15:20:59 +01:00
parent f4b1fb4831
commit b188bc5744
6 changed files with 30 additions and 10 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -3415,21 +3415,21 @@ const sites = [
{ {
slug: 'paintoy', slug: 'paintoy',
name: 'Paintoy', name: 'Paintoy',
url: 'https://www.paintoy.com', url: 'http://www.paintoy.com',
tags: ['bdsm'], tags: ['bdsm'],
parent: 'insex', parent: 'insex',
}, },
{ {
slug: 'aganmedon', slug: 'aganmedon',
name: 'Agan Medon', name: 'Agan Medon',
url: 'https://www.aganmedon.com', url: 'http://www.aganmedon.com',
tags: ['bdsm', 'animated'], tags: ['bdsm', 'animated'],
parent: 'insex', parent: 'insex',
}, },
{ {
slug: 'sensualpain', slug: 'sensualpain',
name: 'Sensual Pain', name: 'Sensual Pain',
url: 'https://www.sensualpain.com', url: 'http://www.sensualpain.com',
tags: ['bdsm'], tags: ['bdsm'],
parent: 'insex', parent: 'insex',
}, },

View File

@ -1,23 +1,35 @@
'use strict'; 'use strict';
const { get, initAll } = require('../utils/qu'); const { get, initAll, formatDate } = require('../utils/qu');
function scrapeLatest(scenes, dates, site) { function scrapeLatest(scenes, dates, site) {
return scenes.map(({ qu }, index) => { return scenes.map(({ qu }, index) => {
const release = {}; const release = {};
const path = qu.url('a[href*="videos/"]');
const path = qu.url('a'); if (path) {
release.url = `${site.url}/visitors/${path}`; release.url = `${site.url}/visitors/${path}`;
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1]; }
if (dates && dates[index]) { if (dates && dates[index]) {
release.date = dates[index].qu.date(null, 'MM/DD/YYYY'); release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
} }
const entryId = path?.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1]
|| qu.img('img[src*="graphics/fft"]')?.match(/fft_(\w+).gif/)?.[1];
if (!entryId) {
return null;
}
release.entryId = release.date ? `${formatDate(release.date, 'YYYY-MM-DD')}-${entryId}` : entryId;
release.description = qu.q('tbody tr:nth-child(3) font', true); release.description = qu.q('tbody tr:nth-child(3) font', true);
const infoLine = qu.q('font[color="#663366"]', true); const infoLine = qu.q('font[color="#663366"]', true);
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
if (infoLine) {
release.duration = Number(infoLine.match(/(\d+) min/i)?.[1] || infoLine.match(/video: (\d+)/i)?.[1]) * 60 || null;
}
const poster = qu.img('img[src*="photos/"][width="400"]'); const poster = qu.img('img[src*="photos/"][width="400"]');
release.poster = `${site.url}/visitors/${poster}`; release.poster = `${site.url}/visitors/${poster}`;

View File

@ -109,7 +109,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
return accReleases; return accReleases;
} }
const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity })); const validPageReleases = pageReleases.filter(release => release?.entryId); // filter out empty and unidentified releases
const pageReleasesWithEntity = validPageReleases.map(release => ({ ...release, entity: release.entity || entity }));
if (pageReleases.length > validPageReleases.length) {
logger.warn(`Found ${pageReleases.length - validPageReleases.length} empty or unidentified releases on page ${page} for '${entity.name}'`);
}
if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) { if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming); return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
@ -119,6 +124,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
} }
const releases = await scrapeReleasesPage(argv.page || 1, []); const releases = await scrapeReleasesPage(argv.page || 1, []);
const hasDates = releases.every(release => !!release.date); const hasDates = releases.every(release => !!release.date);
const limitedReleases = (argv.last && releases.slice(0, Math.max(argv.last, 0))) const limitedReleases = (argv.last && releases.slice(0, Math.max(argv.last, 0)))
@ -133,7 +139,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
} }
async function scrapeLatestReleases(scraper, entity, preData) { async function scrapeLatestReleases(scraper, entity, preData) {
if ((!argv.latest && !argv.last && !argv.after) || !scraper.fetchLatest) { if ((!argv.latest && !argv.last) || !scraper.fetchLatest) {
return emptyReleases; return emptyReleases;
} }