Returning unextracted scenes from Kelly Madison / Teen Fidelity scraper. Fixed Dogfart profile scraper to use extracted scenes.

This commit is contained in:
DebaucheryLibrarian 2022-04-03 00:53:27 +02:00
parent e202e887f9
commit 9be80e2be9
2 changed files with 13 additions and 9 deletions

View File

@ -106,8 +106,9 @@ async function fetchProfile(baseActor, entity) {
const res = await qu.getAll(url, '.recent-updates'); const res = await qu.getAll(url, '.recent-updates');
if (res.ok) { if (res.ok) {
const scenes = scrapeLatest(res.items, entity, false); const { scenes } = scrapeLatest(res.items, entity, false);
// no bio available
return { scenes }; return { scenes };
} }

View File

@ -16,7 +16,7 @@ const siteMapByKey = {
const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, value]) => ({ ...acc, [value]: key }), {}); const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, value]) => ({ ...acc, [value]: key }), {});
function scrapeLatest(scenes, site) { function scrapeLatest(scenes, site) {
return scenes.map(({ query }) => { return scenes.reduce((acc, { query }) => {
const release = {}; const release = {};
release.shootId = query.q('.card-meta .text-right, .row .text-right, .card-footer-item:last-child', true); release.shootId = query.q('.card-meta .text-right, .row .text-right, .card-footer-item:last-child', true);
@ -24,11 +24,6 @@ function scrapeLatest(scenes, site) {
const siteId = release.shootId.match(/\d?\w{2}/)[0]; const siteId = release.shootId.match(/\d?\w{2}/)[0];
const siteSlug = siteMapByKey[siteId]; const siteSlug = siteMapByKey[siteId];
if (site.slug !== siteSlug) {
// using generic network overview, scene is not from the site we want
return null;
}
const { pathname } = new URL(query.url('h5 a, .ep-title a, .title a')); const { pathname } = new URL(query.url('h5 a, .ep-title a, .title a'));
[release.entryId] = pathname.match(/\d+$/); [release.entryId] = pathname.match(/\d+$/);
release.url = `${site.url}${pathname}`; release.url = `${site.url}${pathname}`;
@ -52,8 +47,16 @@ function scrapeLatest(scenes, site) {
}; };
} }
return release; if (site.slug !== siteSlug) {
}).filter((scene) => scene); // using generic network overview, scene is not from the site we want
return { ...acc, unextracted: [...acc.unextracted, release] };
}
return { ...acc, scenes: [...acc.scenes, release] };
}, {
scenes: [],
unextracted: [],
});
} }
async function scrapeScene({ query, html }, url, baseRelease, channel, session) { async function scrapeScene({ query, html }, url, baseRelease, channel, session) {