Added actor page deep scrape for the few Sperm Mania scenes not on the homepage.
This commit is contained in:
parent
0ac8ae4d5a
commit
1163b010fb
|
@ -185,7 +185,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
|
||||||
|
|
||||||
if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) {
|
if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) {
|
||||||
// scraper is unable to fetch the releases and returned a HTTP code or null
|
// scraper is unable to fetch the releases and returned a HTTP code or null
|
||||||
throw new Error(`Scraper returned '${scrapedRelease}' when fetching latest from '${entity.name}' (${entity.parent?.name})`);
|
throw new Error(`Scraper returned '${scrapedRelease}' when deep fetching (${entity.name}, ${entity.parent?.name}) ${baseRelease.url || baseRelease.path}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// object-merge-advanced will use null as explicit false on hard merged keys, even when null as explicit false is disabled
|
// object-merge-advanced will use null as explicit false on hard merged keys, even when null as explicit false is disabled
|
||||||
|
|
|
@ -15,8 +15,18 @@ const tagsMap = {
|
||||||
'pussy bukkake': ['cum-on-pussy'],
|
'pussy bukkake': ['cum-on-pussy'],
|
||||||
};
|
};
|
||||||
|
|
||||||
function entryIdFromMedia(release) {
|
function entryIdFromMedia(release, toLowercase = true) {
|
||||||
return [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1].toLowerCase();
|
const originalEntryId = [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1];
|
||||||
|
|
||||||
|
if (!originalEntryId) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (toLowercase) {
|
||||||
|
return originalEntryId.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
return originalEntryId;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAll(scenes, tilesByEntryId, channel) {
|
function scrapeAll(scenes, tilesByEntryId, channel) {
|
||||||
|
@ -63,6 +73,9 @@ function scrapeAll(scenes, tilesByEntryId, channel) {
|
||||||
release[key] = value;
|
release[key] = value;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
// most tiles are on the front page, but not all, deep scrape actor's page
|
||||||
|
release.path = release.actors[0]?.url;
|
||||||
}
|
}
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
|
@ -94,9 +107,6 @@ function scrapeAllTiles(tiles, channel) {
|
||||||
const release = {};
|
const release = {};
|
||||||
const sceneString = query.content();
|
const sceneString = query.content();
|
||||||
|
|
||||||
const originalEntryId = query.attribute('.scene-hover', 'data-path');
|
|
||||||
release.entryId = originalEntryId?.toLowerCase();
|
|
||||||
|
|
||||||
release.title = query.content('.scene-title');
|
release.title = query.content('.scene-title');
|
||||||
|
|
||||||
release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD');
|
release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD');
|
||||||
|
@ -106,7 +116,7 @@ function scrapeAllTiles(tiles, channel) {
|
||||||
name: unprint.query.content(actorEl),
|
name: unprint.query.content(actorEl),
|
||||||
url: channel.slug === 'fellatiojapan'
|
url: channel.slug === 'fellatiojapan'
|
||||||
? `${channel.url}/en/girl/${unprint.query.url(actorEl, null)}`
|
? `${channel.url}/en/girl/${unprint.query.url(actorEl, null)}`
|
||||||
: unprint.query.element(actorEl, null, { origin: channel.url }),
|
: unprint.query.url(actorEl, null, { origin: channel.url }),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.tags = [...query.contents('.data a[href*="/tag"]'), ...(tagsMap[query.content('.scene-type')?.toLowerCase()] || [])].filter(Boolean);
|
release.tags = [...query.contents('.data a[href*="/tag"]'), ...(tagsMap[query.content('.scene-type')?.toLowerCase()] || [])].filter(Boolean);
|
||||||
|
@ -124,6 +134,10 @@ function scrapeAllTiles(tiles, channel) {
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const originalEntryId = query.attribute('.scene-hover', 'data-path') || entryIdFromMedia(release, false);
|
||||||
|
|
||||||
|
release.entryId = originalEntryId?.toLowerCase();
|
||||||
|
|
||||||
release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`;
|
release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`;
|
||||||
|
|
||||||
release.photoCount = Number(sceneString.match(/(\d+) photos/)?.[1]) || null;
|
release.photoCount = Number(sceneString.match(/(\d+) photos/)?.[1]) || null;
|
||||||
|
@ -699,20 +713,21 @@ function scrapeSceneCospuri({ query }, { url, entity }) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fellatio Japan
|
// Sperm Mania, Fellatio Japan
|
||||||
async function fetchSceneFellatio(url, channel, baseRelease) {
|
async function fetchScene(url, channel, baseRelease) {
|
||||||
if (!baseRelease.entryId || !baseRelease.path) {
|
if (!baseRelease.entryId || !baseRelease.path) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape
|
// no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape
|
||||||
// can't use front page like on Sperm Mania because dates are missing
|
// can't use front page like on Sperm Mania because dates are missing
|
||||||
const res = await unprint.get(baseRelease.path, { selectAll: '.scene-obj' });
|
const res = await unprint.get(baseRelease.path, { selectAll: '.scene, .scene-obj' });
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
const tiles = scrapeAllTiles(res.context, channel);
|
const tiles = scrapeAllTiles(res.context, channel);
|
||||||
|
const sceneTile = tiles.find((tile) => tile.entryId === baseRelease.entryId) || null;
|
||||||
|
|
||||||
return tiles.find((tile) => tile.entryId === baseRelease.entryId) || null;
|
return sceneTile;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
@ -830,6 +845,7 @@ async function fetchProfile({ slug, url: actorUrl }, { entity, parameters }) {
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchProfile,
|
fetchProfile,
|
||||||
|
fetchScene,
|
||||||
cospuri: {
|
cospuri: {
|
||||||
fetchLatest: fetchLatestCospuri,
|
fetchLatest: fetchLatestCospuri,
|
||||||
scrapeScene: scrapeSceneCospuri,
|
scrapeScene: scrapeSceneCospuri,
|
||||||
|
@ -837,7 +853,7 @@ module.exports = {
|
||||||
},
|
},
|
||||||
fellatio: {
|
fellatio: {
|
||||||
fetchLatest: fetchLatestFellatio,
|
fetchLatest: fetchLatestFellatio,
|
||||||
fetchScene: fetchSceneFellatio,
|
fetchScene,
|
||||||
fetchProfile,
|
fetchProfile,
|
||||||
},
|
},
|
||||||
handjob: {
|
handjob: {
|
||||||
|
|
Loading…
Reference in New Issue