Splitting Han titles and actors in Model Media scraper.

This commit is contained in:
DebaucheryLibrarian
2023-08-02 02:14:41 +02:00
parent 5783507344
commit 8c1f1b69ff
22 changed files with 80 additions and 10 deletions

View File

@@ -14,10 +14,38 @@ function scrapeAll(scenes) {
const { origin, pathname, searchParams } = new URL(url);
release.url = `${origin}${pathname}`;
release.actors = searchParams.get('models_name')?.split(',');
release.shootId = pathname.match(/((LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))[\w-]+/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives
release.actors = searchParams.get('models_name')?.split(',').map((actor) => {
const [han, english] = actor.split('/').map((name) => name.trim());
if (/amateur/i.test(english)) {
// not a name
return null;
}
return {
name: english || han,
alias: english && han,
};
}).filter(Boolean);
}
const rawTitle = query.content('.video-title div')?.replace(release.shootId, '');
if (rawTitle) {
// find / closest to Han in case there are multiple, account for no / at all
const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index;
const splitIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/') || hanIndex;
if (hanIndex && splitIndex > -1) {
release.title = rawTitle.slice(0, splitIndex).trim();
release.altTitles = [rawTitle.slice(splitIndex + 1).trim()];
} else {
release.title = rawTitle;
}
}
release.title = query.content('.video-title div');
release.duration = query.duration('.timestamp');
const poster = query.img('img', { attribute: 'data-src' });
@@ -31,8 +59,6 @@ function scrapeAll(scenes) {
release.teaser = query.video(null, { attribute: 'data-video-src' });
console.log(release);
return release;
});
}
@@ -49,17 +75,16 @@ function scrapeProfile({ query }) {
}
profile.description = query.content('h2') || null;
profile.height = query.number('//span[text()="Measurements"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });
profile.measurements = query.number('//span[text()="Birth Place"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
// can't find a single profile with this information available, but add for good measure
profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');
profile.birthPlace = query.number('//span[text()="Birth Place"]/following-sibling::span');
profile.banner = query.img('div[class*="banner"] > img');
profile.photos = query.imgs('#MusModelSwiper img');
console.log(profile);
return profile;
}

View File

@@ -82,6 +82,7 @@ const scrapers = {
americanpornstar,
amateureuro: porndoe,
archangel,
asiam: modelmedia,
assylum,
aziani,
badoink,
@@ -115,6 +116,7 @@ const scrapers = {
interracialpass: hush,
inthecrack,
jayrock,
jerkaoke: modelmedia,
jesseloadsmonsterfacials,
julesjordan,
karups,
@@ -178,6 +180,7 @@ const scrapers = {
analviolation: fullpornnetwork,
anilos: nubiles,
archangel,
asiam: modelmedia,
aziani,
babes: mindgeek,
babevr: badoink,
@@ -234,6 +237,7 @@ const scrapers = {
interracialpovs: hush,
inthecrack,
jamesdeen: fullpornnetwork,
jerkaoke: modelmedia,
julesjordan,
karups,
kellymadison,