Splitting Han titles and actors in Model Media scraper.
This commit is contained in:
@@ -14,10 +14,38 @@ function scrapeAll(scenes) {
|
||||
const { origin, pathname, searchParams } = new URL(url);
|
||||
|
||||
release.url = `${origin}${pathname}`;
|
||||
release.actors = searchParams.get('models_name')?.split(',');
|
||||
release.shootId = pathname.match(/((LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))[\w-]+/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives
|
||||
|
||||
release.actors = searchParams.get('models_name')?.split(',').map((actor) => {
|
||||
const [han, english] = actor.split('/').map((name) => name.trim());
|
||||
|
||||
if (/amateur/i.test(english)) {
|
||||
// not a name
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
name: english || han,
|
||||
alias: english && han,
|
||||
};
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
const rawTitle = query.content('.video-title div')?.replace(release.shootId, '');
|
||||
|
||||
if (rawTitle) {
|
||||
// find / closest to Han in case there are multiple, account for no / at all
|
||||
const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index;
|
||||
const splitIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/') || hanIndex;
|
||||
|
||||
if (hanIndex && splitIndex > -1) {
|
||||
release.title = rawTitle.slice(0, splitIndex).trim();
|
||||
release.altTitles = [rawTitle.slice(splitIndex + 1).trim()];
|
||||
} else {
|
||||
release.title = rawTitle;
|
||||
}
|
||||
}
|
||||
|
||||
release.title = query.content('.video-title div');
|
||||
release.duration = query.duration('.timestamp');
|
||||
|
||||
const poster = query.img('img', { attribute: 'data-src' });
|
||||
@@ -31,8 +59,6 @@ function scrapeAll(scenes) {
|
||||
|
||||
release.teaser = query.video(null, { attribute: 'data-video-src' });
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
@@ -49,17 +75,16 @@ function scrapeProfile({ query }) {
|
||||
}
|
||||
|
||||
profile.description = query.content('h2') || null;
|
||||
profile.height = query.number('//span[text()="Measurements"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });
|
||||
|
||||
profile.measurements = query.number('//span[text()="Birth Place"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
// can't find a single profile with this information available, but add for good measure
|
||||
profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');
|
||||
profile.birthPlace = query.number('//span[text()="Birth Place"]/following-sibling::span');
|
||||
|
||||
profile.banner = query.img('div[class*="banner"] > img');
|
||||
profile.photos = query.imgs('#MusModelSwiper img');
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user