Fixed RK scraper returning dick size as bust size.

This commit is contained in:
ThePendulum 2020-02-23 22:01:12 +01:00
parent b9c8950f6d
commit 8359f78e2e
3 changed files with 14 additions and 7 deletions

View File

@@ -315,7 +315,7 @@ async function mergeProfiles(profiles, actor) {
residencePlace: prevProfile.residencePlace || profile.residencePlace,
nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
ethnicity: prevProfile.ethnicity || profile.ethnicity,
bust: prevProfile.bust || profile.bust,
bust: prevProfile.bust || (/\d+\w+/.test(profile.bust) && profile.bust),
waist: prevProfile.waist || profile.waist,
hip: prevProfile.hip || profile.hip,
naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs,

View File

@@ -115,7 +115,7 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
return null;
}
logger.verbose(`Fetching media item from ${source.src || source}`);
logger.verbose(`Fetching ${domain} ${role} from ${source.src || source}`);
// const res = await bhttp.get(source.src || source);
const res = await get(source.src || source);
@@ -199,8 +199,13 @@ async function saveItems(items, domain, role) {
logger.verbose(`Saved ${domain} ${role} to ${filepath}`);
return {
...item,
filepath,
mimetype: item.mimetype,
extension: item.extension,
hash: item.hash,
entropy: item.entropy,
quality: item.quality,
source: item.source,
};
} catch (error) {
logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`);
@@ -250,7 +255,7 @@ async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) {
const { hash: fetchedItemsByHash } = groupItems(fetchedItems);
// find hash duplicates that don't need to be re-saved
const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy >= entropyFilter);
const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy === null || item.entropy >= entropyFilter);
const existingHashItems = await knex('media').whereIn('hash', uniqueFetchedItems.map(item => item.hash));
const { hash: existingHashItemsByHash } = groupItems(existingHashItems);

View File

@ -150,9 +150,11 @@ function scrapeProfile(data, html, releases = [], networkName) {
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
if (bust) profile.bust = bust.toUpperCase();
if (waist) profile.waist = waist;
if (hip) profile.hip = hip;
if (profile.gender === 'female') {
if (bust) profile.bust = bust.toUpperCase();
if (waist) profile.waist = waist;
if (hip) profile.hip = hip;
}
if (data.birthPlace) profile.birthPlace = data.birthPlace;
if (data.height) profile.height = inchesToCm(data.height);