Fixed RK scraper returning penis size as bust size.

This commit is contained in:
ThePendulum 2020-02-23 22:01:12 +01:00
parent b9c8950f6d
commit 8359f78e2e
3 changed files with 14 additions and 7 deletions

View File

@@ -315,7 +315,7 @@ async function mergeProfiles(profiles, actor) {
residencePlace: prevProfile.residencePlace || profile.residencePlace, residencePlace: prevProfile.residencePlace || profile.residencePlace,
nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
ethnicity: prevProfile.ethnicity || profile.ethnicity, ethnicity: prevProfile.ethnicity || profile.ethnicity,
bust: prevProfile.bust || profile.bust, bust: prevProfile.bust || (/\d+\w+/.test(profile.bust) && profile.bust),
waist: prevProfile.waist || profile.waist, waist: prevProfile.waist || profile.waist,
hip: prevProfile.hip || profile.hip, hip: prevProfile.hip || profile.hip,
naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs, naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs,

View File

@@ -115,7 +115,7 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
return null; return null;
} }
logger.verbose(`Fetching media item from ${source.src || source}`); logger.verbose(`Fetching ${domain} ${role} from ${source.src || source}`);
// const res = await bhttp.get(source.src || source); // const res = await bhttp.get(source.src || source);
const res = await get(source.src || source); const res = await get(source.src || source);
@@ -199,8 +199,13 @@ async function saveItems(items, domain, role) {
logger.verbose(`Saved ${domain} ${role} to ${filepath}`); logger.verbose(`Saved ${domain} ${role} to ${filepath}`);
return { return {
...item,
filepath, filepath,
mimetype: item.mimetype,
extension: item.extension,
hash: item.hash,
entropy: item.entropy,
quality: item.quality,
source: item.source,
}; };
} catch (error) { } catch (error) {
logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`); logger.error(`Failed to store ${domain} ${role} from ${item.source}: ${error.message}`);
@@ -250,7 +255,7 @@ async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) {
const { hash: fetchedItemsByHash } = groupItems(fetchedItems); const { hash: fetchedItemsByHash } = groupItems(fetchedItems);
// find hash duplicates that don't need to be re-saved // find hash duplicates that don't need to be re-saved
const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy >= entropyFilter); const uniqueFetchedItems = Object.values(fetchedItemsByHash).filter(item => !entropyFilter || item.entropy === null || item.entropy >= entropyFilter);
const existingHashItems = await knex('media').whereIn('hash', uniqueFetchedItems.map(item => item.hash)); const existingHashItems = await knex('media').whereIn('hash', uniqueFetchedItems.map(item => item.hash));
const { hash: existingHashItemsByHash } = groupItems(existingHashItems); const { hash: existingHashItemsByHash } = groupItems(existingHashItems);

View File

@@ -150,9 +150,11 @@ function scrapeProfile(data, html, releases = [], networkName) {
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
if (bust) profile.bust = bust.toUpperCase(); if (profile.gender === 'female') {
if (waist) profile.waist = waist; if (bust) profile.bust = bust.toUpperCase();
if (hip) profile.hip = hip; if (waist) profile.waist = waist;
if (hip) profile.hip = hip;
}
if (data.birthPlace) profile.birthPlace = data.birthPlace; if (data.birthPlace) profile.birthPlace = data.birthPlace;
if (data.height) profile.height = inchesToCm(data.height); if (data.height) profile.height = inchesToCm(data.height);