diff --git a/assets/js/actors/actions.js b/assets/js/actors/actions.js index ee2de9c9..91bf3fa0 100644 --- a/assets/js/actors/actions.js +++ b/assets/js/actors/actions.js @@ -76,7 +76,7 @@ function initActorActions(store, _router) { name slug gender - birthdate + birthdate: dateOfBirth age ethnicity bust @@ -229,7 +229,7 @@ function initActorActions(store, _router) { name slug age - birthdate + birthdate: dateOfBirth gender network { id diff --git a/assets/js/fragments.js b/assets/js/fragments.js index d89c7bdc..420519e9 100644 --- a/assets/js/fragments.js +++ b/assets/js/fragments.js @@ -34,7 +34,7 @@ const actorFields = ` id name slug - birthdate + birthdate: dateOfBirth age gender network { diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index 043c8add..fcdf76e8 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -272,7 +272,9 @@ exports.up = knex => Promise.resolve() .references('id') .inTable('actors'); - table.date('birthdate'); + table.date('date_of_birth'); + table.date('date_of_death'); + table.string('gender', 18); table.text('description'); @@ -290,7 +292,8 @@ exports.up = knex => Promise.resolve() table.string('ethnicity'); - table.string('bust', 10); + table.string('cup', 4); + table.integer('bust', 3); table.integer('waist', 3); table.integer('hip', 3); table.boolean('natural_boobs'); @@ -330,10 +333,11 @@ exports.up = knex => Promise.resolve() .references('id') .inTable('sites'); - table.unique(['actor_id', 'network_id']); - table.unique(['actor_id', 'site_id']); + table.unique(['actor_id', 'network_id', 'site_id']); + + table.date('date_of_birth'); + table.date('date_of_death'); - table.date('birthdate'); table.string('gender', 18); table.text('description'); @@ -351,7 +355,8 @@ exports.up = knex => Promise.resolve() table.string('ethnicity'); - table.string('bust', 10); + table.string('cup', 4); + table.integer('bust', 3); table.integer('waist', 3); table.integer('hip', 3); table.boolean('natural_boobs'); diff --git a/public/img/logos/modelcentro/misc/spank-monster_tld.png b/public/img/logos/modelcentro/misc/spank-monster_tld.png new file mode 100644 index 00000000..d71d476f Binary files /dev/null and b/public/img/logos/modelcentro/misc/spank-monster_tld.png differ diff --git a/public/img/logos/modelcentro/spankmonster.png b/public/img/logos/modelcentro/spankmonster.png new file mode 100644 index 00000000..1f91af7a Binary files /dev/null and b/public/img/logos/modelcentro/spankmonster.png differ diff --git a/src/actors.js b/src/actors.js index 5dcb7012..f0e02d57 100644 --- a/src/actors.js +++ b/src/actors.js @@ -5,11 +5,16 @@ const Promise = require('bluebird'); // const logger = require('./logger')(__filename); const knex = require('./knex'); -const scrapers = require('./scrapers/scrapers'); +const scrapers = require('./scrapers/scrapers').actors; const argv = require('./argv'); +const include = require('./utils/argv-include')(argv); +const logger = require('./logger')(__filename); const slugify = require('./utils/slugify'); const capitalize = require('./utils/capitalize'); +const resolvePlace = require('./utils/resolve-place'); + +const { toBaseReleases } = require('./deep'); function toBaseActors(actorsOrNames, release) { return actorsOrNames.map((actorOrName) => { @@ -46,6 +51,224 @@ function curateActorEntries(baseActors, batchId) { return baseActors.map(baseActor => curateActorEntry(baseActor, batchId)); } +function curateProfileEntry(profile) { + const curatedProfileEntry = { + actor_id: profile.id, + site_id: profile.site?.id || null, + network_id: profile.network?.id || null, + date_of_birth: profile.dateOfBirth, + date_of_death: profile.dateOfDeath, + gender: profile.gender, + ethnicity: profile.ethnicity, + description: profile.description, + birth_city: profile.placeOfBirth?.city || null, + birth_state: profile.placeOfBirth?.state || null, + birth_country_alpha2: profile.placeOfBirth?.country?.alpha2 || null, + residence_city: profile.placeOfResidence?.city || null, + residence_state: profile.placeOfResidence?.state || null, + residence_country_alpha2: profile.placeOfResidence?.country?.alpha2 || null, + cup: profile.cup, + bust: profile.bust, + waist: profile.waist, + hip: profile.hip, + natural_boobs: profile.naturalBoobs, + height: profile.height, + weight: profile.weight, + hair: profile.hair, + eyes: profile.eyes, + has_tattoos: profile.hasTattoos, + has_piercings: profile.hasPiercings, + piercings: profile.piercings, + tattoos: profile.tattoos, + }; + + return curatedProfileEntry; +} + +async function curateProfile(profile) { + try { + const curatedProfile = { + id: profile.id, + name: profile.name, + avatar: profile.avatar, + }; + + curatedProfile.site = profile.site.isNetwork ? null : profile.site; + curatedProfile.network = profile.site.isNetwork ? profile.site : null; + + curatedProfile.description = profile.description?.trim() || null; + curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available + curatedProfile.ethnicity = profile.ethnicity?.trim() || null; + curatedProfile.hair = profile.hair?.trim() || null; + curatedProfile.eyes = profile.eyes?.trim() || null; + curatedProfile.tattoos = profile.tattoos?.trim() || null; + curatedProfile.piercings = profile.piercings?.trim() || null; + + curatedProfile.gender = (/female/i.test(profile.gender) && 'female') + || (/shemale/i.test(profile.gender) && 'transsexual') + || (/male/i.test(profile.gender) && 'male') + || (/trans/i.test(profile.gender) && 'transsexual') + || null; + + curatedProfile.dateOfBirth = (!Number.isNaN(Number(profile.dateOfBirth || profile.birthdate)) // possibly valid date + && new Date() - profile.birthdate > 567648000000 // over 18 + && profile.birthdate) + || null; + + curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath; + + curatedProfile.cup = profile.cup || profile.bust?.match(/[a-zA-Z]+/)?.[0] || null; + curatedProfile.bust = Number(profile.bust) || profile.bust?.match(/\d+/)?.[0] || null; + curatedProfile.waist = Number(profile.waist) || profile.waist?.match(/\d+/)?.[0] || null; + curatedProfile.hip = Number(profile.hip) || profile.hip?.match(/\d+/)?.[0] || null; + curatedProfile.height = Number(profile.height) || profile.height?.match(/\d+/)?.[0] || null; + curatedProfile.weight = Number(profile.weight) || profile.weight?.match(/\d+/)?.[0] || null; + + curatedProfile.naturalBoobs = typeof profile.naturalBoobs === 'boolean' ? profile.naturalBoobs : null; + curatedProfile.hasTattoos = typeof profile.hasTattoos === 'boolean' ? profile.hasTattoos : null; + curatedProfile.hasPiercings = typeof profile.hasPiercings === 'boolean' ? profile.hasPiercings : null; + + const [placeOfBirth, placeOfResidence] = await Promise.all([ + resolvePlace(profile.birthPlace), + resolvePlace(profile.residencePlace), + ]); + + curatedProfile.placeOfBirth = placeOfBirth; + curatedProfile.placeOfResidence = placeOfResidence; + + if (!curatedProfile.placeOfBirth && curatedProfile.nationality) { + const country = await knex('countries') + .where('nationality', 'ilike', `%${curatedProfile.nationality}%`) + .orderBy('priority', 'desc') + .first(); + + curatedProfile.placeOfBirth = { + country: country.alpha2, + }; + } + + curatedProfile.social = Array.isArray(profile.social) + ? profile.social.map((social) => { + try { + const { href } = new URL(); + return href; + } catch (error) { + logger.warn(`Profile scraper for '${profile.site.name}' returned invalid social link: ${social}`); + return null; + } + }).filter(Boolean) + : []; + + curatedProfile.releases = toBaseReleases(profile.releases); + + return curatedProfile; + } catch (error) { + logger.error(`Failed to curate '${profile.name}': ${error.message}`); + + return null; + } +} + +async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) { + const profiles = Promise.map(sources, async (source) => { + try { + return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => { + const scraper = scrapers[scraperSlug]; + const siteOrNetwork = networksBySlug[scraperSlug] || sitesBySlug[scraperSlug]; + + if (!scraper?.fetchProfile) { + logger.warn(`No profile profile scraper available for ${scraperSlug}`); + throw new Error(`No profile profile scraper available for ${scraperSlug}`); + } + + if (!siteOrNetwork) { + logger.warn(`No site or network found for ${scraperSlug}`); + throw new Error(`No site or network found for ${scraperSlug}`); + } + + logger.verbose(`Searching profile for '${actor.name}' on '${scraperSlug}'`); + + const profile = await scraper.fetchProfile(actor.name, scraperSlug, siteOrNetwork, include); + + if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure + logger.verbose(`Profile for '${actor.name}' not available on ${scraperSlug}, scraper returned ${profile}`); + throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${scraperSlug}`), { code: 'PROFILE_NOT_AVAILABLE' }); + } + + return { + ...actor, + ...profile, + site: siteOrNetwork, + }; + }), Promise.reject(new Error())); + } catch (error) { + if (error.code !== 'PROFILE_NOT_AVAILABLE') { + logger.error(`Failed to fetch profile for '${actor.name}': ${error.message}`); + } + } + + return null; + }); + + return profiles.filter(Boolean); +} + +async function upsertProfiles(curatedProfileEntries) { + const existingProfiles = await knex('actors_profiles') + .whereIn(['actor_id', 'network_id'], curatedProfileEntries.map(entry => [entry.actor_id, entry.network_id])) + .orWhereIn(['actor_id', 'site_id'], curatedProfileEntries.map(entry => [entry.actor_id, entry.site_id])); + + const existingProfilesByActorNetworkSiteIds = existingProfiles.reduce((acc, profile) => ({ + ...acc, + [profile.actor_id]: { + ...acc[profile.actor_id], + [profile.network_id]: { + ...acc[profile.actor_id]?.[profile.network_id], + [profile.site_id]: profile, + }, + }, + }), {}); + + const { updatingProfileEntries, newProfileEntries } = curatedProfileEntries.reduce((acc, profile) => { + const existingProfile = existingProfilesByActorNetworkSiteIds[profile.actor_id]?.[profile.network_id]?.[profile.site_id]; + + if (existingProfile) { + return { + ...acc, + updatingProfileEntries: [...acc.updatingProfileEntries, { + ...profile, + id: existingProfile.id, + }], + }; + } + + return { + ...acc, + newProfileEntries: [...acc.newProfileEntries, profile], + }; + }, { + updatingProfileEntries: [], + newProfileEntries: [], + }); + + if (newProfileEntries.length > 0) { + await knex('actors_profiles').insert(newProfileEntries); + } + + if (argv.force && updatingProfileEntries.length > 0) { + knex.transaction(async (transaction) => { + const queries = updatingProfileEntries.map(profileEntry => knex('actors_profiles') + .where('id', profileEntry.id) + .update(profileEntry) + .transacting(transaction)); + + return Promise.all(queries) + .then(transaction.commit) + .catch(transaction.rollback); + }); + } +} + async function scrapeActors(actorNames) { const baseActors = toBaseActors(actorNames); @@ -71,9 +294,20 @@ async function scrapeActors(actorNames) { const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId); const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']); - const actorEntries = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []); + const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []); - console.log(actorEntries, newActorEntries, actorEntries); + // TODO: don't fetch existing profiles unless --force is used + + const profilesPerActor = await Promise.map( + actors, + async actor => scrapeProfiles(actor, sources, networksBySlug, sitesBySlug), + { concurrency: 10 }, + ); + + const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile))); + const curatedProfileEntries = profiles.map(profile => curateProfileEntry(profile)); + + await upsertProfiles(curatedProfileEntries); } async function getOrCreateActors(baseActors, batchId) { diff --git a/src/deep.js b/src/deep.js index c648e6cf..c862bf85 100644 --- a/src/deep.js +++ b/src/deep.js @@ -167,4 +167,5 @@ module.exports = { fetchReleases, fetchScenes, fetchMovies, + toBaseReleases, }; diff --git a/src/utils/posters.js b/src/utils/posters.js index 541aec0f..178af2c6 100644 --- a/src/utils/posters.js +++ b/src/utils/posters.js @@ -8,10 +8,10 @@ const moment = require('moment'); const argv = require('../argv'); const knex = require('../knex'); -async function init() { +async function actorPosters(actorNames) { const posters = await knex('actors') .select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'media.index', 'sites.name as site_name', 'networks.name as network_name') - .whereIn('actors.name', (argv.actors || []).concat(argv._)) + .whereIn('actors.name', actorNames) .join('releases_actors', 'releases_actors.actor_id', 'actors.id') .join('releases', 'releases_actors.release_id', 'releases.id') .join('sites', 'sites.id', 'releases.site_id') @@ -37,4 +37,41 @@ async function init() { knex.destroy(); } +async function sitePosters(siteSlugs) { + const posters = await knex('sites') + .select('sites.name as site_name', 'releases.title', 'releases.date', 'media.path') + .whereIn('sites.slug', siteSlugs) + .join('releases', 'releases.site_id', 'sites.id') + .join('releases_posters', 'releases_posters.release_id', 'releases.id') + .join('media', 'releases_posters.media_id', 'media.id'); + // .where('releases.date', '<', '2020-01-01'); + + const files = await Promise.all(posters.map(async (poster) => { + const directory = path.join(config.media.path, 'extracted', poster.site_name); + + const source = path.join(config.media.path, poster.path); + const target = path.join(directory, `${poster.site_name} - ${moment.utc(poster.date).format('YYYY-MM-DD')} - ${poster.title.replace(/[/.]/g, '_')}.jpeg`); + + await fs.mkdir(directory, { recursive: true }); + await fs.copyFile(source, target); + + return target; + })); + + console.log(files); + + knex.destroy(); +} + +async function init() { + if (argv.actors) { + await actorPosters(argv.actors); + return; + } + + if (argv.sites) { + await sitePosters(argv.sites); + } +} + init(); diff --git a/src/utils/resolve-place.js b/src/utils/resolve-place.js index 7470e872..e572f38b 100644 --- a/src/utils/resolve-place.js +++ b/src/utils/resolve-place.js @@ -1,25 +1,34 @@ 'use strict'; -const bhttp = require('bhttp'); +const logger = require('../logger')(__filename); +const http = require('./http'); async function resolvePlace(query) { if (!query) { return null; } - const res = await bhttp.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`); - const [item] = res.body; + try { + // https://operations.osmfoundation.org/policies/nominatim/ + const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, { + 'User-Agent': 'contact at moonloop.adult@protonmail.com', + }); - if (item && item.address) { - const rawPlace = item.address; - const place = {}; + const [item] = res.body; - if (rawPlace.city) place.city = rawPlace.city; - if (rawPlace.state) place.state = rawPlace.state; - if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase(); - if (rawPlace.continent) place.continent = rawPlace.continent; + if (item && item.address) { + const rawPlace = item.address; + const place = {}; - return place; + if (rawPlace.city) place.city = rawPlace.city; + if (rawPlace.state) place.state = rawPlace.state; + if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase(); + if (rawPlace.continent) place.continent = rawPlace.continent; + + return place; + } + } catch (error) { + logger.error(`Failed to resolve place '${query}': ${error.message}`); } return null;