diff --git a/assets/components/tags/tags.vue b/assets/components/tags/tags.vue
index 083b821a..bb210534 100644
--- a/assets/components/tags/tags.vue
+++ b/assets/components/tags/tags.vue
@@ -45,6 +45,7 @@ async function mounted() {
       'double-penetration',
       'facial',
       'creampie',
+      'squirting',
     ],
     appearance: [
       'asian',
@@ -100,6 +101,7 @@ async function mounted() {
     ],
     misc: [
       'gaping',
+      'squirting',
       'oil',
     ],
   };
diff --git a/assets/js/actors/actions.js b/assets/js/actors/actions.js
index 5faa1c05..366551b6 100644
--- a/assets/js/actors/actions.js
+++ b/assets/js/actors/actions.js
@@ -57,7 +57,7 @@ function initActorActions(store, _router) {
         description
         createdAt
         updatedAt
-        network {
+        network: entity {
           id
           name
           slug
@@ -80,12 +80,7 @@ function initActorActions(store, _router) {
         profiles: actorsProfiles {
           description
           descriptionHash
-          network {
-            id
-            slug
-            name
-          }
-          site {
+          network: entity {
             id
             slug
             name
@@ -162,12 +157,12 @@ function initActorActions(store, _router) {
         ${releaseActorsFragment}
         ${releaseTagsFragment}
         ${releasePosterFragment}
-        site {
+        site: entity {
           id
           name
           slug
           url
-          network {
+          network: parent {
             id
             name
             slug
@@ -265,7 +260,7 @@ function initActorActions(store, _router) {
         dateOfBirth
         dateOfDeath
         gender
-        network {
+        network: entity {
           id
           name
           slug
diff --git a/public/img/logos/hush/favicon.png b/public/img/logos/hush/favicon.png
new file mode 100644
index 00000000..2fe5eb09
Binary files /dev/null and b/public/img/logos/hush/favicon.png differ
diff --git a/public/img/tags/squirting/0.jpeg b/public/img/tags/squirting/0.jpeg
new file mode 100644
index 00000000..d41a4570
Binary files /dev/null and b/public/img/tags/squirting/0.jpeg differ
diff --git a/public/img/tags/squirting/lazy/0.jpeg b/public/img/tags/squirting/lazy/0.jpeg
new file mode 100644
index 00000000..7638e52e
Binary files /dev/null and b/public/img/tags/squirting/lazy/0.jpeg differ
diff --git a/public/img/tags/squirting/thumbs/0.jpeg b/public/img/tags/squirting/thumbs/0.jpeg
new file mode 100644
index 00000000..248a9332
Binary files /dev/null and b/public/img/tags/squirting/thumbs/0.jpeg differ
diff --git a/seeds/04_media.js b/seeds/04_media.js
index e9c6378e..b97bb3aa 100644
--- a/seeds/04_media.js
+++ b/seeds/04_media.js
@@ -635,6 +635,7 @@ const tagPosters = [
  ['piercings', 0, 'Kaegune in "When The Sun Goes Down" for Suicide Girls'],
  ['pussy-eating', 0, 'Kali Roses licking Emily Willis\' pussy in "Peeping On My Neighbor" for Girl Girl'],
  ['redhead', 1, 'Lacy Lennon in "Girl Crush" for When Girls Play'],
  ['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
+ ['squirting', 0, 'Veronica Rodriguez in "Hot Latina Squirting" for Jules Jordan'],
  ['swallowing', 'poster'],
  ['teen', 0, 'Eva Elfie in "Fresh New Talent" for Club Seventeen'],
diff --git a/src/actors.js b/src/actors.js
index 4019aade..94868221 100644
--- a/src/actors.js
+++ b/src/actors.js
@@ -20,7 +20,6 @@ const logger = require('./logger')(__filename);
 
 const { toBaseReleases } = require('./deep');
 const { associateAvatars } = require('./media');
-const { curateSite } = require('./sites');
 
 const slugify = require('./utils/slugify');
 const capitalize = require('./utils/capitalize');
@@ -120,7 +119,7 @@ function toBaseActors(actorsOrNames, release) {
   const baseActor = {
     name,
     slug,
-    network: release?.site.network,
+    entity: release?.site?.network || release?.entity?.parent || null,
   };
 
   if (actorOrName.name) {
@@ -144,7 +143,8 @@ function curateActor(actor, withDetails = false) {
     name: actor.name,
     slug: actor.slug,
     gender: actor.gender,
-    networkId: actor.entity_id,
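+    // sites and networks are merged into the shared entities table in this change, so the curated actor exposes the generic entity ID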
+    entityId: actor.entity_id,
     aliasFor: actor.alias_for,
     dateOfBirth: actor.date_of_birth,
     birthCountry: actor.birth_country_alpha2,
@@ -155,10 +154,10 @@
       slug: actor.slug,
       gender: actor.alias.gender,
     },
-    network: actor.network && {
-      id: actor.network.id,
-      name: actor.network.name,
-      slug: actor.network.slug,
+    entity: actor.entity && {
+      id: actor.entity.id,
+      name: actor.entity.name,
+      slug: actor.entity.slug,
     },
     dateOfDeath: actor.date_of_death,
     cup: actor.cup,
@@ -224,8 +223,7 @@ function curateProfileEntry(profile) {
   const curatedProfileEntry = {
     ...(profile.update !== false && { id: profile.update }),
     actor_id: profile.id,
-    site_id: profile.site?.id || null,
-    entity_id: profile.network?.id || null,
+    entity_id: profile.entity?.id || null,
     date_of_birth: profile.dateOfBirth,
     date_of_death: profile.dateOfDeath,
     gender: profile.gender,
@@ -268,8 +266,7 @@ async function curateProfile(profile) {
     name: profile.name,
     avatar: profile.avatar,
     scraper: profile.scraper,
-    site: profile.site,
-    network: profile.network,
+    entity: profile.entity,
     update: profile.update,
   };
 
@@ -343,7 +340,7 @@ async function curateProfile(profile) {
         const { href } = new URL(social);
         return href;
       } catch (error) {
-        logger.warn(`Profile scraper for '${profile.site.name}' returned invalid social link: ${social}`);
+        logger.warn(`Profile scraper for '${profile.entity.name}' returned invalid social link: ${social}`);
         return null;
       }
     }).filter(Boolean)
@@ -351,9 +348,9 @@ async function curateProfile(profile) {
 
     curatedProfile.releases = toBaseReleases(profile.releases);
 
-    if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.ethnicity}`);
-    if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.hairColor || profile.hair}`);
-    if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.site?.name || profile.network?.slug}' scraper: ${profile.eyes}`);
+    if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.entity.name}' scraper: ${profile.ethnicity}`);
+    if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.entity.name}' scraper: ${profile.hairColor || profile.hair}`);
+    if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${profile.entity.name}' scraper: ${profile.eyes}`);
 
     return curatedProfile;
   } catch (error) {
@@ -499,7 +496,7 @@ async function upsertProfiles(profiles) {
   }
 }
 
-async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug, existingProfilesByActorNetworkSiteId) {
+async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
   const profiles = Promise.map(sources, async (source) => {
     try {
       // config may group sources to try until success
@@ -507,24 +504,26 @@
        try {
          const scraper = scrapers[scraperSlug];
 
          const context = {
-           site: sitesBySlug[scraperSlug] || null,
-           network: networksBySlug[scraperSlug] || sitesBySlug[scraperSlug]?.network || null,
+           site: entitiesBySlug[scraperSlug] || null,
+           network: entitiesBySlug[scraperSlug] || null,
+           entity: entitiesBySlug[scraperSlug] || null,
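+           // site and network mirror entity here, presumably as aliases for profile scrapers not yet migrated to the generic entity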
            scraper: scraperSlug,
          };
 
-          const label = context.site?.name || context.network?.name;
+          const label = context.entity?.name;
 
          if (!scraper?.fetchProfile) {
            logger.warn(`No profile profile scraper available for ${scraperSlug}`);
            throw new Error(`No profile profile scraper available for ${scraperSlug}`);
          }
 
-          if (!context.site && !context.network) {
-            logger.warn(`No site or network found for ${scraperSlug}`);
-            throw new Error(`No site or network found for ${scraperSlug}`);
+          if (!context.entity) {
+            logger.warn(`No entity found for ${scraperSlug}`);
+            throw new Error(`No entity found for ${scraperSlug}`);
          }
 
-          const existingProfile = existingProfilesByActorNetworkSiteId[actor.id]?.[context.network?.id || null]?.[context.site?.id || null];
+          const existingProfile = existingProfilesByActorEntityId[actor.id]?.[context.entity?.id || null];
 
          if (existingProfile && !argv.force) {
            logger.verbose(`Found existing profile for '${actor.name}' on '${label}', use --force to scrape again`);
@@ -574,20 +572,14 @@ async function scrapeActors(actorNames) {
   const baseActors = toBaseActors(actorNames);
   const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
-  const siteSlugs = sources.flat();
+  const entitySlugs = sources.flat();
 
-  const [networks, sites, existingActorEntries] = await Promise.all([
+  const [entities, existingActorEntries] = await Promise.all([
     knex('entities')
-      .where('type', 2)
-      .whereIn('slug', siteSlugs),
-    knex('entities')
-      .select(
-        'entities.*',
-        'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.description as network_description', 'parents.parameters as network_parameters',
-      )
-      .where('type', 2)
-      .whereIn('entities.slug', siteSlugs)
-      .leftJoin('entities as parents', 'parents.id', 'entities.parent_id'),
+      .select(knex.raw('entities.*, row_to_json(parents) as parent'))
+      .whereIn('entities.slug', entitySlugs)
+      .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
+      .orderBy('entities.type'),
     knex('actors')
       .select(['id', 'name', 'slug'])
       .modify((queryBuilder) => {
@@ -598,8 +590,7 @@
       .whereNull('alias_for'),
   ]);
 
-  const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: network }), {});
-  const sitesBySlug = sites.reduce((acc, site) => ({ ...acc, [site.slug]: curateSite(site) }), {});
+  const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
 
   const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
   const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
@@ -611,20 +602,18 @@
   const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
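+  // the Array.isArray guard above suggests the insert may return a non-array result when no new actors are created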
 
   const existingProfiles = await knex('actors_profiles').whereIn('actor_id', actors.map(actor => actor.id));
 
-  const existingProfilesByActorNetworkSiteId = existingProfiles.reduce((acc, profile) => ({
+  const existingProfilesByActorEntityId = existingProfiles.reduce((acc, profile) => ({
     ...acc,
     [profile.actor_id]: {
       ...acc[profile.actor_id],
-      [profile.entity_id]: {
-        ...acc[profile.entity_id],
-        [profile.site_id]: profile,
-      },
+      [profile.entity_id]: profile,
     },
   }), {});
 
   const profilesPerActor = await Promise.map(
     actors,
-    async actor => scrapeProfiles(actor, sources, networksBySlug, sitesBySlug, existingProfilesByActorNetworkSiteId),
+    async actor => scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId),
     { concurrency: 10 },
   );
@@ -647,13 +635,11 @@
 }
 
 async function getOrCreateActors(baseActors, batchId) {
-  console.log(baseActors);
-
   const existingActors = await knex('actors')
     .select('id', 'alias_for', 'name', 'slug', 'entity_id')
     .whereIn('slug', baseActors.map(baseActor => baseActor.slug))
     .whereNull('entity_id')
-    .orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));
+    .orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.entity.id]));
 
   // const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
   const existingActorSlugs = existingActors.reduce((acc, actor) => ({
     ...acc,
     [actor.entity_id]: {
       ...acc[actor.entity_id],
       [actor.slug]: actor,
     },
   }), {});
@@ -664,7 +650,7 @@ async function getOrCreateActors(baseActors, batchId) {
 
-  const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.network.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
+  const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
 
   const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
   const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'entity_id']);
@@ -722,7 +708,7 @@ async function fetchActor(actorId) {
   const actor = await knex('actors')
     .select(knex.raw(`
       actors.*,
-      row_to_json(networks) as network,
+      row_to_json(entities) as entity,
       row_to_json(actor_alias) as alias,
       row_to_json(birth_country) as birth_country,
       row_to_json(residence_country) as residence_country,
@@ -737,7 +723,7 @@
       queryBuilder.where('actors.id', actorId);
     })
     .leftJoin('actors as actor_alias', 'actor_alias.id', 'actors.alias_for')
-    .leftJoin('networks', 'networks.id', 'actors.entity_id')
+    .leftJoin('entities', 'entities.id', 'actors.entity_id')
     .leftJoin('countries as birth_country', 'birth_country.alpha2', 'actors.birth_country_alpha2')
     .leftJoin('countries as residence_country', 'residence_country.alpha2', 'actors.residence_country_alpha2')
     .leftJoin('media', 'media.id', 'actors.avatar_media_id')
diff --git a/src/deep.js b/src/deep.js
index 3bcd6d4e..6031600d 100644
--- a/src/deep.js
+++ b/src/deep.js
@@ -7,8 +7,6 @@ const include = require('./utils/argv-include')(argv);
 const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const scrapers = require('./scrapers/scrapers');
-const { curateSites } = require('./sites');
-const { curateNetworks } = require('./networks');
 
 function urlToSiteSlug(url) {
   try {
@@ -19,40 +17,31 @@ function urlToSiteSlug(url) {
     return slug;
   } catch (error) {
-    logger.warn(`Failed to derive site slug from '${url}': ${error.message}`);
+    logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
 
     return null;
   }
 }
 
-async function findSites(baseReleases) {
-  const baseReleasesWithoutSite = baseReleases.filter(release => release.url && !release.site);
+async function findEntities(baseReleases) {
+  const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
 
-  const siteSlugs = Array.from(new Set(
-    baseReleasesWithoutSite
+  const entitySlugs = Array.from(new Set(
+    baseReleasesWithoutEntity
       .map(baseRelease => urlToSiteSlug(baseRelease.url))
       .filter(Boolean),
   ));
 
-  const siteEntries = await knex('entities')
+  const entities = await knex('entities')
+    .select(knex.raw('entities.*, row_to_json(parents) as parent'))
     .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
-    .select('entities.*', 'parents.id as network_id', 'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.parameters as network_parameters', 'parents.description as network_description')
-    .where('entities.type', 2)
-    .whereIn('entities.slug', siteSlugs);
+    .whereIn('entities.slug', entitySlugs)
+    .orderBy('entities.type', 'asc');
 
-  const networkEntries = await knex('entities')
-    .where('type', 1)
-    .whereIn('slug', siteSlugs);
+  // channel entity will overwrite network entity
+  const entitiesBySlug = entities.reduce((accEntities, entity) => ({ ...accEntities, [entity.slug]: entity }), {});
 
-  const sites = await curateSites(siteEntries, true, false);
-  const networks = await curateNetworks(networkEntries, true, false, false);
-  const markedNetworks = networks.map(network => ({ ...network, isNetwork: true }));
-
-  const sitesBySlug = []
-    .concat(markedNetworks, sites)
-    .reduce((accSites, site) => ({ ...accSites, [site.slug]: site }), {});
-
-  return sitesBySlug;
+  return entitiesBySlug;
 }
 
 function toBaseReleases(baseReleasesOrUrls) {
@@ -92,23 +81,22 @@
     .filter(Boolean);
 }
 
-async function scrapeRelease(baseRelease, sites, type = 'scene') {
-  const site = baseRelease.site || sites[urlToSiteSlug(baseRelease.url)];
+async function scrapeRelease(baseRelease, entities, type = 'scene') {
+  const entity = baseRelease.entity || baseRelease.site || entities[urlToSiteSlug(baseRelease.url)];
 
-  if (!site) {
-    logger.warn(`No site available for ${baseRelease.url}`);
+  if (!entity) {
+    logger.warn(`No entity available for ${baseRelease.url}`);
     return baseRelease;
   }
 
   if ((!baseRelease.url && !baseRelease.path) || !argv.deep) {
     return {
       ...baseRelease,
-      site,
+      entity,
     };
   }
 
-  const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site; // make site.network available, even when site is network fallback
-  const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
+  const scraper = scrapers.releases[entity.slug] || scrapers.releases[entity.parent?.slug];
 
   if (!scraper) {
     logger.warn(`Could not find scraper for ${baseRelease.url}`);
     return baseRelease;
   }
 
@@ -116,7 +104,7 @@
   if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
-    logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
+    logger.warn(`The '${entity.name}'-scraper cannot fetch individual ${type}s`);
     return baseRelease;
   }
 
@@ -124,14 +112,14 @@
   try {
     logger.verbose(`Fetching ${type} ${baseRelease.url}`);
 
     const scrapedRelease = type === 'scene'
-      ? await scraper.fetchScene(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include)
-      : await scraper.fetchMovie(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include);
+      ? await scraper.fetchScene(baseRelease.url, entity, baseRelease, null, include)
+      : await scraper.fetchMovie(baseRelease.url, entity, baseRelease, null, include);
 
     const mergedRelease = {
       ...baseRelease,
       ...scrapedRelease,
       deep: !!scrapedRelease,
-      site,
+      entity,
     };
 
     if (!mergedRelease.entryId) {
@@ -155,19 +143,19 @@
   }
 }
 
-async function scrapeReleases(baseReleases, sites, type) {
+async function scrapeReleases(baseReleases, entities, type) {
   return Promise.map(
     baseReleases,
-    async baseRelease => scrapeRelease(baseRelease, sites, type),
+    async baseRelease => scrapeRelease(baseRelease, entities, type),
     { concurrency: 10 },
   );
 }
 
 async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
   const baseReleases = toBaseReleases(baseReleasesOrUrls);
-  const sites = await findSites(baseReleases);
+  const entities = await findEntities(baseReleases);
 
-  const deepReleases = await scrapeReleases(baseReleases, sites, type);
+  const deepReleases = await scrapeReleases(baseReleases, entities, type);
 
   return deepReleases.filter(Boolean);
 }
diff --git a/src/scrapers/hush.js b/src/scrapers/hush.js
index 35608937..d0ff82ff 100644
--- a/src/scrapers/hush.js
+++ b/src/scrapers/hush.js
@@ -3,21 +3,21 @@
 const util = require('util');
 
 const knex = require('../knex');
-const { get, geta, ed, fd, ctxa } = require('../utils/q');
+const { get, geta, ed, formatDate, ctxa } = require('../utils/q');
 const slugify = require('../utils/slugify');
 const { feetInchesToCm } = require('../utils/convert');
 
 async function getChannelRegExp(site) {
-  if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;
+  if (!['hushpass', 'interracialpass'].includes(site.parent.slug)) return null;
 
-  const sites = await knex('sites').where('network_id', site.network.id);
+  const sites = await knex('sites').where('network_id', site.parent.id);
 
   return new RegExp(sites.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
 }
 
 function deriveEntryId(release) {
   if (release.date && release.title) {
-    return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
+    return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
   }
 
   return null;
@@ -140,7 +140,7 @@ function scrapeScene({ html, qu }, site, url, baseRelease) {
   release.title = qu.q('.centerwrap h2', true);
   release.description = qu.q('.videocontent p', true);
-  release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
+  release.date = qu.date('.videodetails .date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
   release.duration = qu.dur('.videodetails .date');
 
   release.actors = qu.all('.modelname a', true);
diff --git a/src/store-releases.js b/src/store-releases.js
index 63cb3795..fe92785d 100644
--- a/src/store-releases.js
+++ b/src/store-releases.js
@@ -8,7 +8,7 @@ const knex = require('./knex');
 const slugify = require('./utils/slugify');
 const { associateActors, scrapeActors } = require('./actors');
 const { associateReleaseTags } = require('./tags');
-const { curateSite } = require('./sites');
+const { curateEntity } = require('./entities');
 const { associateReleaseMedia } = require('./media');
 
 function curateReleaseEntry(release, batchId, existingRelease) {
@@ -20,9 +20,9 @@ function curateReleaseEntry(release, batchId, existingRelease) {
   const curatedRelease = {
     title: release.title,
     entry_id: release.entryId || null,
-    entity_id: release.site?.id,
-    shoot_id: release.shootId || null,
+    entity_id: release.entity.id,
     studio_id: release.studio?.id || null,
+    shoot_id: release.shootId || null,
     url: release.url,
     date: Number(release.date) ? release.date : null,
     slug,
@@ -45,51 +45,48 @@ function curateReleaseEntry(release, batchId, existingRelease) {
   return curatedRelease;
 }
 
-async function attachChannelSites(releases) {
-  const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isNetwork || release.site.slug !== release.channel));
+async function attachChannelEntities(releases) {
+  const releasesWithoutEntity = releases.filter(release => release.channel && (!release.entity || release.entity.type === 1));
 
-  const channelSites = await knex('entities')
-    .leftJoin('entities AS parents', 'parents.id', 'entities.parent_id')
-    .select('entities.*', 'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.parameters as network_parameters', 'parents.description as network_description')
-    .whereIn('entities.slug', releasesWithoutSite.map(release => release.channel));
+  const channelEntities = await knex('entities')
+    .select(knex.raw('entities.*, row_to_json(parents) as parent'))
+    .whereIn('entities.slug', releasesWithoutEntity.map(release => release.channel))
+    .where('entities.type', 2)
+    .leftJoin('entities AS parents', 'parents.id', 'entities.parent_id');
 
-  const channelSitesBySlug = channelSites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
+  const channelEntitiesBySlug = channelEntities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
 
-  const releasesWithChannelSite = await Promise.all(releases
+  const releasesWithChannelEntity = await Promise.all(releases
     .map(async (release) => {
-      if (release.channel && channelSitesBySlug[release.channel]) {
-        const curatedSite = await curateSite(channelSitesBySlug[release.channel]);
+      if (release.channel && channelEntitiesBySlug[release.channel]) {
+        const curatedEntity = await curateEntity(channelEntitiesBySlug[release.channel]);
 
         return {
           ...release,
-          site: curatedSite,
+          entity: curatedEntity,
         };
       }
 
-      if (release.site && !release.site.isNetwork) {
+      if (release.entity) {
        return release;
      }
 
-      if (release.site && release.site.isNetwork) {
-        return {
-          ...release,
-          site: null,
-          network: release.site,
-        };
-      }
-
      logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);
 
      return null;
    }));
 
-  return releasesWithChannelSite.filter(Boolean);
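+  // releases whose channel could not be matched to an entity were mapped to null above and are dropped here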
+  return releasesWithChannelEntity.filter(Boolean);
 }
 
 async function attachStudios(releases) {
   const studioSlugs = releases.map(release => release.studio).filter(Boolean);
 
-  const studios = await knex('studios').whereIn('slug', studioSlugs);
+  const studios = await knex('entities')
+    .whereIn('slug', studioSlugs)
+    .where('type', 3);
+
   const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {});
 
   const releasesWithStudio = releases.map((release) => {
@@ -111,7 +107,7 @@ async function attachStudios(releases) {
 }
 
 function attachReleaseIds(releases, storedReleases) {
-  const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
+  const storedReleaseIdsByEntityIdAndEntryId = storedReleases.reduce((acc, release) => {
     if (!acc[release.entity_id]) acc[release.entity_id] = {};
     acc[release.entity_id][release.entry_id] = release.id;
 
@@ -120,29 +116,29 @@
 
   const releasesWithId = releases.map(release => ({
     ...release,
-    id: storedReleaseIdsBySiteIdAndEntryId[release.site.id][release.entryId],
+    id: storedReleaseIdsByEntityIdAndEntryId[release.entity.id][release.entryId],
   }));
 
   return releasesWithId;
 }
 
 function filterInternalDuplicateReleases(releases) {
-  const releasesBySiteIdAndEntryId = releases.reduce((acc, release) => {
-    if (!release.site) {
+  const releasesByEntityIdAndEntryId = releases.reduce((acc, release) => {
+    if (!release.entity) {
       return acc;
     }
 
-    if (!acc[release.site.id]) {
-      acc[release.site.id] = {};
+    if (!acc[release.entity.id]) {
+      acc[release.entity.id] = {};
     }
 
-    acc[release.site.id][release.entryId] = release;
+    acc[release.entity.id][release.entryId] = release;
 
     return acc;
   }, {});
 
-  return Object.values(releasesBySiteIdAndEntryId)
-    .map(siteReleases => Object.values(siteReleases))
+  return Object.values(releasesByEntityIdAndEntryId)
+    .map(entityReleases => Object.values(entityReleases))
     .flat();
 }
 
@@ -150,17 +146,17 @@ async function filterDuplicateReleases(releases) {
   const internalUniqueReleases = filterInternalDuplicateReleases(releases);
 
   const duplicateReleaseEntries = await knex('releases')
-    .whereIn(['entry_id', 'entity_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id]));
+    .whereIn(['entry_id', 'entity_id'], internalUniqueReleases.map(release => [release.entryId, release.entity.id]));
 
-  const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
+  const duplicateReleasesByEntityIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
     if (!acc[release.entity_id]) acc[release.entity_id] = {};
     acc[release.entity_id][release.entry_id] = true;
 
     return acc;
   }, {});
 
-  const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
-  const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
+  const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
+  const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId]);
 
   return {
     uniqueReleases,
@@ -216,10 +212,10 @@ async function storeReleases(releases) {
   const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
 
-  const releasesWithSites = await attachChannelSites(releases);
-  const releasesWithStudios = await attachStudios(releasesWithSites);
+  const releasesWithChannels = await attachChannelEntities(releases);
+  const releasesWithStudios = await attachStudios(releasesWithChannels);
 
-  // uniqueness is site ID + entry ID, filter uniques after adding sites
+  // uniqueness is entity ID + entry ID, filter uniques after adding entities
   const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);
 
   const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
diff --git a/src/tags.js b/src/tags.js
index beba770e..2efb56bd 100644
--- a/src/tags.js
+++ b/src/tags.js
@@ -27,27 +27,27 @@ async function matchReleaseTags(releases) {
   return tagIdsBySlug;
 }
 
-async function getSiteTags(releases) {
-  const siteIds = releases.map(release => release.site.id);
-  const siteTags = await knex('sites_tags').whereIn('site_id', siteIds);
+async function getEntityTags(releases) {
+  const entityIds = releases.map(release => release.entity.id);
+  const entityTags = await knex('entities_tags').whereIn('entity_id', entityIds);
 
-  const siteTagIdsBySiteId = siteTags.reduce((acc, siteTag) => {
-    if (!acc[siteTag.site_id]) {
-      acc[siteTag.site_id] = [];
+  const entityTagIdsByEntityId = entityTags.reduce((acc, entityTag) => {
+    if (!acc[entityTag.entity_id]) {
+      acc[entityTag.entity_id] = [];
     }
 
-    acc[siteTag.site_id].push(siteTag.tag_id);
+    acc[entityTag.entity_id].push(entityTag.tag_id);
 
     return acc;
   }, {});
 
-  return siteTagIdsBySiteId;
+  return entityTagIdsByEntityId;
 }
 
-function buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId) {
+function buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId) {
   const tagAssociations = releases
     .map((release) => {
-      const siteTagIds = siteTagIdsBySiteId[release.site.id];
+      const entityTagIds = entityTagIdsByEntityId[release.entity.id];
 
       const releaseTags = release.tags || [];
 
       const releaseTagIds = releaseTags.every(tag => typeof tag === 'number')
@@ -57,7 +57,7 @@
       const tags = [...new Set(
         // filter duplicates and empties
         releaseTagIds
-          .concat(siteTagIds)
+          .concat(entityTagIds)
           .filter(Boolean),
       )]
         .map(tagId => ({
@@ -94,9 +94,9 @@
 async function associateReleaseTags(releases) {
   const tagIdsBySlug = await matchReleaseTags(releases);
-  const siteTagIdsBySiteId = await getSiteTags(releases);
+  const entityTagIdsByEntityId = await getEntityTags(releases);
 
-  const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId);
+  const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId);
 
   const uniqueAssociations = await filterUniqueAssociations(tagAssociations);
 
   await knex('releases_tags').insert(uniqueAssociations);
diff --git a/src/updates.js b/src/updates.js
index 283aaee7..d2eaa5b1 100644
--- a/src/updates.js
+++ b/src/updates.js
@@ -196,12 +196,13 @@ async function scrapeSite(site, network, accSiteReleases) {
   }
 }
 
-async function scrapeNetworkSequential(network) {
+async function scrapeNetworkSequential(networkEntity) {
   return Promise.reduce(
-    network.sites,
-    async (chain, site) => {
+    networkEntity.children,
+    async (chain, siteEntity) => {
       const accSiteReleases = await chain;
-      const siteReleases = await scrapeSite(site, network, accSiteReleases);
+      const siteReleases = await scrapeSite(siteEntity, networkEntity, accSiteReleases);
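+      // accSiteReleases accumulates earlier channels' releases, presumably so cross-channel duplicates can be skipped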
 
       return accSiteReleases.concat(siteReleases);
     },
@@ -209,10 +209,10 @@ async function scrapeNetworkSequential(network) {
   );
 }
 
-async function scrapeNetworkParallel(network) {
+async function scrapeNetworkParallel(networkEntity) {
   return Promise.map(
-    network.children,
-    async site => scrapeSite(site, network),
+    networkEntity.children,
+    async siteEntity => scrapeSite(siteEntity, networkEntity),
     { concurrency: 3 },
   );
 }
diff --git a/src/utils/slugify.js b/src/utils/slugify.js
index 329d84c1..46354430 100644
--- a/src/utils/slugify.js
+++ b/src/utils/slugify.js
@@ -45,7 +45,7 @@ function slugify(string, delimiter = '-', {
     return string;
   }
 
-  const slugComponents = string.trim().toLowerCase().match(/[A-Za-zÀ-ÖØ-öø-ÿ]+/g);
+  const slugComponents = string.trim().toLowerCase().match(/[A-Za-zÀ-ÖØ-öø-ÿ0-9]+/g);
 
   if (!slugComponents) {
     return '';