diff --git a/public/img/tags/airtight/11.jpeg b/public/img/tags/airtight/11.jpeg
new file mode 100644
index 00000000..25f88c5a
Binary files /dev/null and b/public/img/tags/airtight/11.jpeg differ
diff --git a/public/img/tags/airtight/lazy/11.jpeg b/public/img/tags/airtight/lazy/11.jpeg
new file mode 100644
index 00000000..14357c47
Binary files /dev/null and b/public/img/tags/airtight/lazy/11.jpeg differ
diff --git a/public/img/tags/airtight/originals/11.jpeg b/public/img/tags/airtight/originals/11.jpeg
new file mode 100644
index 00000000..d2012c2b
Binary files /dev/null and b/public/img/tags/airtight/originals/11.jpeg differ
diff --git a/public/img/tags/airtight/thumbs/11.jpeg b/public/img/tags/airtight/thumbs/11.jpeg
new file mode 100644
index 00000000..bf8bb9fa
Binary files /dev/null and b/public/img/tags/airtight/thumbs/11.jpeg differ
diff --git a/public/img/tags/pussy-eating/8.jpeg b/public/img/tags/pussy-eating/8.jpeg
new file mode 100644
index 00000000..5412c452
Binary files /dev/null and b/public/img/tags/pussy-eating/8.jpeg differ
diff --git a/public/img/tags/pussy-eating/lazy/8.jpeg b/public/img/tags/pussy-eating/lazy/8.jpeg
new file mode 100644
index 00000000..40f39665
Binary files /dev/null and b/public/img/tags/pussy-eating/lazy/8.jpeg differ
diff --git a/public/img/tags/pussy-eating/originals/8.jpeg b/public/img/tags/pussy-eating/originals/8.jpeg
new file mode 100644
index 00000000..94ff5472
Binary files /dev/null and b/public/img/tags/pussy-eating/originals/8.jpeg differ
diff --git a/public/img/tags/pussy-eating/thumbs/8.jpeg b/public/img/tags/pussy-eating/thumbs/8.jpeg
new file mode 100644
index 00000000..fddfca61
Binary files /dev/null and b/public/img/tags/pussy-eating/thumbs/8.jpeg differ
diff --git a/seeds/04_media.js b/seeds/04_media.js
index aa1a2b83..40e21024 100644
--- a/seeds/04_media.js
+++ b/seeds/04_media.js
@@ -685,12 +685,13 @@ const tagPhotos = [
   ['69', 2, 'Abigail Mac and Kissa Sins in "Lesbian Anal Workout" for HardX'],
   ['airtight', 7, 'Lana Rhoades in "Gangbang Me 3" for HardX'],
   ['airtight', 6, 'Remy Lacroix in "Ass Worship 14" for Jules Jordan'],
+  ['airtight', 11, 'Malena Nazionale in "Rocco\'s Perverted Secretaries 2: Italian Edition" for Rocco Siffredi'],
+  ['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'],
   ['airtight', 10, 'Asa Akira in "Asa Akira To The Limit" for Jules Jordan'],
   ['airtight', 8, 'Veronica Leal in LegalPorno SZ2520'],
-  ['airtight', 5, 'Chloe Amour in "DP Masters 4" for Jules Jordan'],
   ['airtight', 3, 'Anita Bellini in "Triple Dick Gangbang" for Hands On Hardcore (DDF Network)'],
+  ['airtight', 5, 'Chloe Amour in "DP Masters 4" for Jules Jordan'],
   ['airtight', 9, 'Cindy Shine in LegalPorno GP1658'],
-  ['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'],
   ['atm', 3, 'Natasha Teen in "Work That Ass!" for Her Limit'],
   ['atm', 0, 'Roxy Lips in "Under Her Coat" for 21 Naturals'],
   ['atm', 6, 'Jane Wilde in "Teen Anal" for Evil Angel'],
@@ -873,10 +874,11 @@ const tagPhotos = [
   ['orgy', 'poster', 'Zoey Mornoe (DP), Jillian Janson (sex), Frida Sante, Katerina Kay and Natasha Starr in "Orgy Masters 6" for Jules Jordan'],
   ['pussy-eating', 4, 'Anastasia Knight and Jillian Janson in "Teach Me" for Screwbox'],
   ['pussy-eating', 7, 'Jewelz Blu and Katie Kush in "Pick Your Pleasure" for Reality Kings'],
-  ['pussy-eating', 6, 'Abella Danger and Karma Rx in "Neon Dreaming" for Brazzers'],
+  ['pussy-eating', 8, 'Sia Lust and Lacey London in "Naughty Gamer Girls" for Girls Gone Pink'],
   ['pussy-eating', 0, 'Kali Roses and Emily Willis\' pussy in "Peeping On My Neighbor" for Girl Girl'],
   ['pussy-eating', 2, 'Anikka Albrite and Mia Malkova in "Big Anal Bombshells" for LesbianX'],
   ['pussy-eating', 3, 'Kylie Page and Kalina Ryu in "Training My Masseuse" for All Girl Massage'],
+  ['pussy-eating', 6, 'Abella Danger and Karma Rx in "Neon Dreaming" for Brazzers'],
   ['pussy-eating', 1, 'Anikka Albrite and Riley Reid for In The Crack'],
   ['redhead', 0, 'Penny Pax in "The Submission of Emma Marx: Boundaries" for New Sensations'],
   ['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
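A note on how these seed rows relate to the binary files above: each `tagPhotos` entry pairs a tag slug and photo index with a credit line, and the committed JPEGs follow a `public/img/tags/<slug>/<index>.jpeg` layout with `lazy`, `originals` and `thumbs` variants beside the full-size file. A minimal sketch of resolving a row to those paths — the helper name and return shape are invented for illustration, not code from this repository:

```js
const path = require('path');

// Hypothetical helper: maps a tagPhotos seed row to the four image
// variants committed under public/img/tags/, mirroring the paths
// added in this diff (e.g. airtight/11.jpeg and its variants).
function tagPhotoPaths([tagSlug, index]) {
  const base = path.join('public', 'img', 'tags', tagSlug);

  return {
    full: path.join(base, `${index}.jpeg`),
    lazy: path.join(base, 'lazy', `${index}.jpeg`),
    original: path.join(base, 'originals', `${index}.jpeg`),
    thumbnail: path.join(base, 'thumbs', `${index}.jpeg`),
  };
}

console.log(tagPhotoPaths(['airtight', 11]).full);
// => 'public/img/tags/airtight/11.jpeg'
```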
diff --git a/src/deep.js b/src/deep.js
index e0246b4c..ce25f9ed 100644
--- a/src/deep.js
+++ b/src/deep.js
@@ -5,49 +5,11 @@ const merge = require('object-merge-advanced');
 const argv = require('./argv');
 const include = require('./utils/argv-include')(argv);
+const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
 const logger = require('./logger')(__filename);
-const knex = require('./knex');
 const qu = require('./utils/qu');
 const scrapers = require('./scrapers/scrapers');
 
-function urlToSiteSlug(url) {
-  try {
-    const slug = new URL(url)
-      .hostname
-      .match(/([\w-]+)\.\w+$/)?.[1]
-      .replace(/[-_]+/g, '');
-
-    return slug;
-  } catch (error) {
-    logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
-
-    return null;
-  }
-}
-
-async function findEntities(baseReleases) {
-  const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
-
-  const entitySlugs = Array.from(new Set(
-    baseReleasesWithoutEntity
-      .map(baseRelease => urlToSiteSlug(baseRelease.url))
-      .filter(Boolean),
-  ));
-
-  const entities = await knex('entities')
-    .select(knex.raw('entities.*, row_to_json(parents) as parent, json_agg(children) as children'))
-    .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
-    .leftJoin('entities as children', 'children.parent_id', 'entities.id')
-    .whereIn('entities.slug', entitySlugs)
-    .groupBy('entities.id', 'parents.id')
-    .orderBy('entities.type', 'asc');
-
-  // channel entity will overwrite network entity
-  const entitiesBySlug = entities.reduce((accEntities, entity) => ({ ...accEntities, [entity.slug]: accEntities[entity.slug] || entity }), {});
-
-  return entitiesBySlug;
-}
-
 function toBaseReleases(baseReleasesOrUrls, entity = null) {
   if (!baseReleasesOrUrls) {
     return [];
@@ -106,8 +68,32 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
   return null;
 }
 
-async function scrapeRelease(baseRelease, entities, type = 'scene') {
-  const entity = baseRelease.entity || entities[urlToSiteSlug(baseRelease.url)];
+function findScraper(entity) {
+  if (scrapers.releases[entity.slug]) {
+    return scrapers.releases[entity.slug];
+  }
+
+  if (entity.parent) {
+    return findScraper(entity.parent);
+  }
+
+  return null;
+}
+
+function findLayoutScraper(entity, scraper) {
+  if (scraper?.[entity.parameters?.layout]) {
+    return scraper[entity.parameters.layout];
+  }
+
+  if (entity.parent) {
+    return findLayoutScraper(entity.parent, scraper);
+  }
+
+  return scraper;
+}
+
+async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
+  const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];
 
   if (!entity) {
     logger.warn(`No entity available for ${baseRelease.url}`);
@@ -121,8 +107,8 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') {
     };
   }
 
-  const scraper = scrapers.releases[entity.slug] || scrapers.releases[entity.parent?.slug] || scrapers.releases[entity.parent?.parent?.slug];
-  const layoutScraper = scraper?.[entity.parameters?.layout] || scraper?.[entity.parent?.parameters?.layout] || scraper?.[entity.parent?.parent?.parameters?.layout] || scraper;
+  const scraper = findScraper(entity);
+  const layoutScraper = findLayoutScraper(entity, scraper);
 
   if (!layoutScraper) {
     logger.warn(`Could not find scraper for ${baseRelease.url}`);
@@ -184,19 +170,19 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') {
   }
 }
 
-async function scrapeReleases(baseReleases, entities, type) {
+async function scrapeReleases(baseReleases, entitiesBySlug, type) {
   return Promise.map(
     baseReleases,
-    async baseRelease => scrapeRelease(baseRelease, entities, type),
+    async baseRelease => scrapeRelease(baseRelease, entitiesBySlug, type),
     { concurrency: 10 },
  );
 }
 
 async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
   const baseReleases = toBaseReleases(baseReleasesOrUrls);
-  const entities = await findEntities(baseReleases);
+  const entitiesBySlug = await fetchReleaseEntities(baseReleases);
 
-  const deepReleases = await scrapeReleases(baseReleases, entities, type);
+  const deepReleases = await scrapeReleases(baseReleases, entitiesBySlug, type);
 
   return deepReleases.filter(Boolean);
 }
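The refactor above replaces scraper lookups that were hard-coded to two ancestor levels (`scrapers.releases[entity.parent?.parent?.slug]`) with `findScraper`/`findLayoutScraper`, which recurse up the entity tree to any depth. A self-contained sketch of the same resolution pattern, using an invented registry and entity chain rather than the real `scrapers.releases` map:

```js
// Illustrative registry and entity chain; in the codebase these come
// from scrapers.releases and the entities table respectively.
const releaseScrapers = {
  julesjordan: { fetchScene: url => `scraping ${url} with the Jules Jordan scraper` },
};

const entity = {
  slug: 'assworship', // hypothetical channel without a dedicated scraper
  parent: { slug: 'julesjordan', parent: null },
};

// Same shape as findScraper in src/deep.js: prefer the entity's own
// scraper, otherwise walk up the parent chain until one is found.
function findScraper(ent) {
  if (releaseScrapers[ent.slug]) {
    return releaseScrapers[ent.slug];
  }

  return ent.parent ? findScraper(ent.parent) : null;
}

console.log(findScraper(entity).fetchScene('https://example.com/scene'));
// => 'scraping https://example.com/scene with the Jules Jordan scraper'
```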
diff --git a/src/entities.js b/src/entities.js
index 03e3d661..a92b2d84 100644
--- a/src/entities.js
+++ b/src/entities.js
@@ -66,6 +66,21 @@ async function curateEntities(entities, includeParameters) {
   return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
 }
 
+function urlToSiteSlug(url) {
+  try {
+    const slug = new URL(url)
+      .hostname
+      .match(/([\w-]+)\.\w+$/)?.[1]
+      .replace(/[-_]+/g, '');
+
+    return slug;
+  } catch (error) {
+    logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
+
+    return null;
+  }
+}
+
 async function fetchIncludedEntities() {
   const include = {
     includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
@@ -139,6 +154,46 @@ async function fetchIncludedEntities() {
   return curatedNetworks;
 }
 
+async function fetchReleaseEntities(baseReleases) {
+  const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
+
+  const entitySlugs = Array.from(new Set(
+    baseReleasesWithoutEntity
+      .map(baseRelease => urlToSiteSlug(baseRelease.url))
+      .filter(Boolean),
+  ));
+
+  const entities = await knex.raw(`
+    WITH RECURSIVE tree as (
+      SELECT to_jsonb(entities) as entity,
+        parent_id,
+        array['parent'] as parent_path,
+        0 as depth
+      FROM entities
+      WHERE slug = ANY(:entitySlugs)
+
+      UNION ALL
+
+      SELECT jsonb_set(tree.entity, tree.parent_path, to_jsonb(entities)),
+        entities.parent_id,
+        tree.parent_path || array['parent'],
+        depth + 1
+      FROM tree
+      JOIN entities ON tree.parent_id = entities.id
+    )
+    SELECT entity FROM tree WHERE parent_id is null
+    ORDER BY entity->'type' ASC;
+  `, { entitySlugs });
+
+  // channel entity will overwrite network entity
+  const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({
+    ...accEntities,
+    [entity.slug]: accEntities[entity.slug] || curateEntity(entity, true),
+  }), {});
+
+  return entitiesBySlug;
+}
+
 async function fetchEntity(entityId, type) {
   const entity = await knex('entities')
     .select(knex.raw(`
@@ -290,8 +345,10 @@ module.exports = {
   curateEntity,
   curateEntities,
   fetchIncludedEntities,
+  fetchReleaseEntities,
   fetchEntity,
   fetchEntities,
   searchEntities,
   flushEntities,
+  urlToSiteSlug,
 };
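`fetchReleaseEntities` now builds each matched entity's full ancestor chain in a single round trip: the CTE's anchor selects entities by slug, and each recursive step grafts the next parent onto the JSON document via `jsonb_set`, extending the target path by one `'parent'` segment per level until `parent_id` is null. Because `'channel'` sorts before `'network'`, the `ORDER BY entity->'type'` combined with the first-wins reduce means a channel takes precedence over a network sharing the same slug. A row should decode to roughly this nested shape (inferred from the query; the values are made up):

```js
// Approximate shape of one row returned by the recursive query, with
// the whole ancestor chain nested under repeated `parent` keys.
const exampleRow = {
  entity: {
    id: 123,
    slug: 'assworship', // hypothetical channel slug
    type: 'channel',
    parent: {
      id: 45,
      slug: 'julesjordan',
      type: 'network',
      parent: null, // chain is fully resolved when parent_id is null
    },
  },
};

console.log(exampleRow.entity.parent.slug); // => 'julesjordan'
```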
diff --git a/src/store-releases.js b/src/store-releases.js
index 07ab75ae..e76f3a82 100644
--- a/src/store-releases.js
+++ b/src/store-releases.js
@@ -164,7 +164,6 @@ function attachReleaseIds(releases, storedReleases) {
 
 function filterInternalDuplicateReleases(releases) {
   const releasesByEntityIdAndEntryId = releases.reduce((acc, release) => {
-    console.log(release);
     if (!release.entity) {
       return acc;
     }
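The final hunk simply drops a leftover debug `console.log` from the duplicate filter. For context, the surrounding reduce groups releases by entity ID and entry ID so internal duplicates collapse to a single entry before storage; a standalone sketch of that grouping pattern, with invented data and a flattened composite key rather than whatever nesting the real implementation uses:

```js
// Group releases by a composite entity/entry key, keeping the first
// release seen per key — the same first-wins reduce pattern used above.
const releases = [
  { entity: { id: 1 }, entryId: 'a', title: 'Scene A' },
  { entity: { id: 1 }, entryId: 'a', title: 'Scene A (duplicate)' },
  { entity: { id: 2 }, entryId: 'a', title: 'Scene B' },
];

const releasesByEntityIdAndEntryId = releases.reduce((acc, release) => {
  if (!release.entity) {
    return acc;
  }

  const key = `${release.entity.id}:${release.entryId}`;

  return { ...acc, [key]: acc[key] || release };
}, {});

console.log(Object.values(releasesByEntityIdAndEntryId).length); // => 2
```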