Matching URLs to entity using hostname rather than slug to minimize collisions. Fixed missing Cum Louder POV logo.

This commit is contained in:
DebaucheryLibrarian 2023-06-04 21:50:59 +02:00
parent 7e2840a00d
commit 164757ee26
55 changed files with 67 additions and 24 deletions

View File

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

BIN
public/img/logos/cumlouder/lazy/bitchconfessions.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.9 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

BIN
public/img/logos/cumlouder/lazy/boldlygirls.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.7 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB

BIN
public/img/logos/cumlouder/lazy/boobday.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

BIN
public/img/logos/cumlouder/lazy/breakingasses.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

After

Width:  |  Height:  |  Size: 7.2 KiB

BIN
public/img/logos/cumlouder/lazy/cocksxl.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 6.8 KiB

BIN
public/img/logos/cumlouder/lazy/cumextreme.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

BIN
public/img/logos/cumlouder/lazy/cumlouder.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 KiB

BIN
public/img/logos/cumlouder/lazy/cumtrick.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.0 KiB

After

Width:  |  Height:  |  Size: 3.1 KiB

BIN
public/img/logos/cumlouder/lazy/exgirlfriends.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.9 KiB

After

Width:  |  Height:  |  Size: 2.9 KiB

BIN
public/img/logos/cumlouder/lazy/favicon.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
public/img/logos/cumlouder/lazy/favicon_dark.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
public/img/logos/cumlouder/lazy/favicon_light.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
public/img/logos/cumlouder/lazy/fuckinvan.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

After

Width:  |  Height:  |  Size: 7.2 KiB

BIN
public/img/logos/cumlouder/lazy/givemespunk.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.9 KiB

After

Width:  |  Height:  |  Size: 3.9 KiB

BIN
public/img/logos/cumlouder/lazy/handjobgoddess.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.8 KiB

After

Width:  |  Height:  |  Size: 4.8 KiB

BIN
public/img/logos/cumlouder/lazy/hungrycumeaters.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.0 KiB

After

Width:  |  Height:  |  Size: 4.1 KiB

BIN
public/img/logos/cumlouder/lazy/livingwithapornstar.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.6 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

BIN
public/img/logos/cumlouder/lazy/livingwithleyla.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.9 KiB

After

Width:  |  Height:  |  Size: 6.9 KiB

BIN
public/img/logos/cumlouder/lazy/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 5.3 KiB

BIN
public/img/logos/cumlouder/lazy/pornstarfisher.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.5 KiB

After

Width:  |  Height:  |  Size: 5.5 KiB

BIN
public/img/logos/cumlouder/lazy/readyornothereicum.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.4 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

BIN
public/img/logos/cumlouder/lazy/spoofporn.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.3 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

BIN
public/img/logos/cumlouder/lazy/streetsuckers.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 5.2 KiB

BIN
public/img/logos/cumlouder/lazy/stunningbutts.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.4 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

BIN
public/img/logos/cumlouder/thumbs/bitchconfessions.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

BIN
public/img/logos/cumlouder/thumbs/boldlygirls.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

BIN
public/img/logos/cumlouder/thumbs/boobday.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

BIN
public/img/logos/cumlouder/thumbs/breakingasses.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

BIN
public/img/logos/cumlouder/thumbs/cocksxl.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

BIN
public/img/logos/cumlouder/thumbs/cumextreme.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 27 KiB

BIN
public/img/logos/cumlouder/thumbs/cumlouder.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.9 KiB

After

Width:  |  Height:  |  Size: 9.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

BIN
public/img/logos/cumlouder/thumbs/cumtrick.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

BIN
public/img/logos/cumlouder/thumbs/exgirlfriends.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

BIN
public/img/logos/cumlouder/thumbs/favicon.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
public/img/logos/cumlouder/thumbs/favicon_dark.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
public/img/logos/cumlouder/thumbs/favicon_light.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
public/img/logos/cumlouder/thumbs/fuckinvan.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.0 KiB

After

Width:  |  Height:  |  Size: 9.0 KiB

BIN
public/img/logos/cumlouder/thumbs/givemespunk.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

BIN
public/img/logos/cumlouder/thumbs/handjobgoddess.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 13 KiB

BIN
public/img/logos/cumlouder/thumbs/hungrycumeaters.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 18 KiB

BIN
public/img/logos/cumlouder/thumbs/livingwithapornstar.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

BIN
public/img/logos/cumlouder/thumbs/livingwithleyla.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

BIN
public/img/logos/cumlouder/thumbs/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.9 KiB

After

Width:  |  Height:  |  Size: 9.0 KiB

BIN
public/img/logos/cumlouder/thumbs/pornstarfisher.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

BIN
public/img/logos/cumlouder/thumbs/readyornothereicum.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.9 KiB

After

Width:  |  Height:  |  Size: 10 KiB

BIN
public/img/logos/cumlouder/thumbs/spoofporn.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

BIN
public/img/logos/cumlouder/thumbs/streetsuckers.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

After

Width:  |  Height:  |  Size: 10 KiB

BIN
public/img/logos/cumlouder/thumbs/stunningbutts.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -7,7 +7,7 @@ const { mergeAdvanced: merge } = require('object-merge-advanced');
const argv = require('./argv'); const argv = require('./argv');
const include = require('./utils/argv-include')(argv); const include = require('./utils/argv-include')(argv);
const { fetchReleaseEntities, urlToSiteSlug } = require('./entities'); const { fetchReleaseEntities, urlToHostname } = require('./entities');
const logger = require('./logger')(__filename); const logger = require('./logger')(__filename);
const qu = require('./utils/qu'); const qu = require('./utils/qu');
const getRecursiveParameters = require('./utils/get-recursive-parameters'); const getRecursiveParameters = require('./utils/get-recursive-parameters');
@ -110,8 +110,11 @@ function fetchMovie(scraper, url, entity, baseRelease, options) {
return fetchScene(scraper, url, entity, baseRelease, options, 'movie'); return fetchScene(scraper, url, entity, baseRelease, options, 'movie');
} }
async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)]; const entity = baseRelease.entity || entitiesByHostname[urlToHostname(baseRelease.url)];
console.log(entitiesByHostname);
console.log(entity);
if (!entity) { if (!entity) {
logger.warn(`No entity available for ${baseRelease.url}`); logger.warn(`No entity available for ${baseRelease.url}`);
@ -222,10 +225,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
} }
} }
async function scrapeReleases(baseReleases, entitiesBySlug, type) { async function scrapeReleases(baseReleases, entitiesByHostname, type) {
const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesBySlug).map(async ([slug, entity]) => { const entitiesWithBeforeDataEntries = await Promise.all(Object.entries(entitiesByHostname).map(async ([slug, entity]) => {
console.log('scraper', entity.scraper?.beforeFetchScenes);
if (entity.scraper?.beforeFetchScenes) { if (entity.scraper?.beforeFetchScenes) {
const parameters = getRecursiveParameters(entity); const parameters = getRecursiveParameters(entity);
const preData = await entity.scraper.beforeFetchScenes(entity, parameters); const preData = await entity.scraper.beforeFetchScenes(entity, parameters);
@ -249,9 +250,9 @@ async function scrapeReleases(baseReleases, entitiesBySlug, type) {
async function fetchReleases(baseReleasesOrUrls, type = 'scene') { async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
const baseReleases = toBaseReleases(baseReleasesOrUrls); const baseReleases = toBaseReleases(baseReleasesOrUrls);
const entitiesBySlug = await fetchReleaseEntities(baseReleases); const entitiesByHostname = await fetchReleaseEntities(baseReleases);
const deepReleases = await scrapeReleases(baseReleases, entitiesBySlug, type); const deepReleases = await scrapeReleases(baseReleases, entitiesByHostname, type);
return deepReleases.filter(Boolean); return deepReleases.filter(Boolean);
} }

View File

@ -82,6 +82,7 @@ async function curateEntities(entities, includeParameters) {
return Promise.all(entities.map(async (entity) => curateEntity(entity, includeParameters))); return Promise.all(entities.map(async (entity) => curateEntity(entity, includeParameters)));
} }
/* obsolete in favor of urlToHostname
function urlToSiteSlug(url) { function urlToSiteSlug(url) {
try { try {
const slug = new URL(url) const slug = new URL(url)
@ -96,6 +97,21 @@ function urlToSiteSlug(url) {
return null; return null;
} }
} }
*/
/**
 * Derive the hostname of a URL with any leading `www.` prefix removed.
 *
 * Hostnames that do not start with `www.` are returned unchanged, so that
 * `https://example.com` and `https://www.example.com` resolve to the same key.
 *
 * @param {string} url - Absolute URL to extract the hostname from.
 * @returns {string|null} The hostname without a leading `www.`, or `null`
 *   when the URL cannot be parsed (a warning is logged in that case).
 */
function urlToHostname(url) {
	try {
		// Anchor the pattern so only a leading 'www.' is stripped; a host
		// without the prefix must still yield its hostname, not undefined.
		return new URL(url).hostname.replace(/^www\./, '');
	} catch (error) {
		logger.warn(`Failed to derive entity hostname from '${url}': ${error.message}`);

		return null;
	}
}
async function fetchIncludedEntities() { async function fetchIncludedEntities() {
const include = { const include = {
@ -191,6 +207,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
array['parent'] as parent_path array['parent'] as parent_path
FROM entities FROM entities
WHERE slug = ANY(:entitySlugs) WHERE slug = ANY(:entitySlugs)
OR url ILIKE ANY(:entityHosts)
UNION ALL UNION ALL
@ -215,14 +232,23 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
WHERE entity_tree.parent_id IS NULL WHERE entity_tree.parent_id IS NULL
GROUP BY entity_tree.entity GROUP BY entity_tree.entity
ORDER BY entity->'type' :sort; ORDER BY entity->'type' :sort;
`, { entitySlugs, sort: knex.raw(sort) }); `, {
entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')),
entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}%`),
sort: knex.raw(sort),
});
// channel entity will overwrite network entity // channel entity will overwrite network entity
const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({ const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => {
...accEntities, const host = urlToHostname(entity.url);
[entity.slug]: accEntities[entity.slug] || curateEntity(entity, true), const curatedEntity = accEntities[entity.slug] || accEntities[host] || curateEntity(entity, true);
[urlToSiteSlug(entity.url)]: accEntities[urlToSiteSlug(entity.url)] || curateEntity(entity, true),
}), {}); return {
...accEntities,
[entity.slug]: curatedEntity,
[host]: curatedEntity,
};
}, {});
return entitiesBySlug; return entitiesBySlug;
} }
@ -232,7 +258,7 @@ async function fetchReleaseEntities(baseReleases) {
const entitySlugs = Array.from(new Set( const entitySlugs = Array.from(new Set(
baseReleasesWithoutEntity baseReleasesWithoutEntity
.map((baseRelease) => urlToSiteSlug(baseRelease.url)) .map((baseRelease) => urlToHostname(baseRelease.url))
.filter(Boolean), .filter(Boolean),
)); ));
@ -409,5 +435,6 @@ module.exports = {
getRecursiveParent, getRecursiveParent,
searchEntities, searchEntities,
flushEntities, flushEntities,
urlToSiteSlug, urlToHostname,
// urlToSiteSlug,
}; };

View File

@ -22,6 +22,8 @@ function scrapeBlockLatest(scenes) {
release.teaser = qu.video(); release.teaser = qu.video();
console.log(release);
return release; return release;
}); });
} }
@ -57,6 +59,15 @@ function scrapeClassicLatest(scenes) {
}); });
} }
/**
 * Build a release object for a single scene page.
 *
 * @param {object} context - Scrape context passed in by the caller.
 * @param {object} context.query - Query helper; its `content(selector)` method
 *   is used to read the scene title from `.indScene h2`.
 * @returns {{ title: * }} Release containing whatever `query.content` returned
 *   for the title selector.
 */
function scrapeScene({ query }) {
	// No debug dump of the release here: scraped data should not be written
	// to stdout on every scene.
	return {
		title: query.content('.indScene h2'),
	};
}
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
if (!site.parameters) { if (!site.parameters) {
return null; return null;
@ -74,4 +85,6 @@ async function fetchLatest(site, page = 1) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
scrapeScene,
useUnprint: true,
}; };

View File

@ -481,10 +481,14 @@ async function addReleaseMedia(medias, release, target) {
await transferMedia(media, target); await transferMedia(media, target);
} }
await knex(`${release.type}s_${target}`).insert({ try {
[`${release.type}_id`]: release.id, await knex(`${release.type}s_${target}`).insert({
media_id: id, [`${release.type}_id`]: release.id,
}); media_id: id,
});
} catch (error) {
console.warn(`Ignored duplicate ${release.type} ${target} association ${media.hash} with ${release.id} "${release.title}"`);
}
}, Promise.resolve()); }, Promise.resolve());
} }
@ -496,8 +500,6 @@ async function linkMovieScenes(release, context) {
&& storedMovie.entity.slug === linkedMovie.entity.slug && storedMovie.entity.slug === linkedMovie.entity.slug
&& storedMovie.entity.type === linkedMovie.entity.type); && storedMovie.entity.type === linkedMovie.entity.type);
console.log('movie', linkedMovie, movie);
if (!movie) { if (!movie) {
throw new Error(`Missing ${linkedMovie.entity.slug} movie '${linkedMovie.title}' in '${release.title}'`); throw new Error(`Missing ${linkedMovie.entity.slug} movie '${linkedMovie.title}' in '${release.title}'`);
} }
@ -534,7 +536,7 @@ async function addRelease(release, context) {
]); ]);
if (!entity) { if (!entity) {
throw new Error(`Release contains non-existent ${release.entity.type} '${release.entity.slug}'`); throw new Error(`Release "${release.title}" contains non-existent ${release.entity.type} '${release.entity.slug}'`);
} }
const [releaseEntry] = await knex(`${release.type}s`) const [releaseEntry] = await knex(`${release.type}s`)