Added WIP media module. Returning releases from release search database function. Fixed page loop in update module.
This commit is contained in:
@@ -6,8 +6,7 @@ const initServer = require('./web/server');
|
||||
const knex = require('./knex');
|
||||
const fetchUpdates = require('./updates');
|
||||
const { fetchScenes, fetchMovies } = require('./deep');
|
||||
const { storeReleases } = require('./store-releases');
|
||||
const { updateReleasesSearch } = require('./releases');
|
||||
const { storeReleases, updateReleasesSearch } = require('./store-releases');
|
||||
const { scrapeActors } = require('./actors-legacy');
|
||||
|
||||
async function init() {
|
||||
@@ -38,8 +37,6 @@ async function init() {
|
||||
...(deepScenes || []),
|
||||
...(deepMovies || []),
|
||||
]);
|
||||
|
||||
// await storeReleaseActors(updateReleases);
|
||||
}
|
||||
|
||||
knex.destroy();
|
||||
|
||||
@@ -329,8 +329,6 @@ async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) {
|
||||
return {};
|
||||
}
|
||||
|
||||
console.log(presentSources, presentSources.length);
|
||||
|
||||
// split up source list to prevent excessive RAM usage
|
||||
const itemChunksBySource = await Promise.all(chunk(presentSources, 50).map(async (sourceChunk, index) => {
|
||||
try {
|
||||
|
||||
165
src/media.js
165
src/media.js
@@ -1,9 +1,170 @@
|
||||
'use strict';
|
||||
|
||||
function toBaseAvatars() {
|
||||
const Promise = require('bluebird');
|
||||
const nanoid = require('nanoid/non-secure');
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
const argv = require('./argv');
|
||||
const knex = require('./knex');
|
||||
const { get } = require('./utils/qu');
|
||||
|
||||
/**
 * Index a list of objects by the value of one of their properties.
 *
 * When several items share the same key value, the last one in the list wins.
 *
 * @param {Object[]} items - objects to index
 * @param {string} key - name of the property whose value becomes the lookup key
 * @returns {Object} plain object mapping `item[key]` to the item itself
 */
function itemsByKey(items, key) {
  // single pass; re-spreading the accumulator for every item would copy it n times (quadratic)
  return Object.fromEntries(items.map(item => [item[key], item]));
}
|
||||
|
||||
/**
 * Normalize a raw source, as supplied by a scraper, into a base source object.
 *
 * Accepted inputs:
 * - a string, which becomes `{ src: string }`;
 * - an object with a truthy `src`, or with both `extract` and `url` truthy;
 *   only the `src`, `quality`, `type`, `url` and `extract` properties are
 *   copied, and only when truthy.
 *
 * @param {string|Object|null|undefined} rawSource - source as returned by a scraper
 * @returns {Object|null} the base source, or null when the input is not usable
 */
function toBaseSource(rawSource) {
  // fallback lists may contain empty entries; report them as unusable instead of
  // throwing a TypeError on the property access below
  if (rawSource === null || rawSource === undefined) {
    return null;
  }

  if (typeof rawSource === 'string') {
    return {
      src: rawSource,
    };
  }

  const isDirect = Boolean(rawSource.src);
  const isExtractable = Boolean(rawSource.extract && rawSource.url);

  if (!isDirect && !isExtractable) {
    return null;
  }

  const baseSource = {};

  if (rawSource.src) baseSource.src = rawSource.src;
  if (rawSource.quality) baseSource.quality = rawSource.quality;
  if (rawSource.type) baseSource.type = rawSource.type;

  if (rawSource.url) baseSource.url = rawSource.url;
  if (rawSource.extract) baseSource.extract = rawSource.extract;

  return baseSource;
}
|
||||
|
||||
/**
 * Wrap one base source, or a list of fallback base sources, in a base media
 * object carrying a freshly generated id.
 *
 * @param {Object|Object[]|null} baseSource - a single base source or an array of them
 * @returns {{ id: string, sources: Object[] }|null} null for an empty array or a falsy source
 */
function baseSourceToBaseMedia(baseSource) {
  const isList = Array.isArray(baseSource);
  const isUsable = isList ? baseSource.length > 0 : Boolean(baseSource);

  if (!isUsable) {
    return null;
  }

  return {
    id: nanoid(),
    // a list is passed through as-is, a single source becomes a one-item list
    sources: isList ? baseSource : [baseSource],
  };
}
|
||||
|
||||
/**
 * Convert a list of raw fallback sources for one media item into a single
 * base media. Sources that `toBaseSource` rejects are left out.
 *
 * @param {Array} rawMedia - raw fallback sources for one media item
 * @returns {Object|null} whatever `baseSourceToBaseMedia` returns for the remaining sources
 */
function fallbackMediaToBaseMedia(rawMedia) {
  const usableSources = rawMedia.reduce((collected, rawSource) => {
    const baseSource = toBaseSource(rawSource);

    if (baseSource) {
      collected.push(baseSource);
    }

    return collected;
  }, []);

  return baseSourceToBaseMedia(usableSources);
}
|
||||
|
||||
/**
 * Convert a list of raw media entries into base medias.
 *
 * Each entry is either a single raw source or an array of fallback sources.
 * Falsy entries, and entries that do not yield a base media, are dropped.
 *
 * @param {Array|null|undefined} rawMedias - raw media entries
 * @returns {Object[]} base medias; empty when the input is missing or empty
 */
function toBaseMedias(rawMedias) {
  if (!rawMedias || rawMedias.length === 0) {
    return [];
  }

  const toBaseMedia = (rawMedia) => {
    if (!rawMedia) {
      return null;
    }

    // an array means fallback sources were provided for this media item
    return Array.isArray(rawMedia)
      ? fallbackMediaToBaseMedia(rawMedia)
      : baseSourceToBaseMedia(toBaseSource(rawMedia));
  };

  return rawMedias.reduce((baseMedias, rawMedia) => {
    const baseMedia = toBaseMedia(rawMedia);

    if (baseMedia) {
      baseMedias.push(baseMedia);
    }

    return baseMedias;
  }, []);
}
|
||||
|
||||
/**
 * Look up rows in the `media` table that match the sources of the given base medias.
 *
 * Two lookups run in parallel: `media.source` against every truthy `src`,
 * and `media.source_page` against every truthy `url`.
 *
 * @param {Object[]} baseMedias - base medias, each with a `sources` array
 * @returns {Promise<{ existingSourceMediaByUrl: Object, existingExtractMediaByUrl: Object }>}
 *   matching rows indexed by `source` and by `source_page` respectively
 */
async function findSourceDuplicates(baseMedias) {
  // gather one property from every source of every media, skipping empty values
  const collectFromSources = prop => baseMedias
    .flatMap(baseMedia => baseMedia.sources.map(source => source[prop]))
    .filter(Boolean);

  const sourceUrls = collectFromSources('src');
  const extractUrls = collectFromSources('url');

  const sourceQuery = knex('media').whereIn('source', sourceUrls);
  const extractQuery = knex('media').whereIn('source_page', extractUrls);

  const [existingSourceMedia, existingExtractMedia] = await Promise.all([sourceQuery, extractQuery]);

  return {
    existingSourceMediaByUrl: itemsByKey(existingSourceMedia, 'source'),
    existingExtractMediaByUrl: itemsByKey(existingExtractMedia, 'source_page'),
  };
}
|
||||
|
||||
/**
 * Work-in-progress extraction step for sources that point at a page rather
 * than directly at a file.
 *
 * When the source has both `extract` and `url`, the page is requested with
 * `get` and the response is printed; the source itself is returned unchanged
 * in every case.
 *
 * @param {Object} baseSource - base source, optionally with `extract` and `url`
 * @returns {Promise<Object>} the same `baseSource` object
 */
async function extractSource(baseSource) {
  const needsExtraction = Boolean(baseSource.extract && baseSource.url);

  if (needsExtraction) {
    const res = await get(baseSource.url);

    console.log(res);
  }

  return baseSource;
}
|
||||
|
||||
/**
 * Work-in-progress fetch of a single source: runs the extraction step and
 * prints the outcome. Resolves without a value.
 *
 * @param {Object} baseSource - source to fetch
 * @param {Object} existingMedias - lookups of already stored media
 * @param {Object} existingMedias.existingSourceMediaByUrl - not read yet
 * @param {Object} existingMedias.existingExtractMediaByUrl - handed to `extractSource`
 * @returns {Promise<void>}
 */
async function fetchSource(baseSource, { existingSourceMediaByUrl, existingExtractMediaByUrl }) {
  // planned steps: attempts, extract
  const resolvedSource = await extractSource(baseSource, existingExtractMediaByUrl);

  console.log(resolvedSource);
}
|
||||
|
||||
/**
 * Try the sources of a media item one at a time, in order, until one of them
 * is fetched without error.
 *
 * @param {Object} baseMedia - base media with a `sources` array
 * @param {Object} existingMedias - lookups passed through to `fetchSource`
 * @returns {Promise<void>} resolves once a source succeeds; rejects with the
 *   error of the last source tried, or with an empty Error when there are no sources
 */
async function fetchMedia(baseMedia, existingMedias) {
  let lastFailure = new Error();

  for (const baseSource of baseMedia.sources) {
    try {
      // sequential on purpose: later sources are only fallbacks for earlier ones
      await fetchSource(baseSource, existingMedias);

      return;
    } catch (error) {
      lastFailure = error;
    }
  }

  throw lastFailure;
}
|
||||
|
||||
/**
 * Work-in-progress entry point for storing media: looks up already stored
 * media for the given base medias, fetches every base media, then prints the
 * lookups.
 *
 * @param {Object[]} baseMedias - base medias to process
 * @returns {Promise<void>}
 */
async function storeMedias(baseMedias) {
  const existingMedias = await findSourceDuplicates(baseMedias);
  const { existingSourceMediaByUrl, existingExtractMediaByUrl } = existingMedias;

  await Promise.map(baseMedias, baseMedia => fetchMedia(baseMedia, {
    existingSourceMediaByUrl,
    existingExtractMediaByUrl,
  }));

  console.log(existingSourceMediaByUrl, existingExtractMediaByUrl);
}
|
||||
|
||||
/**
 * Collect the poster, photos, trailer and teaser of each release as base
 * medias and hand them to `storeMedias`.
 *
 * Does nothing unless `argv.media` is set. Images (poster, photos) are only
 * included when `argv.images` and the role's own flag are set; videos
 * (trailer, teaser) likewise depend on `argv.videos` and the role's flag.
 *
 * @param {Object[]} releases - releases with an `id` and optional `poster`,
 *   `photos`, `trailer` and `teaser` properties
 * @returns {Promise<void>}
 */
async function associateReleaseMedia(releases) {
  if (!argv.media) {
    return;
  }

  // a disabled role contributes an empty list; leaving the flag's false/undefined
  // in place would put non-media values in the list passed to storeMedias
  const baseMediasByReleaseId = Object.fromEntries(releases.map(release => [release.id, {
    poster: argv.images && argv.poster ? toBaseMedias([release.poster]) : [],
    photos: argv.images && argv.photos ? toBaseMedias(release.photos) : [],
    trailer: argv.videos && argv.trailer ? toBaseMedias([release.trailer]) : [],
    teaser: argv.videos && argv.teaser ? toBaseMedias([release.teaser]) : [],
  }]));

  // release -> role -> medias, flattened to one list of base medias
  const baseMedias = Object.values(baseMediasByReleaseId)
    .flatMap(releaseMedia => Object.values(releaseMedia))
    .flat();

  await storeMedias(baseMedias);
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
associateReleaseMedia,
|
||||
};
|
||||
|
||||
@@ -1,48 +1,20 @@
|
||||
'use strict';
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
|
||||
async function updateReleasesSearch(releaseIds) {
|
||||
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
|
||||
async function fetchReleases(limit = 100) {
|
||||
const releases = await knex('releases').limit(limit);
|
||||
|
||||
const documents = await knex.raw(`
|
||||
SELECT
|
||||
releases.id AS release_id,
|
||||
TO_TSVECTOR(
|
||||
'traxxx',
|
||||
releases.title || ' ' ||
|
||||
networks.name || ' ' ||
|
||||
networks.slug || ' ' ||
|
||||
networks.url || ' ' ||
|
||||
sites.name || ' ' ||
|
||||
sites.slug || ' ' ||
|
||||
COALESCE(sites.url, '') || ' ' ||
|
||||
COALESCE(sites.alias, '') || ' ' ||
|
||||
COALESCE(releases.shoot_id, '') || ' ' ||
|
||||
COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
|
||||
STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
|
||||
STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' ||
|
||||
STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
|
||||
) as document
|
||||
FROM releases
|
||||
LEFT JOIN sites ON releases.site_id = sites.id
|
||||
LEFT JOIN networks ON sites.network_id = networks.id
|
||||
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
|
||||
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
|
||||
LEFT JOIN actors ON local_actors.actor_id = actors.id
|
||||
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
||||
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for
|
||||
${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
|
||||
GROUP BY releases.id, sites.name, sites.slug, sites.alias, sites.url, networks.name, networks.slug, networks.url;
|
||||
`, releaseIds && [releaseIds]);
|
||||
return releases;
|
||||
}
|
||||
|
||||
if (documents.rows?.length > 0) {
|
||||
const query = knex('releases_search').insert(documents.rows).toString();
|
||||
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
||||
}
|
||||
/**
 * Search releases through the `search_releases` database function.
 *
 * @param {string} query - search text passed to `search_releases`
 * @param {number} [limit=100] - maximum number of rows to return
 * @returns {Promise<Object[]>} the rows of the raw query result
 */
async function searchReleases(query, limit = 100) {
  const bindings = [query, limit];
  const { rows } = await knex.raw('SELECT * FROM search_releases(?) LIMIT ?;', bindings);

  return rows;
}
|
||||
|
||||
module.exports = {
|
||||
updateReleasesSearch,
|
||||
fetchReleases,
|
||||
searchReleases,
|
||||
};
|
||||
|
||||
@@ -18,7 +18,7 @@ async function getPhotos(albumUrl) {
|
||||
const pageUrl = `https://blacksonblondes.com${lastPhotoPage.replace(/\d+.jpg/, `${(index + 1).toString().padStart(3, '0')}.jpg`)}`;
|
||||
|
||||
return {
|
||||
src: pageUrl,
|
||||
url: pageUrl,
|
||||
extract: q => q('.scenes-module img', 'src'),
|
||||
};
|
||||
});
|
||||
|
||||
@@ -5,8 +5,9 @@ const config = require('config');
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const slugify = require('./utils/slugify');
|
||||
const { associateTags } = require('./tags');
|
||||
const { associateActors } = require('./actors');
|
||||
const { associateReleaseTags } = require('./tags');
|
||||
const { associateReleaseMedia } = require('./media');
|
||||
|
||||
function curateReleaseEntry(release, batchId, existingRelease) {
|
||||
const slug = slugify(release.title, '-', {
|
||||
@@ -149,6 +150,46 @@ async function filterDuplicateReleases(releases) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Rebuild the search documents in `releases_search`.
 *
 * A text search vector is generated per release from its title, network,
 * site, shoot id, date, actor names, tag names and tag alias names, then
 * inserted into `releases_search`; an existing row for the same release has
 * its document overwritten.
 *
 * @param {Array} [releaseIds] - ids of the releases to update; all releases when omitted
 * @returns {Promise<void>}
 */
async function updateReleasesSearch(releaseIds) {
  logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);

  // the filter and its binding are only present when specific releases were requested
  const releaseFilter = releaseIds ? 'WHERE releases.id = ANY(?)' : '';
  const bindings = releaseIds && [releaseIds];

  const documents = await knex.raw(`
    SELECT
      releases.id AS release_id,
      TO_TSVECTOR(
        'traxxx',
        releases.title || ' ' ||
        networks.name || ' ' ||
        networks.slug || ' ' ||
        networks.url || ' ' ||
        sites.name || ' ' ||
        sites.slug || ' ' ||
        COALESCE(sites.url, '') || ' ' ||
        COALESCE(sites.alias, '') || ' ' ||
        COALESCE(releases.shoot_id, '') || ' ' ||
        COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
        STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
        STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' ||
        STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
      ) as document
    FROM releases
    LEFT JOIN sites ON releases.site_id = sites.id
    LEFT JOIN networks ON sites.network_id = networks.id
    LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
    LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
    LEFT JOIN actors ON local_actors.actor_id = actors.id
    LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 7
    LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
    ${releaseFilter}
    GROUP BY releases.id, sites.name, sites.slug, sites.alias, sites.url, networks.name, networks.slug, networks.url;
  `, bindings);

  const hasDocuments = documents.rows?.length > 0;

  if (!hasDocuments) {
    return;
  }

  // the insert is rendered to SQL text so the ON CONFLICT clause can be appended to it
  const insertQuery = knex('releases_search').insert(documents.rows).toString();

  await knex.raw(`${insertQuery} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
|
||||
|
||||
async function storeReleases(releases) {
|
||||
const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
|
||||
|
||||
@@ -169,15 +210,19 @@ async function storeReleases(releases) {
|
||||
const releasesWithId = attachReleaseIds([].concat(uniqueReleases, duplicateReleases), [].concat(storedReleaseEntries, duplicateReleaseEntries));
|
||||
|
||||
await Promise.all([
|
||||
associateTags(releasesWithId),
|
||||
associateActors(releasesWithId),
|
||||
associateReleaseTags(releasesWithId),
|
||||
associateReleaseMedia(releasesWithId),
|
||||
]);
|
||||
|
||||
logger.info(`Stored ${storedReleaseEntries.length} releases`);
|
||||
|
||||
await updateReleasesSearch(releasesWithId.map(release => release.id));
|
||||
|
||||
return releasesWithId;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
storeReleases,
|
||||
updateReleasesSearch,
|
||||
};
|
||||
|
||||
@@ -92,7 +92,7 @@ async function filterUniqueAssociations(tagAssociations) {
|
||||
return uniqueAssociations;
|
||||
}
|
||||
|
||||
async function associateTags(releases) {
|
||||
async function associateReleaseTags(releases) {
|
||||
const tagIdsBySlug = await matchReleaseTags(releases);
|
||||
const siteTagIdsBySiteId = await getSiteTags(releases);
|
||||
|
||||
@@ -103,5 +103,5 @@ async function associateTags(releases) {
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
associateTags,
|
||||
associateReleaseTags,
|
||||
};
|
||||
|
||||
@@ -62,13 +62,15 @@ function needNextPage(uniqueReleases, pageAccReleases) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const oldestReleaseOnPage = uniqueReleases
|
||||
.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
|
||||
.slice(-1)[0];
|
||||
if (uniqueReleases.every(release => !!release.date)) {
|
||||
const oldestReleaseOnPage = uniqueReleases
|
||||
.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
|
||||
.slice(-1)[0];
|
||||
|
||||
if (oldestReleaseOnPage && moment(oldestReleaseOnPage.date).isAfter(afterDate)) {
|
||||
// oldest release on page is newer than the specified date cut-off
|
||||
return true;
|
||||
if (moment(oldestReleaseOnPage.date).isAfter(afterDate)) {
|
||||
// oldest release on page is newer than the specified date cut-off
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// dates missing, and limit for scenes without dates not yet reached
|
||||
|
||||
@@ -1,80 +1,17 @@
|
||||
'use strict';
|
||||
|
||||
const {
|
||||
fetchReleases,
|
||||
fetchActorReleases,
|
||||
fetchNetworkReleases,
|
||||
fetchSiteReleases,
|
||||
fetchTagReleases,
|
||||
} = require('../releases');
|
||||
const { fetchReleases, searchReleases } = require('../releases');
|
||||
|
||||
async function fetchReleasesApi(req, res) {
|
||||
const releases = await fetchReleases({}, req.query);
|
||||
const query = req.query.query || req.query.q;
|
||||
|
||||
res.send(releases);
|
||||
}
|
||||
|
||||
/**
 * Route handler: send the first release that `fetchReleases` returns for the
 * `releaseId` route parameter.
 *
 * @param {Object} req - request; reads `req.params.releaseId`
 * @param {Object} res - response; the release is passed to `res.send`
 * @returns {Promise<void>}
 */
async function fetchReleaseByIdApi(req, res) {
  const { releaseId } = req.params;
  const [release] = await fetchReleases({ id: releaseId });

  res.send(release);
}
|
||||
|
||||
/**
 * Route handler: send the releases of the actor identified by the `actorId`
 * route parameter.
 *
 * The parameter is passed on both as a numeric `id` (null unless it converts
 * to an integer) and as a `slug` (null unless it is a string).
 *
 * @param {Object} req - request; reads `req.params.actorId` and `req.query`
 * @param {Object} res - response; the releases are passed to `res.send`
 * @returns {Promise<void>}
 */
async function fetchActorReleasesApi(req, res) {
  const { actorId: actorParam } = req.params;
  const numericId = Number(actorParam);

  const actor = {
    id: Number.isInteger(numericId) ? numericId : null,
    slug: typeof actorParam === 'string' ? actorParam : null,
  };

  const releases = await fetchActorReleases(actor, req.query);

  res.send(releases);
}
|
||||
|
||||
/**
 * Route handler: send the releases of the network identified by the
 * `networkId` route parameter.
 *
 * The parameter is passed on both as a numeric `id` (null unless it converts
 * to an integer) and as a `slug` (null unless it is a string), the same way
 * the actor releases handler treats its parameter.
 *
 * @param {Object} req - request; reads `req.params.networkId` and `req.query`
 * @param {Object} res - response; the releases are passed to `res.send`
 * @returns {Promise<void>}
 */
async function fetchNetworkReleasesApi(req, res) {
  // route parameters arrive as strings, so a `typeof === 'number'` check can never
  // match; convert the value instead to recognise numeric ids
  const numericId = Number(req.params.networkId);

  const networkId = Number.isInteger(numericId) ? numericId : null;
  const networkSlug = typeof req.params.networkId === 'string' ? req.params.networkId : null;

  const releases = await fetchNetworkReleases({
    id: networkId,
    slug: networkSlug,
  }, req.query);

  res.send(releases);
}
|
||||
|
||||
/**
 * Route handler: send the releases of the site identified by the `siteId`
 * route parameter.
 *
 * The parameter is passed on both as a numeric `id` (null unless it converts
 * to an integer) and as a `slug` (null unless it is a string), the same way
 * the actor releases handler treats its parameter.
 *
 * @param {Object} req - request; reads `req.params.siteId` and `req.query`
 * @param {Object} res - response; the releases are passed to `res.send`
 * @returns {Promise<void>}
 */
async function fetchSiteReleasesApi(req, res) {
  // route parameters arrive as strings, so a `typeof === 'number'` check can never
  // match; convert the value instead to recognise numeric ids
  const numericId = Number(req.params.siteId);

  const siteId = Number.isInteger(numericId) ? numericId : null;
  const siteSlug = typeof req.params.siteId === 'string' ? req.params.siteId : null;

  const releases = await fetchSiteReleases({
    id: siteId,
    slug: siteSlug,
  }, req.query);

  res.send(releases);
}
|
||||
|
||||
async function fetchTagReleasesApi(req, res) {
|
||||
const tagId = typeof req.params.tagId === 'number' ? req.params.tagId : null;
|
||||
const tagSlug = typeof req.params.tagId === 'string' ? req.params.tagId : null;
|
||||
|
||||
const releases = await fetchTagReleases({
|
||||
id: tagId,
|
||||
slug: tagSlug,
|
||||
}, req.query);
|
||||
const releases = query
|
||||
? await searchReleases(query, req.query.limit)
|
||||
: await fetchReleases(req.query.limit);
|
||||
|
||||
res.send(releases);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchReleases: fetchReleasesApi,
|
||||
fetchReleaseById: fetchReleaseByIdApi,
|
||||
fetchActorReleases: fetchActorReleasesApi,
|
||||
fetchNetworkReleases: fetchNetworkReleasesApi,
|
||||
fetchSiteReleases: fetchSiteReleasesApi,
|
||||
fetchTagReleases: fetchTagReleasesApi,
|
||||
};
|
||||
|
||||
@@ -16,22 +16,8 @@ const { ActorPlugins, SitePlugins, ReleasePlugins } = require('./plugins/plugins
|
||||
|
||||
const {
|
||||
fetchReleases,
|
||||
fetchReleaseById,
|
||||
fetchActorReleases,
|
||||
fetchNetworkReleases,
|
||||
fetchSiteReleases,
|
||||
fetchTagReleases,
|
||||
} = require('./releases');
|
||||
|
||||
const {
|
||||
fetchNetworks,
|
||||
fetchNetworksFromReleases,
|
||||
} = require('./networks');
|
||||
|
||||
const { fetchActors } = require('./actors');
|
||||
const { fetchSites } = require('./sites');
|
||||
const { fetchTags } = require('./tags');
|
||||
|
||||
function initServer() {
|
||||
const app = express();
|
||||
const router = Router();
|
||||
@@ -75,24 +61,6 @@ function initServer() {
|
||||
router.use(bodyParser.json({ strict: false }));
|
||||
|
||||
router.get('/api/releases', fetchReleases);
|
||||
router.get('/api/releases/:releaseId', fetchReleaseById);
|
||||
router.get('/api/releases/networks', fetchNetworksFromReleases);
|
||||
|
||||
router.get('/api/actors', fetchActors);
|
||||
router.get('/api/actors/:actorId', fetchActors);
|
||||
router.get('/api/actors/:actorId/releases', fetchActorReleases);
|
||||
|
||||
router.get('/api/networks', fetchNetworks);
|
||||
router.get('/api/networks/:networkId', fetchNetworks);
|
||||
router.get('/api/networks/:networkId/releases', fetchNetworkReleases);
|
||||
|
||||
router.get('/api/sites', fetchSites);
|
||||
router.get('/api/sites/:siteId', fetchSites);
|
||||
router.get('/api/sites/:siteId/releases', fetchSiteReleases);
|
||||
|
||||
router.get('/api/tags', fetchTags);
|
||||
router.get('/api/tags/:tagId', fetchTags);
|
||||
router.get('/api/tags/:tagId/releases', fetchTagReleases);
|
||||
|
||||
router.get('*', (req, res) => {
|
||||
res.render(path.join(__dirname, '../../assets/index.ejs'), {
|
||||
|
||||
Reference in New Issue
Block a user