Storing actor avatars. Using 1 second interval queue for location resolve as per OSM code of conduct.
This commit is contained in:
@@ -13,6 +13,7 @@ const logger = require('./logger')(__filename);
|
||||
const slugify = require('./utils/slugify');
|
||||
const capitalize = require('./utils/capitalize');
|
||||
const resolvePlace = require('./utils/resolve-place');
|
||||
const { associateAvatars } = require('./media');
|
||||
|
||||
const { toBaseReleases } = require('./deep');
|
||||
|
||||
@@ -80,6 +81,7 @@ function curateProfileEntry(profile) {
|
||||
has_piercings: profile.hasPiercings,
|
||||
piercings: profile.piercings,
|
||||
tattoos: profile.tattoos,
|
||||
avatar_media_id: profile.avatarMediaId || null,
|
||||
};
|
||||
|
||||
return curatedProfileEntry;
|
||||
@@ -91,6 +93,7 @@ async function curateProfile(profile) {
|
||||
id: profile.id,
|
||||
name: profile.name,
|
||||
avatar: profile.avatar,
|
||||
scraper: profile.scraper,
|
||||
};
|
||||
|
||||
curatedProfile.site = profile.site.isNetwork ? null : profile.site;
|
||||
@@ -198,6 +201,7 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
|
||||
return {
|
||||
...actor,
|
||||
...profile,
|
||||
scraper: scraperSlug,
|
||||
site: siteOrNetwork,
|
||||
};
|
||||
}), Promise.reject(new Error()));
|
||||
@@ -256,16 +260,16 @@ async function upsertProfiles(curatedProfileEntries) {
|
||||
}
|
||||
|
||||
if (argv.force && updatingProfileEntries.length > 0) {
|
||||
knex.transaction(async (transaction) => {
|
||||
const queries = updatingProfileEntries.map(profileEntry => knex('actors_profiles')
|
||||
.where('id', profileEntry.id)
|
||||
.update(profileEntry)
|
||||
.transacting(transaction));
|
||||
const transaction = await knex.transaction();
|
||||
const queries = updatingProfileEntries.map(profileEntry => knex('actors_profiles')
|
||||
.where('id', profileEntry.id)
|
||||
.update(profileEntry)
|
||||
.returning(['id', 'actor_id'])
|
||||
.transacting(transaction));
|
||||
|
||||
return Promise.all(queries)
|
||||
.then(transaction.commit)
|
||||
.catch(transaction.rollback);
|
||||
});
|
||||
await Promise.all(queries)
|
||||
.then(transaction.commit)
|
||||
.catch(transaction.rollback);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -305,7 +309,9 @@ async function scrapeActors(actorNames) {
|
||||
);
|
||||
|
||||
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
|
||||
const curatedProfileEntries = profiles.map(profile => curateProfileEntry(profile));
|
||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||
|
||||
const curatedProfileEntries = profilesWithAvatarIds.map(profile => curateProfileEntry(profile));
|
||||
|
||||
await upsertProfiles(curatedProfileEntries);
|
||||
}
|
||||
@@ -350,7 +356,7 @@ async function associateActors(releases, batchId) {
|
||||
const baseActors = Object.values(baseActorsByReleaseId).flat();
|
||||
|
||||
if (baseActors.length === 0) {
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
const baseActorsBySlugAndNetworkId = baseActors.reduce((acc, baseActor) => ({
|
||||
@@ -382,6 +388,8 @@ async function associateActors(releases, batchId) {
|
||||
.flat();
|
||||
|
||||
await knex.raw(`${knex('releases_actors').insert(releaseActorAssociations).toString()} ON CONFLICT DO NOTHING;`);
|
||||
|
||||
return actors;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
52
src/media.js
52
src/media.js
@@ -106,10 +106,11 @@ function toBaseSource(rawSource) {
|
||||
return null;
|
||||
}
|
||||
|
||||
function baseSourceToBaseMedia(baseSource, role) {
|
||||
function baseSourceToBaseMedia(baseSource, role, metadata) {
|
||||
if (Array.isArray(baseSource)) {
|
||||
if (baseSource.length > 0) {
|
||||
return {
|
||||
...metadata,
|
||||
id: nanoid(),
|
||||
role,
|
||||
sources: baseSource,
|
||||
@@ -121,6 +122,7 @@ function baseSourceToBaseMedia(baseSource, role) {
|
||||
|
||||
if (baseSource) {
|
||||
return {
|
||||
...metadata,
|
||||
id: nanoid(),
|
||||
role,
|
||||
sources: [baseSource],
|
||||
@@ -130,15 +132,15 @@ function baseSourceToBaseMedia(baseSource, role) {
|
||||
return null;
|
||||
}
|
||||
|
||||
function fallbackMediaToBaseMedia(rawMedia, role) {
|
||||
function fallbackMediaToBaseMedia(rawMedia, role, metadata) {
|
||||
const baseSources = rawMedia
|
||||
.map(source => toBaseSource(source))
|
||||
.filter(Boolean);
|
||||
|
||||
return baseSourceToBaseMedia(baseSources, role);
|
||||
return baseSourceToBaseMedia(baseSources, role, metadata);
|
||||
}
|
||||
|
||||
function toBaseMedias(rawMedias, role) {
|
||||
function toBaseMedias(rawMedias, role, metadata) {
|
||||
if (!rawMedias || rawMedias.length === 0) {
|
||||
return [];
|
||||
}
|
||||
@@ -150,12 +152,12 @@ function toBaseMedias(rawMedias, role) {
|
||||
|
||||
if (Array.isArray(rawMedia)) {
|
||||
// fallback sources provided
|
||||
return fallbackMediaToBaseMedia(rawMedia, role);
|
||||
return fallbackMediaToBaseMedia(rawMedia, role, metadata);
|
||||
}
|
||||
|
||||
const baseSource = toBaseSource(rawMedia);
|
||||
|
||||
return baseSourceToBaseMedia(baseSource, role);
|
||||
return baseSourceToBaseMedia(baseSource, role, metadata);
|
||||
}).filter(Boolean);
|
||||
|
||||
const sampledBaseMedias = sampleMedias(baseMedias);
|
||||
@@ -604,6 +606,44 @@ async function associateReleaseMedia(releases) {
|
||||
}, Promise.resolve());
|
||||
}
|
||||
|
||||
async function associateAvatars(profiles) {
|
||||
if (!argv.media) {
|
||||
return profiles;
|
||||
}
|
||||
|
||||
const profilesWithBaseMedias = profiles.map(profile => (profile.avatar
|
||||
? {
|
||||
...profile,
|
||||
avatarBaseMedia: toBaseMedias([profile.avatar], 'avatars', {
|
||||
copyright: profile.network?.name || null,
|
||||
scraper: profile.scraper || null,
|
||||
})[0],
|
||||
}
|
||||
: profile
|
||||
));
|
||||
|
||||
const baseMedias = profilesWithBaseMedias.map(profile => profile.avatarBaseMedia).filter(Boolean);
|
||||
|
||||
const storedMedias = await storeMedias(baseMedias);
|
||||
const storedMediasById = itemsByKey(storedMedias, 'id');
|
||||
|
||||
const profilesWithAvatarIds = profilesWithBaseMedias.map((profile) => {
|
||||
const media = storedMediasById[profile.avatarBaseMedia?.id];
|
||||
|
||||
if (media) {
|
||||
return {
|
||||
...profile,
|
||||
avatarMediaId: media.use || media.entry.id,
|
||||
};
|
||||
}
|
||||
|
||||
return profile;
|
||||
});
|
||||
|
||||
return profilesWithAvatarIds;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
associateAvatars,
|
||||
associateReleaseMedia,
|
||||
};
|
||||
|
||||
@@ -365,8 +365,8 @@ function scrapeApiProfile(data, releases, siteSlug) {
|
||||
if (data.attributes.eye_color) profile.eyes = data.attributes.eye_color;
|
||||
if (data.attributes.hair_color) profile.hair = data.attributes.hair_color;
|
||||
|
||||
const avatarPath = Object.values(data.pictures).reverse()[0];
|
||||
if (avatarPath) profile.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`;
|
||||
const avatarPaths = Object.values(data.pictures).reverse();
|
||||
if (avatarPaths.length > 0) profile.avatar = avatarPaths.map(avatarPath => `https://images01-evilangel.gammacdn.com/actors${avatarPath}`);
|
||||
|
||||
profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);
|
||||
|
||||
|
||||
@@ -342,8 +342,6 @@ function scrapeProfile(html, url, actorName) {
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ const config = require('config');
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const slugify = require('./utils/slugify');
|
||||
const { associateActors } = require('./actors');
|
||||
const { associateActors, scrapeActors } = require('./actors');
|
||||
const { associateReleaseTags } = require('./tags');
|
||||
const { curateSite } = require('./sites');
|
||||
const { associateReleaseMedia } = require('./media');
|
||||
@@ -226,18 +226,19 @@ async function storeReleases(releases) {
|
||||
const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
|
||||
const releasesWithId = attachReleaseIds([].concat(uniqueReleases, duplicateReleases), [].concat(storedReleaseEntries, duplicateReleaseEntries));
|
||||
|
||||
await Promise.all([
|
||||
const [actors] = await Promise.all([
|
||||
associateActors(releasesWithId, batchId),
|
||||
associateReleaseTags(releasesWithId),
|
||||
]);
|
||||
|
||||
await updateReleasesSearch(releasesWithId.map(release => release.id));
|
||||
|
||||
// media is more error-prone, associate separately
|
||||
await associateReleaseMedia(releasesWithId);
|
||||
await scrapeActors(actors.map(actor => actor.name));
|
||||
|
||||
logger.info(`Stored ${storedReleaseEntries.length} releases`);
|
||||
|
||||
await updateReleasesSearch(releasesWithId.map(release => release.id));
|
||||
|
||||
return releasesWithId;
|
||||
}
|
||||
|
||||
|
||||
@@ -36,17 +36,13 @@ function useProxy(url) {
|
||||
|
||||
const queue = taskQueue();
|
||||
|
||||
queue.on('concurrencyReached:http', () => {
|
||||
logger.silly('Queueing requests');
|
||||
});
|
||||
|
||||
queue.define('http', async ({
|
||||
async function handler({
|
||||
url,
|
||||
method = 'GET',
|
||||
body,
|
||||
headers = {},
|
||||
options = {},
|
||||
}) => {
|
||||
}) {
|
||||
if (body) {
|
||||
logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)}`);
|
||||
} else {
|
||||
@@ -55,12 +51,12 @@ queue.define('http', async ({
|
||||
|
||||
const reqOptions = {
|
||||
headers: {
|
||||
...(options.defaultHeaders !== false && defaultHeaders),
|
||||
...(options?.defaultHeaders !== false && defaultHeaders),
|
||||
...headers,
|
||||
},
|
||||
...defaultOptions,
|
||||
...options,
|
||||
...(options.timeout && { responseTimeout: options.timeout }),
|
||||
...(options?.timeout && { responseTimeout: options?.timeout }),
|
||||
};
|
||||
|
||||
if (useProxy(url)) {
|
||||
@@ -71,8 +67,8 @@ queue.define('http', async ({
|
||||
? await bhttp[method.toLowerCase()](url, body, reqOptions)
|
||||
: await bhttp[method.toLowerCase()](url, reqOptions);
|
||||
|
||||
if (options.stream && options.destination) {
|
||||
await pipeline(res, ...(options.transforms || []), options.destination);
|
||||
if (options?.stream && options?.destination) {
|
||||
await pipeline(res, ...(options?.transforms || []), options?.destination);
|
||||
}
|
||||
|
||||
const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
|
||||
@@ -88,12 +84,22 @@ queue.define('http', async ({
|
||||
code: res.statusCode,
|
||||
status: res.statusCode,
|
||||
};
|
||||
}, {
|
||||
}
|
||||
|
||||
queue.on('concurrencyReached:http', () => {
|
||||
logger.silly('Queueing requests');
|
||||
});
|
||||
|
||||
queue.define('20p', handler, {
|
||||
concurrency: 20,
|
||||
});
|
||||
|
||||
async function get(url, headers, options) {
|
||||
return queue.push('http', {
|
||||
queue.define('1s', handler, {
|
||||
interval: 1,
|
||||
});
|
||||
|
||||
async function get(url, headers, options, queueMethod = '20p') {
|
||||
return queue.push(queueMethod, {
|
||||
method: 'GET',
|
||||
url,
|
||||
headers,
|
||||
@@ -101,8 +107,8 @@ async function get(url, headers, options) {
|
||||
});
|
||||
}
|
||||
|
||||
async function post(url, body, headers, options) {
|
||||
return queue.push('http', {
|
||||
async function post(url, body, headers, options, queueMethod = '20p') {
|
||||
return queue.push(queueMethod, {
|
||||
method: 'POST',
|
||||
url,
|
||||
body,
|
||||
|
||||
@@ -12,7 +12,7 @@ async function resolvePlace(query) {
|
||||
// https://operations.osmfoundation.org/policies/nominatim/
|
||||
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {
|
||||
'User-Agent': 'contact at moonloop.adult@protonmail.com',
|
||||
});
|
||||
}, null, '1s');
|
||||
|
||||
const [item] = res.body;
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
||||
}
|
||||
|
||||
extend type Actor {
|
||||
age: Int @requires(columns: ["birthdate"])
|
||||
age: Int @requires(columns: ["date_of_birth"])
|
||||
height(units:Units): String @requires(columns: ["height"])
|
||||
weight(units:Units): String @requires(columns: ["weight"])
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user