'use strict';

const config = require('config');
const util = require('util');
const Promise = require('bluebird');
const moment = require('moment');
const blake2 = require('blake2');
const DOMPurify = require('dompurify');
const { JSDOM } = require('jsdom');
const omit = require('object.omit');

const { window } = new JSDOM('');
const domPurify = DOMPurify(window);

// const logger = require('./logger')(__filename);
const knex = require('./knex');
const scrapers = require('./scrapers/scrapers').actors;

const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const bulkInsert = require('./utils/bulk-insert');
const logger = require('./logger')(__filename);

const { toBaseReleases } = require('./deep');
const { associateAvatars, flushOrphanedMedia } = require('./media');
const { fetchEntitiesBySlug } = require('./entities');
const { deleteScenes } = require('./releases');

const slugify = require('./utils/slugify');
const capitalize = require('./utils/capitalize');
const resolvePlace = require('./utils/resolve-place');

// Lookup from lowercased hair color descriptions, as returned by scrapers, to the normalized hair color stored on a profile.
// Descriptions without an entry are treated as unrecognized by curateProfile.
const hairColors = {
	'jet-black': 'black',
	'red-head': 'red',
	'soft-black': 'black',
	black: 'black',
	blonde: 'blonde',
	blondie: 'blonde',
	brown: 'brown',
	brunette: 'brown',
	fair: 'blonde',
	raven: 'black',
	red: 'red',
	redhead: 'red',
	blue: 'blue',
	green: 'green',
	purple: 'purple',
	pink: 'pink',
};

// Lookup from lowercased eye color descriptions, as returned by scrapers, to the normalized eye color stored on a profile.
// Both spellings of gray are accepted and stored as 'gray'.
const eyeColors = {
	blue: 'blue',
	brown: 'brown',
	dark: 'brown',
	gray: 'gray',
	green: 'green',
	grey: 'gray',
	hazel: 'hazel',
};

// Lookup from lowercased ethnicity descriptions, as returned by scrapers, to the normalized ethnicity stored on a profile.
// Descriptions without an entry are treated as unrecognized by curateProfile.
const ethnicities = {
	'african american': 'black',
	'african-american': 'black',
	'native american': 'native american',
	african: 'black',
	arabic: 'arabic',
	aravic: 'arabic', // misspelled key kept for backward compatibility
	asian: 'asian',
	black: 'black',
	caucasian: 'white',
	european: 'white',
	hispanic: 'latin',
	indian: 'indian',
	japanese: 'japanese',
	latin: 'latin',
	latina: 'latina',
	latino: 'latino',
	white: 'white',
};

/**
 * Interpret a scraped yes/no style value as a boolean.
 *
 * @param {*} value - a boolean, or a string containing 'yes' or 'no' (case-insensitive)
 * @returns {boolean|null} the interpreted boolean, or null when the value cannot be interpreted
 */
function getBoolean(value) {
	if (typeof value === 'boolean') {
		return value;
	}

	if (typeof value === 'string') {
		if (/yes/i.test(value)) {
			return true;
		}

		if (/no/i.test(value)) {
			return false;
		}
	}

	return null;
}

/**
 * Find the value that occurs most often in a list.
 * Values are counted by their slug, so values sharing a slug are counted together.
 * On a tie, the value that reached the highest count first is kept.
 *
 * @param {Array} items - values to count; null and undefined entries are ignored
 * @returns {*} the most frequent value as it appeared in the list, or null when there are no countable values
 */
function getMostFrequent(items) {
	const { mostFrequent } = items.reduce((acc, item) => {
		if (item === undefined || item === null) {
			return acc;
		}

		const slug = slugify(item);

		acc.counts[slug] = (acc.counts[slug] || 0) + 1;

		// compare against null explicitly, a falsy leader such as 0 (the month January) or false must not be displaced by a less frequent value
		if (acc.mostFrequent === null || acc.counts[slug] > acc.counts[slugify(acc.mostFrequent)]) {
			acc.mostFrequent = item;
		}

		return acc;
	}, {
		counts: {},
		mostFrequent: null,
	});

	return mostFrequent;
}

/**
 * Compose a date from the most frequent year, the most frequent month and the most frequent day of the month.
 * Each component is selected independently of the others.
 *
 * @param {Date[]} dates - dates to assess
 * @returns {Date|null} the composed date, or null when any component could not be determined
 */
function getMostFrequentDate(dates) {
	const [year, month, date] = ['getFullYear', 'getMonth', 'getDate']
		.map(getter => getMostFrequent(dates.map(dateX => dateX[getter]())));

	if ([year, month, date].includes(null)) {
		return null;
	}

	return moment({ year, month, date }).toDate();
}

/**
 * Pick the highest value from a list.
 * The comparison starts from null, so only values that compare greater than null can be returned.
 *
 * @param {Array} items - values to compare
 * @returns {*} the highest value, or null when no value compares greater than null
 */
function getHighest(items) {
	let highest = null;

	items.forEach((item) => {
		if (item > highest) {
			highest = item;
		}
	});

	return highest;
}

/**
 * Pick the item with the greatest length.
 *
 * @param {Array<string>} items - items to compare by their length property
 * @returns {string|null} the longest item, or null when the list is empty or the longest item is falsy
 */
function getLongest(items) {
	// sort a copy, Array.prototype.sort works in place and would reorder the caller's array
	return [...items].sort((itemA, itemB) => itemB.length - itemA.length)[0] || null;
}

/**
 * Calculate the rounded average of a list of numbers.
 *
 * @param {number[]} items - numbers to average
 * @returns {number|null} the rounded average, or null when the list is empty or the average rounds to 0
 */
function getAverage(items) {
	let total = 0;

	items.forEach((item) => {
		total += item;
	});

	const average = Math.round(total / items.length);

	return average || null;
}

/**
 * Convert a list of actor names and scraped actor objects into base actors.
 * A name may carry an entry ID after a colon, as in 'Jane Doe:123'.
 *
 * @param {Array<string|Object>} actorsOrNames - names, or objects with at least a name property; other entries are skipped
 * @param {Object} [release] - release the actors appear in, its parent entity or otherwise its own entity is attached to every actor
 * @returns {Object[]} base actors with name, slug, entryId, entity and hasProfile; scraped objects keep their remaining properties
 */
function toBaseActors(actorsOrNames, release) {
	if (!actorsOrNames) {
		return [];
	}

	const entity = release?.entity?.parent || release?.entity || null;

	return actorsOrNames.reduce((baseActors, candidate) => {
		if (!candidate) {
			return baseActors;
		}

		const profileName = candidate.name;

		if (typeof candidate !== 'string' && !profileName) {
			return baseActors;
		}

		const [rawName, inlineEntryId] = (profileName || candidate).split(':');
		const name = capitalize(rawName);

		const identity = {
			name,
			slug: slugify(name),
			entryId: inlineEntryId || candidate.entryId || null,
			entity,
			hasProfile: !!profileName, // actor contains profile information
		};

		// the derived identity takes precedence over same-named scraped properties
		baseActors.push(profileName ? { ...candidate, ...identity } : identity);

		return baseActors;
	}, []);
}

/**
 * Convert an actor or profile database row into its camel-cased representation.
 *
 * @param {Object} actor - actor row, optionally with joined alias, entity, country, avatar and profiles objects
 * @param {boolean} [withDetails=false] - include physical details, places, avatar, alias, entity and profiles
 * @param {boolean} [isProfile=false] - include the description, only applied together with withDetails
 * @returns {Object|null} the curated actor, or null when no actor was given
 */
function curateActor(actor, withDetails = false, isProfile = false) {
	if (!actor) {
		return null;
	}

	const curatedActor = {
		id: actor.id,
		name: actor.name,
		slug: actor.slug,
		url: actor.url,
		gender: actor.gender,
		entityId: actor.entity_id,
		aliasFor: actor.alias_for,
		dateOfBirth: actor.date_of_birth,
		age: actor.age,
		birthCountry: actor.birth_country_alpha2,
		...(withDetails && {
			alias: actor.alias && {
				id: actor.alias.id,
				name: actor.alias.name,
				slug: actor.alias.slug, // the alias' own slug, not the slug of the actor itself
				gender: actor.alias.gender,
			},
			entity: actor.entity && {
				id: actor.entity.id,
				name: actor.entity.name,
				slug: actor.entity.slug,
			},
			dateOfDeath: actor.date_of_death,
			cup: actor.cup,
			bust: actor.bust,
			waist: actor.waist,
			hip: actor.hip,
			naturalBoobs: actor.natural_boobs,
			penisLength: actor.penis_length,
			penisGirth: actor.penis_girth,
			circumcised: actor.circumcised,
			height: actor.height,
			weight: actor.weight,
			eyes: actor.eyes,
			hairColor: actor.hair_color,
			hasTattoos: actor.has_tattoos,
			hasPiercings: actor.has_piercings,
			tattoos: actor.tattoos,
			piercings: actor.piercings,
			...(isProfile && { description: actor.description }),
			placeOfBirth: actor.birth_country && {
				country: {
					alpha2: actor.birth_country.alpha2,
					name: actor.birth_country.name,
					alias: actor.birth_country.alias,
				},
				state: actor.birth_state,
				city: actor.birth_city,
			},
			placeOfResidence: actor.residence_country && {
				country: {
					alpha2: actor.residence_country.alpha2,
					name: actor.residence_country.name,
					alias: actor.residence_country.alias,
				},
				state: actor.residence_state,
				city: actor.residence_city,
			},
			avatar: actor.avatar && {
				id: actor.avatar.id,
				path: actor.avatar.path,
				width: actor.avatar.width,
				height: actor.avatar.height,
				size: actor.avatar.size,
				source: actor.avatar.source,
			},
			// nested profiles are curated with details and their description
			...(actor.profiles && { profiles: actor.profiles?.map(profile => curateActor(profile, true, true)) }),
		}),
	};

	return curatedActor;
}

/**
 * Build the actors table entry for a base actor.
 *
 * @param {Object} baseActor - base actor with name, slug and entryId
 * @param {number} batchId - ID of the batch the actor is added in
 * @returns {Object} entry with snake-cased column names, entity_id is always null
 */
function curateActorEntry(baseActor, batchId) {
	const { name, slug, entryId } = baseActor;

	return {
		name,
		slug,
		entity_id: null,
		entry_id: entryId,
		batch_id: batchId,
	};
}

/**
 * Build actors table entries for a list of base actors, all assigned to the same batch.
 *
 * @param {Object[]} baseActors - base actors to convert
 * @param {number} batchId - ID of the batch the actors are added in
 * @returns {Object[]} one entry per base actor, in the same order
 */
function curateActorEntries(baseActors, batchId) {
	const toEntry = baseActor => curateActorEntry(baseActor, batchId);

	return baseActors.map(toEntry);
}

/**
 * Build the actors_profiles table entry for a curated profile.
 *
 * @param {Object} profile - curated profile, its id is the ID of the actor it belongs to
 * @returns {Object|null} entry with snake-cased column names, or null when the profile has no actor ID
 */
function curateProfileEntry(profile) {
	if (!profile.id) {
		return null;
	}

	const {
		entity,
		placeOfBirth: origin,
		placeOfResidence: residence,
	} = profile;

	// only a profile explicitly marked as new (update === false) is stored without a row ID
	const rowId = profile.update === false ? {} : { id: profile.update };

	return {
		...rowId,
		actor_id: profile.id,
		entity_id: entity?.id || null,
		date_of_birth: profile.dateOfBirth,
		date_of_death: profile.dateOfDeath,
		age: profile.age,
		url: profile.url,
		gender: profile.gender,
		ethnicity: profile.ethnicity,
		description: profile.description,
		description_hash: profile.descriptionHash,
		birth_city: origin?.city || null,
		birth_state: origin?.state || null,
		birth_country_alpha2: origin?.country || null,
		residence_city: residence?.city || null,
		residence_state: residence?.state || null,
		residence_country_alpha2: residence?.country || null,
		cup: profile.cup,
		bust: profile.bust,
		waist: profile.waist,
		hip: profile.hip,
		penis_length: profile.penisLength,
		penis_girth: profile.penisGirth,
		circumcised: profile.circumcised,
		natural_boobs: profile.naturalBoobs,
		height: profile.height,
		weight: profile.weight,
		hair_color: profile.hairColor,
		eyes: profile.eyes,
		has_tattoos: profile.hasTattoos,
		has_piercings: profile.hasPiercings,
		piercings: profile.piercings,
		tattoos: profile.tattoos,
		avatar_media_id: profile.avatarMediaId || null,
	};
}

/**
 * Normalize a scraped profile: sanitize the description, map free-form values onto known values,
 * coerce measurements, resolve places and convert scraped scenes to base releases.
 * Failures are logged rather than thrown.
 *
 * @param {Object} profile - scraped profile merged with actor details; entity is the source entity, and may be absent
 * @param {Object} [actor] - actor the profile belongs to, attached to scraped scenes that do not list them
 * @returns {Promise<Object|null>} the curated profile, or null when no profile was given or curation failed
 */
async function curateProfile(profile, actor) {
	if (!profile) {
		return null;
	}

	try {
		// the entity may be absent, resolve its name once so the warnings below never dereference null and discard the profile
		const entityName = profile.entity?.name;

		const curatedProfile = {
			id: profile.id,
			name: profile.name,
			avatar: profile.avatar,
			scraper: profile.scraper,
			entity: profile.entity,
			update: profile.update,
		};

		// collapse whitespace and strip all markup from the description
		curatedProfile.description = domPurify.sanitize(profile.description?.replace(/\s+/g, ' '), { ALLOWED_TAGS: [] }).trim() || null;

		const hasher = curatedProfile.description && blake2
			.createHash('blake2b', { digestLength: 24 })
			.update(Buffer.from(slugify(curatedProfile.description)));

		curatedProfile.descriptionHash = curatedProfile.description && hasher.digest('hex');

		curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available

		curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;
		curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.toLowerCase().replace('hair', '').trim()] || null;
		curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null;

		curatedProfile.tattoos = profile.tattoos?.trim() || null;
		curatedProfile.piercings = profile.piercings?.trim() || null;

		// 'female' must be tested before 'male', as the latter is contained in the former
		curatedProfile.gender = (/female/i.test(profile.gender) && 'female')
			|| (/shemale|trans/i.test(profile.gender) && 'transsexual')
			|| (/male/i.test(profile.gender) && 'male')
			|| null;

		const dateOfBirth = profile.dateOfBirth || profile.birthdate;

		curatedProfile.dateOfBirth = (!Number.isNaN(Number(dateOfBirth)) // possibly valid date
			&& new Date() - dateOfBirth > 567648000000 // over 18
			&& dateOfBirth)
			|| null;

		curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath;
		curatedProfile.age = Number(profile.age) || null;

		// measurements are taken as a number, or otherwise as the first run of digits found in a string
		curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
		curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;

		curatedProfile.cup = profile.cup || (typeof profile.bust === 'string' && profile.bust?.match?.(/[a-zA-Z]+/)?.[0]) || null;
		curatedProfile.bust = Number(profile.bust) || profile.bust?.match?.(/\d+/)?.[0] || null;
		curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null;
		curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null;
		curatedProfile.penisLength = Number(profile.penisLength) || profile.penisLength?.match?.(/\d+/)?.[0] || null;
		curatedProfile.penisGirth = Number(profile.penisGirth) || profile.penisGirth?.match?.(/\d+/)?.[0] || null;

		curatedProfile.circumcised = getBoolean(profile.circumcised);
		curatedProfile.naturalBoobs = getBoolean(profile.naturalBoobs);
		curatedProfile.hasTattoos = getBoolean(profile.hasTattoos);
		curatedProfile.hasPiercings = getBoolean(profile.hasPiercings);

		if (argv.resolvePlace) {
			const [placeOfBirth, placeOfResidence] = await Promise.all([
				resolvePlace(profile.birthPlace),
				resolvePlace(profile.residencePlace),
			]);

			curatedProfile.placeOfBirth = placeOfBirth;
			curatedProfile.placeOfResidence = placeOfResidence;
		}

		// fall back to deriving the country of birth from the nationality
		if (!curatedProfile.placeOfBirth && curatedProfile.nationality) {
			const country = await knex('countries')
				.where('nationality', 'ilike', `%${curatedProfile.nationality}%`)
				.orWhere('alpha3', 'ilike', `%${curatedProfile.nationality}%`)
				.orWhere('alpha2', 'ilike', `%${curatedProfile.nationality}%`)
				.orderBy('priority', 'desc')
				.first();

			if (country) {
				curatedProfile.placeOfBirth = {
					country: country.alpha2,
				};
			}
		}

		// keep only social links that parse as a URL
		curatedProfile.social = Array.isArray(profile.social)
			? profile.social.map((social) => {
				try {
					const { href } = new URL(social);
					return href;
				} catch (error) {
					logger.warn(`Profile scraper for '${entityName}' returned invalid social link: ${social}`);
					return null;
				}
			}).filter(Boolean)
			: [];

		curatedProfile.scenes = toBaseReleases(profile.scenes || profile.releases, profile.entity, actor)
			// attach actor to base scene, in case it was not scraped
			.map((scene) => {
				if (actor && !scene.actors?.find(sceneActor => slugify(sceneActor) === actor.slug || slugify(sceneActor.name) === actor.slug)) {
					return {
						...scene,
						actors: [actor, ...(scene.actors || [])],
					};
				}

				return scene;
			});

		if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${entityName}' scraper: ${profile.ethnicity}`);
		if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${entityName}' scraper: ${profile.hairColor || profile.hair}`);
		if (profile.eyes && !curatedProfile.eyes) logger.warn(`Unrecognized eye color returned by '${entityName}' scraper: ${profile.eyes}`);

		return curatedProfile;
	} catch (error) {
		logger.error(`Failed to curate '${profile.name}': ${error.message}`);

		return null;
	}
}

/**
 * Fetch actor profiles together with their actor and avatar rows.
 *
 * @param {Array<number|string>} [actorIdsOrNames] - actor IDs (numbers) and actor names (strings) to restrict the
 * result to; names only match actors without an entity. When omitted, all profiles are fetched.
 * @returns {Promise<Object[]>} profile rows, each with actor and avatar as JSON
 */
async function fetchProfiles(actorIdsOrNames) {
	const restrictToActors = (query) => {
		if (!actorIdsOrNames) {
			return;
		}

		const actorIds = actorIdsOrNames.filter(idOrName => typeof idOrName === 'number');
		const actorNames = actorIdsOrNames.filter(idOrName => typeof idOrName === 'string');

		query
			.whereIn('actor_id', actorIds)
			.orWhere((builder) => {
				builder
					.whereIn('actors.name', actorNames)
					.whereNull('actors.entity_id');
			});
	};

	return knex('actors_profiles')
		.select(knex.raw('actors_profiles.*, row_to_json(actors) as actor, row_to_json(media) as avatar'))
		.leftJoin('actors', 'actors.id', 'actors_profiles.actor_id')
		.modify(restrictToActors)
		.leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id');
}

/**
 * Merge the profiles of each actor into a single set of values and store those on the actor.
 * The interpolated columns of the selected actors are cleared first, so actors left without
 * any profiles end up empty. Both steps run in one transaction.
 *
 * @param {Array<number|string>} [actorIdsOrNames] - actor IDs (numbers) and actor names (strings) to
 * interpolate; names only match actors without an entity. When omitted, all actors are interpolated.
 * @returns {Promise<void>}
 * @throws when clearing the existing data fails; a failure to write the new data is rolled back and logged
 */
async function interpolateProfiles(actorIdsOrNames) {
	const profiles = await fetchProfiles(actorIdsOrNames);

	const profilesByActorId = profiles.reduce((acc, profile) => ({
		...acc,
		[profile.actor_id]: [
			...(acc[profile.actor_id] || []),
			profile,
		],
	}), {});

	logger.info(`Interpolating ${profiles.length} profiles from ${Object.keys(profilesByActorId).length} actors`);

	const interpolatedProfiles = Object.entries(profilesByActorId).map(([actorId, actorProfiles]) => {
		// group values from each profile
		const valuesByProperty = actorProfiles.reduce((acc, profile) => Object
			.entries(profile)
			.reduce((profileAcc, [property, value]) => ({
				...profileAcc,
				[property]: [
					...(acc[property] || []),
					...(value === null ? [] : Array.from({ length: profile.priority }, () => value)), // multiply by priority, increasing the odds of being the most frequent value
				],
			}), {
				// bundle location values so they can be assessed together, to ensure the most frequent city is in the most frequent state is in most frequent country
				origin: [...acc.origin || [], {
					...(profile.birth_country_alpha2 && { country: profile.birth_country_alpha2 }),
					...(profile.birth_state && { state: profile.birth_state }),
					...(profile.birth_city && { city: profile.birth_city }),
				}].filter(location => Object.keys(location).length > 0),
				residence: [...acc.residence || [], {
					...(profile.residence_country_alpha2 && { country: profile.residence_country_alpha2 }),
					...(profile.residence_state && { state: profile.residence_state }),
					...(profile.residence_city && { city: profile.residence_city }),
				}].filter(location => Object.keys(location).length > 0),
			}), {});

		const mostFrequentValues = [
			'gender',
			'ethnicity',
			'cup',
			'bust',
			'waist',
			'hip',
			'penis_length',
			'penis_girth',
			'circumcised',
			'natural_boobs',
			'hair_color',
			'eyes',
			'has_tattoos',
			'has_piercings',
		].reduce((acc, property) => ({
			...acc,
			[property]: getMostFrequent(valuesByProperty[property]),
		}), {});

		const profile = {
			id: actorId,
			...mostFrequentValues,
		};

		profile.height = getMostFrequent(valuesByProperty.height.filter(height => height > 50 && height < 300)); // remove unlikely values

		profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
		profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
		profile.age = getHighest(valuesByProperty.age);

		// ensure most frequent country, city and state match up
		profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
		const remainingOriginCountries = valuesByProperty.origin.filter(location => location.country === profile.birth_country_alpha2);

		profile.birth_state = getMostFrequent(remainingOriginCountries.map(location => location.state));
		const remainingOriginStates = remainingOriginCountries.filter(location => !profile.birth_state || location.state === profile.birth_state);

		profile.birth_city = getMostFrequent(remainingOriginStates.map(location => location.city));

		profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence.map(location => location.country));
		const remainingResidenceCountries = valuesByProperty.residence.filter(location => location.country === profile.residence_country_alpha2);

		profile.residence_state = getMostFrequent(remainingResidenceCountries.map(location => location.state));
		const remainingResidenceStates = remainingResidenceCountries.filter(location => !profile.residence_state || location.state === profile.residence_state);

		profile.residence_city = getMostFrequent(remainingResidenceStates.map(location => location.city));

		profile.weight = getAverage(valuesByProperty.weight);

		profile.tattoos = getLongest(valuesByProperty.tattoos);
		profile.piercings = getLongest(valuesByProperty.piercings);

		// prefer the tallest avatar, skipping avatars with a known entropy of 6 or lower
		profile.avatar_media_id = actorProfiles
			.map(actorProfile => actorProfile.avatar)
			.filter(avatar => avatar && (avatar.entropy === null || avatar.entropy > 6))
			.sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null;

		return profile;
	});

	// every interpolated column set to null, used to clear existing interpolated data
	const emptyProfile = Object
		.keys(omit(curateProfileEntry({ id: 1 }), ['id', 'actor_id', 'entity_id', 'url', 'description_hash']))
		.reduce((acc, key) => ({ ...acc, [key]: null }), {});

	const transaction = await knex.transaction();

	try {
		// clear existing interpolated data
		await knex('actors')
			.modify((modifyBuilder) => {
				if (actorIdsOrNames) {
					modifyBuilder
						.whereIn('id', actorIdsOrNames.filter(idOrName => typeof idOrName === 'number'))
						.orWhere((whereBuilder) => {
							whereBuilder
								.whereIn('name', actorIdsOrNames.filter(idOrName => typeof idOrName === 'string'))
								.whereNull('entity_id');
						});
				}
			})
			.update(emptyProfile)
			.transacting(transaction);
	} catch (error) {
		// close the transaction before propagating the failure, otherwise it is left open
		await transaction.rollback(error);
		throw error;
	}

	// insert new interpolated data
	const queries = interpolatedProfiles.map(profile => knex('actors')
		.where('id', profile.id)
		.update(profile)
		.transacting(transaction));

	try {
		await Promise.all(queries);
		await transaction.commit();
	} catch (error) {
		// report the failure, a silent rollback would leave no trace of the profiles not being saved
		logger.error(`Failed to save interpolated profiles, rolling back: ${error.message}`);
		await transaction.rollback(error);
	}
}

/**
 * Store curated profiles: profiles not marked as an update are inserted, and profiles marked
 * as an update overwrite their existing row, but only when --force is set.
 *
 * @param {Object[]} profiles - curated profiles, update holds the ID of the existing profile row or is falsy for new profiles
 * @returns {Promise<void>}
 */
async function upsertProfiles(profiles) {
	const newProfileEntries = profiles.filter(profile => !profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);
	const updatingProfileEntries = profiles.filter(profile => profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);

	if (newProfileEntries.length > 0) {
		await bulkInsert('actors_profiles', newProfileEntries);

		logger.info(`Saved ${newProfileEntries.length} actor profiles`);
	}

	if (argv.force && updatingProfileEntries.length > 0) {
		const transaction = await knex.transaction();
		const queries = updatingProfileEntries.map(profileEntry => knex('actors_profiles')
			.where('id', profileEntry.id)
			.update(profileEntry)
			.returning(['id', 'actor_id'])
			.transacting(transaction));

		try {
			await Promise.all(queries);
			await transaction.commit();
		} catch (error) {
			// report the failure and skip the success message, nothing was updated
			logger.error(`Failed to update ${updatingProfileEntries.length} actor profiles, rolling back: ${error.message}`);
			await transaction.rollback(error);

			return;
		}

		logger.info(`Updated ${updatingProfileEntries.length} new actor profiles`);
	}
}

/**
 * Scrape and curate the profiles of one actor from every configured source.
 * A source may be a group of scraper slugs, these are tried in order until one yields a profile.
 * Failures are logged and the source is left out of the result.
 *
 * @param {Object} actor - actor entry to find profiles for
 * @param {Array<string|string[]>} sources - scraper slugs, or groups of scraper slugs
 * @param {Object} entitiesBySlug - entities keyed by scraper slug
 * @param {Object} existingProfilesByActorEntityId - existing profile rows, keyed by actor ID and then entity ID
 * @returns {Promise<Object[]>} curated profiles, sources without a result are omitted
 */
async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
	const profiles = Promise.map(sources, async (source) => {
		try {
			// config may group sources to try until success
			return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
				try {
					const entity = entitiesBySlug[scraperSlug] || null;

					const scraper = scrapers[scraperSlug];
					// the entity may be missing, guard the lookups so the explicit entity check below reports it
					const layoutScraper = scraper?.[entity?.parameters?.layout]
						|| scraper?.[entity?.parent?.parameters?.layout]
						|| scraper?.[entity?.parent?.parent?.parameters?.layout]
						|| scraper;

					const context = {
						...entity,
						// legacy
						site: entity,
						network: entity?.parent,
						entity,
						scraper: scraperSlug,
						parameters: {
							...entity?.parent?.parent?.parameters,
							...entity?.parent?.parameters,
							...entity?.parameters,
						},
					};

					const label = context.entity?.name;

					if (!layoutScraper?.fetchProfile) {
						logger.warn(`No profile profile scraper available for ${scraperSlug}`);
						throw new Error(`No profile profile scraper available for ${scraperSlug}`);
					}

					if (!context.entity) {
						logger.warn(`No entity found for ${scraperSlug}`);
						throw new Error(`No entity found for ${scraperSlug}`);
					}

					const existingProfile = existingProfilesByActorEntityId[actor.id]?.[context.entity?.id || null];

					if (existingProfile && !argv.force) {
						logger.verbose(`Found existing profile for '${actor.name}' on '${label}', use --force to scrape again`);

						return null;
					}

					logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);

					const profile = await layoutScraper.fetchProfile(curateActor({
						...existingProfile,
						...actor,
					}), context, include);

					if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
						logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
						throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${label}`), { code: 'PROFILE_NOT_AVAILABLE' });
					}

					logger.verbose(`Found profile for '${actor.name}' on '${label}'`);

					return await curateProfile({
						...actor,
						...profile,
						entity,
						update: existingProfile?.id || false,
					}, actor);
				} catch (error) {
					if (error.code !== 'PROFILE_NOT_AVAILABLE') {
						logger.error(`Failed to fetch profile for '${actor.name}' from '${scraperSlug}': ${error.message}`);
					}

					// throw error to try next source
					throw error;
				}
			}), Promise.reject(new Error())); // start out rejected, so the first scraper in the group is tried
		} catch (error) {
			// every scraper in the group failed, an unavailable profile is expected and not reported as an error
			if (error.code !== 'PROFILE_NOT_AVAILABLE') {
				logger.error(`Failed to fetch profile for '${actor.name}': ${error.message}`);
			}
		}

		return null;
	});

	return profiles.filter(Boolean);
}

/**
 * Determine the actors to scrape: the given names, or when none are given, the names of actors
 * selected from the database by the date their profiles were last updated.
 *
 * @param {string[]} actorNames - explicitly requested actor names
 * @returns {Promise<string[]>} the given names when not empty, otherwise names from the database
 */
async function getActorNames(actorNames) {
	if (actorNames.length > 0) {
		return actorNames;
	}

	const referenceDate = argv.actorsUpdate || new Date();

	const { rows } = await knex.raw(`
		SELECT actors.name
		FROM actors
		WHERE NOT EXISTS (
			SELECT *
			FROM actors_profiles
			WHERE actors_profiles.actor_id = actors.id
			AND actors_profiles.updated_at <= (?)
		)
	`, [referenceDate]);

	return rows.map(({ name }) => name);
}

/**
 * Persist curated profiles: associate their avatars, store the profile entries,
 * and re-interpolate the actors the profiles belong to.
 *
 * @param {Object[]} profiles - curated profiles, each id is the ID of the actor it belongs to
 * @returns {Promise<void>}
 */
async function storeProfiles(profiles) {
	const avatarProfiles = await associateAvatars(profiles);

	// every actor is interpolated once, regardless of its number of profiles
	const uniqueActorIds = new Set();

	profiles.forEach(({ id }) => {
		uniqueActorIds.add(id);
	});

	await upsertProfiles(avatarProfiles);
	await interpolateProfiles([...uniqueActorIds]);
}

/**
 * Scrape profiles for the given actors from the configured profile sources,
 * creating actor entries for names that are not known yet.
 * Profiles are printed when argv.report is set, and stored when argv.save is set.
 *
 * @param {string[]} argNames - actor names to scrape, when empty the names are selected from the database
 * @returns {Promise<Object[]>} the curated profiles that were scraped
 */
async function scrapeActors(argNames) {
	const actorNames = await getActorNames(argNames);
	const baseActors = toBaseActors(actorNames);

	logger.info(`Scraping profiles for ${actorNames.length} actors`);

	// scrapers already is the map of actor scrapers, fall back to all of them when no sources are configured
	const sources = argv.profileSources || config.profiles || Object.keys(scrapers);
	const entitySlugs = sources.flat();

	const [entitiesBySlug, existingActorEntries] = await Promise.all([
		fetchEntitiesBySlug(entitySlugs, 'desc'),
		knex('actors')
			.select(['id', 'name', 'slug', 'entry_id'])
			.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
			.whereNull('alias_for'),
	]);

	const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
		...acc,
		[actorEntry.slug]: {
			...acc[actorEntry.slug],
			// rows carry the column name entry_id, entries without an entry ID are keyed by null like base actors
			[actorEntry.entry_id || null]: actorEntry,
		},
	}), {});

	const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug]?.[baseActor.entryId]);

	// a batch is only created when there are new actors to add
	const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
	const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);

	// TODO: associate entity when entry ID is provided

	const newActorEntries = batchId && await bulkInsert('actors', curatedActorEntries);

	const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);

	const existingProfiles = await knex('actors_profiles')
		.select(knex.raw('actors_profiles.*, row_to_json(avatars) as avatar'))
		.whereIn('actor_id', actors.map(actor => actor.id))
		.leftJoin('media as avatars', 'avatars.id', 'actors_profiles.avatar_media_id');

	const existingProfilesByActorEntityId = existingProfiles.reduce((acc, profile) => ({
		...acc,
		[profile.actor_id]: {
			...acc[profile.actor_id],
			[profile.entity_id]: profile,
		},
	}), {});

	const profilesPerActor = await Promise.map(
		actors,
		async actor => scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId),
		{ concurrency: 10 },
	);

	const profiles = profilesPerActor.flat().filter(Boolean);

	logger.info(`Scraped ${profiles.length} profiles`);

	if (argv.report) {
		console.log(util.inspect(profiles, { depth: Infinity, colors: true }));
	}

	if (argv.save) {
		await storeProfiles(profiles);
	}

	return profiles;
}

/**
 * Look up the actors matching the given base actors, insert those that do not exist yet,
 * and store the profile information carried by newly inserted actors.
 *
 * @param {Object[]} baseActors - base actors with slug and entity, optionally with profile details
 * @param {number} batchId - ID of the batch new actors are added in
 * @returns {Promise<Object[]>} newly inserted actor rows followed by the existing actor rows
 */
async function getOrCreateActors(baseActors, batchId) {
	if (baseActors.length === 0) {
		// an empty VALUES list is invalid SQL, and there is nothing to look up or insert
		return [];
	}

	// WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available
	const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId)', { slug: actor.slug, entityId: actor.entity.id })).join(', ');

	// actors match by slug when they have no entity, or the same entity as the base actor
	const existingActors = await knex
		.select('actors.*')
		.from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id)`))
		.whereRaw('actors.slug = base_actors.slug AND actors.entity_id IS NULL')
		.orWhereRaw('actors.slug = base_actors.slug AND actors.entity_id = base_actors.entity_id');

	// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
	const existingActorSlugs = existingActors.reduce((acc, actor) => ({
		...acc,
		[actor.entity_id]: {
			...acc[actor.entity_id],
			[actor.slug]: true,
		},
	}), {});

	const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);

	const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
	const insertedActors = await bulkInsert('actors', curatedActorEntries);

	// only an array result is treated as inserted rows, normalize it before it is used below
	const newActors = Array.isArray(insertedActors) ? insertedActors : [];

	const newActorIdsByEntityIdAndSlug = newActors.reduce((acc, actor) => ({
		...acc,
		[actor.entity_id]: {
			...acc[actor.entity_id],
			[actor.slug]: actor.id,
		},
	}), {});

	const newActorProfiles = (await Promise.all(baseActors
		.filter(actor => actor.hasProfile)
		.map(actor => ({
			...actor,
			id: newActorIdsByEntityIdAndSlug[actor.entity?.id]?.[actor.slug] || newActorIdsByEntityIdAndSlug.null?.[actor.slug],
		}))
		.filter(actor => !!actor.id)
		.map(actor => curateProfile(actor))))
		.filter(Boolean); // curateProfile resolves to null when curation fails, storing a null profile would throw

	await storeProfiles(newActorProfiles);

	return newActors.concat(existingActors);
}

/**
 * Link the actors listed on each release to that release, creating actors that do not exist yet.
 * Actors that are an alias are linked through the actor they are an alias for.
 *
 * @param {Object[]} releases - stored releases with an id, and optionally actors and an entity
 * @param {number} batchId - ID of the batch new actors are added in
 * @returns {Promise<Object[]>} the actor rows involved, or an empty array when no release lists any actors
 */
async function associateActors(releases, batchId) {
	const baseActorsByReleaseId = {};

	releases.forEach((release) => {
		if (release.actors) {
			baseActorsByReleaseId[release.id] = toBaseActors(release.actors, release);
		}
	});

	const baseActors = Object.values(baseActorsByReleaseId).flat();

	if (baseActors.length === 0) {
		return [];
	}

	// deduplicate by slug, the last base actor listed for a slug is the one that is kept
	const baseActorsBySlug = Object.fromEntries(baseActors.map(baseActor => [baseActor.slug, baseActor]));
	const actors = await getOrCreateActors(Object.values(baseActorsBySlug), batchId);

	const actorIdsBySlug = Object.fromEntries(actors.map(actor => [actor.slug, actor.alias_for || actor.id]));

	const releaseActorAssociations = Object.entries(baseActorsByReleaseId)
		.flatMap(([releaseId, releaseActors]) => releaseActors.map(({ slug }) => ({
			release_id: releaseId,
			actor_id: actorIdsBySlug[slug],
		})));

	await bulkInsert('releases_actors', releaseActorAssociations, false);

	logger.verbose(`Associated ${releaseActorAssociations.length} actors to ${releases.length} scenes`);

	return actors;
}

/**
 * Fetch a single actor with its entity, alias, countries, avatar and profiles.
 *
 * @param {number|string} actorId - actor ID, or an actor slug when the value is not numeric
 * @returns {Promise<Object|null>} the curated actor with details, or null when no actor was found
 */
async function fetchActor(actorId) {
	// anything that does not convert to a number is looked up as a slug
	const lookupColumn = Number.isNaN(Number(actorId)) ? 'actors.slug' : 'actors.id';

	const actorEntry = await knex('actors')
		.select(knex.raw(`
			actors.*,
			row_to_json(entities) as entity,
			row_to_json(actor_alias) as alias,
			row_to_json(birth_country) as birth_country,
			row_to_json(residence_country) as residence_country,
			row_to_json(media) as avatar,
			json_agg(actors_profiles) as profiles
		`))
		.where(lookupColumn, actorId)
		.leftJoin('actors as actor_alias', 'actor_alias.id', 'actors.alias_for')
		.leftJoin('actors_profiles', 'actors.id', 'actors_profiles.actor_id')
		.leftJoin('entities', 'entities.id', 'actors.entity_id')
		.leftJoin('countries as birth_country', 'birth_country.alpha2', 'actors.birth_country_alpha2')
		.leftJoin('countries as residence_country', 'residence_country.alpha2', 'actors.residence_country_alpha2')
		.leftJoin('media', 'media.id', 'actors.avatar_media_id')
		.groupBy('actors.id', 'entities.id', 'actor_alias.id', 'birth_country.alpha2', 'residence_country.alpha2', 'media.id')
		.first();

	return curateActor(actorEntry, true);
}

/**
 * Search actors through the search_actors database function.
 *
 * @param {string} query - search query passed to search_actors
 * @returns {Promise<Object[]>} at most 100 curated actors, without details
 */
async function searchActors(query) {
	const searchResults = knex.raw('search_actors(?) as actors', [query]);

	const matches = await knex
		.select('*')
		.from(searchResults)
		.limit(100);

	return matches.map(match => curateActor(match));
}

/**
 * Delete the profiles of the given actors, re-interpolate those actors, and flush orphaned media.
 *
 * @param {Array<number|string>} [actorIdsOrNames] - actor IDs (numbers) and actor names (strings), all profiles are removed when omitted
 * @returns {Promise<void>}
 */
async function flushProfiles(actorIdsOrNames) {
	const profiles = await fetchProfiles(actorIdsOrNames);

	const actorNames = [...new Set(profiles.map(({ actor }) => actor.name))];
	const profileIds = profiles.map(({ id }) => id);

	const deleteCount = await knex('actors_profiles')
		.whereIn('id', profileIds)
		.delete();

	await interpolateProfiles(actorIdsOrNames);
	await flushOrphanedMedia(); // don't flush until main avatar is detached by re-interpolating

	// list the actor names only when there are few enough to read
	if (actorNames.length > 20) {
		logger.info(`Removed ${deleteCount} profiles for ${actorNames.length} actors`);
	} else if (deleteCount > 0) {
		logger.info(`Removed ${deleteCount} profiles for ${actorNames.join(', ')}`);
	} else {
		logger.info(`Removed ${deleteCount} profiles`);
	}
}

/**
 * Delete the given actors together with the scenes they appear in, and flush orphaned media.
 *
 * @param {Array<number|string>} actorIdsOrNames - actor IDs (numbers) and actor names (strings), names only match actors without an entity
 * @returns {Promise<void>}
 */
async function flushActors(actorIdsOrNames) {
	const requestedIds = actorIdsOrNames.filter(idOrName => typeof idOrName === 'number');

	const actors = await knex('actors')
		.whereIn('id', requestedIds)
		.orWhere((builder) => {
			const requestedNames = actorIdsOrNames.filter(idOrName => typeof idOrName === 'string');

			builder
				.whereIn('name', requestedNames)
				.whereNull('entity_id');
		});

	const actorIds = actors.map(({ id }) => id);

	const sceneIds = await knex('releases_actors')
		.select('releases.id')
		.whereIn('actor_id', actorIds)
		.leftJoin('releases', 'releases.id', 'releases_actors.release_id')
		.pluck('id');

	// scenes and actors are removed concurrently
	const actorsDeletion = knex('actors')
		.whereIn('id', actorIds)
		.delete();

	const [deletedScenesCount, deletedActorsCount] = await Promise.all([
		deleteScenes(sceneIds),
		actorsDeletion,
	]);

	await flushOrphanedMedia();

	logger.info(`Removed ${deletedActorsCount} actors with ${deletedScenesCount} scenes`);
}

// Public interface of this module; the remaining functions are internal helpers.
module.exports = {
	associateActors,
	fetchActor,
	flushActors,
	flushProfiles,
	interpolateProfiles,
	scrapeActors,
	searchActors,
	toBaseActors,
};