Added Bang! deep scrape. Improved network page layout. Added Bang Bros logos.

This commit is contained in:
2020-01-07 04:23:28 +01:00
parent 89064e9e0c
commit 0a19f2e624
71 changed files with 194 additions and 116 deletions

View File

@@ -9,6 +9,7 @@ const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const whereOr = require('./utils/where-or');
const resolvePlace = require('./utils/resolve-place');
const slugify = require('./utils/slugify');
const { createMediaDirectory, storePhotos } = require('./media');
async function curateActor(actor) {
@@ -89,7 +90,7 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
.split(' ')
.map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
.join(' '),
slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
slug: slugify(actor.name),
birthdate: actor.birthdate,
description: actor.description,
gender: actor.gender,
@@ -320,7 +321,7 @@ async function mergeProfiles(profiles, actor) {
async function scrapeActors(actorNames) {
await Promise.map(actorNames || argv.actors, async (actorName) => {
try {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const actorSlug = slugify(actorName);
const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
const sources = argv.sources ? argv.sources.map(source => [source, scrapers.actors[source]]) : Object.entries(scrapers.actors);
@@ -393,28 +394,40 @@ async function scrapeBasicActors() {
}
/**
 * Link actors to releases in the `releases_actors` table, storing a new actor
 * entry for every name that is not in the `actors` table yet.
 *
 * @param {Object} mappedActors - maps an actor name to the array of release IDs
 *   that actor should be associated with
 * @param {Array<Object>} releases - release entries; only their `id` is read,
 *   to look up the associations that already exist
 * @returns {Promise<void>} resolves once the new associations are inserted and
 *   `scrapeBasicActors()` has finished
 */
async function associateActors(mappedActors, releases) {
  const actorNames = Object.keys(mappedActors);
  const actorSlugs = actorNames.map(name => slugify(name));

  const [existingActorEntries, existingAssociationEntries] = await Promise.all([
    // match on slug as well as name, so differently spelled variants of a stored name are found
    knex('actors')
      .whereIn('name', actorNames)
      .orWhereIn('slug', actorSlugs),
    knex('releases_actors').whereIn('release_id', releases.map(release => release.id)),
  ]);

  const associations = await Promise.map(Object.entries(mappedActors), async ([actorName, releaseIds]) => {
    try {
      const actorSlug = slugify(actorName);

      // prefer an exact name match, then fall back to the slug the query also matched on;
      // without the fallback a slug-only match would be stored a second time
      const actorEntry = existingActorEntries.find(actor => actor.name === actorName)
        || existingActorEntries.find(actor => actor.slug === actorSlug)
        || await storeActor({ name: actorName });

      return releaseIds
        .map(releaseId => ({
          release_id: releaseId,
          actor_id: actorEntry.id,
        }))
        // remove associations already in database
        .filter(association => !existingAssociationEntries
          .some(associationEntry => associationEntry.actor_id === association.actor_id
            && associationEntry.release_id === association.release_id));
    } catch (error) {
      // one failing actor must not abort the whole batch; report it and skip its associations
      console.error(actorName, error);

      return null;
    }
  });

  await Promise.all([
    // drop the null placeholders left by failed actors before flattening
    knex('releases_actors').insert(associations.filter(association => association).flat()),
    scrapeBasicActors(),
  ]);
}