Improved scraping and association behavior.
This commit is contained in:
		
							parent
							
								
									7e9fd19c2a
								
							
						
					
					
						commit
						3ec7b15886
					
				| 
						 | 
				
			
			@ -20,6 +20,11 @@
 | 
			
		|||
                    <span>{{ actor.aliases.join(', ') }}</span>
 | 
			
		||||
                </li>
 | 
			
		||||
 | 
			
		||||
                <li v-if="actor.gender">
 | 
			
		||||
                    <dfn class="bio-heading">Gender</dfn>
 | 
			
		||||
                    <span>{{ actor.gender }}</span>
 | 
			
		||||
                </li>
 | 
			
		||||
 | 
			
		||||
                <li v-if="actor.birthdate">
 | 
			
		||||
                    <dfn class="bio-heading">Date of birth</dfn>
 | 
			
		||||
                    <span>{{ formatDate(actor.birthdate, 'MMMM D, YYYY') }} ({{ age }})</span>
 | 
			
		||||
| 
						 | 
				
			
			@ -51,7 +56,7 @@
 | 
			
		|||
                    <span v-if="actor.residencePlace">{{ actor.residencePlace }}</span>
 | 
			
		||||
                </li>
 | 
			
		||||
 | 
			
		||||
                <li v-if="actor.caucasion">
 | 
			
		||||
                <li v-if="actor.ethnicity">
 | 
			
		||||
                    <dfn class="bio-heading">Ethnicity</dfn>
 | 
			
		||||
                    <span>{{ actor.ethnicity }}</span>
 | 
			
		||||
                </li>
 | 
			
		||||
| 
						 | 
				
			
			@ -61,7 +66,7 @@
 | 
			
		|||
                    <span>{{ actor.height }}</span>
 | 
			
		||||
                </li>
 | 
			
		||||
 | 
			
		||||
                <li v-if="actor.gender !== 'male'">
 | 
			
		||||
                <li v-if="actor.boobSize || actor.boobsNatural">
 | 
			
		||||
                    <dfn class="bio-heading">Boobs</dfn>
 | 
			
		||||
                    <span v-if="actor.boobSize">{{ actor.boobSize }}</span>
 | 
			
		||||
                    <span v-if="actor.boobsNatural !== null">{{ actor.boobsNatural ? 'Natural' : 'Enhanced' }}</span>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -53,6 +53,9 @@ exports.up = knex => Promise.resolve()
 | 
			
		|||
 | 
			
		||||
        table.datetime('created_at')
 | 
			
		||||
            .defaultTo(knex.fn.now());
 | 
			
		||||
 | 
			
		||||
        table.datetime('scraped_at');
 | 
			
		||||
        table.boolean('scrape_success');
 | 
			
		||||
    }))
 | 
			
		||||
    .then(() => knex.schema.createTable('directors', (table) => {
 | 
			
		||||
        table.increments('id', 12);
 | 
			
		||||
| 
						 | 
				
			
			@ -229,6 +232,8 @@ exports.up = knex => Promise.resolve()
 | 
			
		|||
            .notNullable()
 | 
			
		||||
            .references('id')
 | 
			
		||||
            .inTable('actors');
 | 
			
		||||
 | 
			
		||||
        table.unique(['release_id', 'actor_id']);
 | 
			
		||||
    }))
 | 
			
		||||
    .then(() => knex.schema.createTable('directors_associated', (table) => {
 | 
			
		||||
        table.increments('id', 16);
 | 
			
		||||
| 
						 | 
				
			
			@ -242,6 +247,8 @@ exports.up = knex => Promise.resolve()
 | 
			
		|||
            .notNullable()
 | 
			
		||||
            .references('id')
 | 
			
		||||
            .inTable('directors');
 | 
			
		||||
 | 
			
		||||
        table.unique(['release_id', 'director_id']);
 | 
			
		||||
    }))
 | 
			
		||||
    .then(() => knex.schema.createTable('tags_associated', (table) => {
 | 
			
		||||
        table.integer('tag_id', 12)
 | 
			
		||||
| 
						 | 
				
			
			@ -256,6 +263,9 @@ exports.up = knex => Promise.resolve()
 | 
			
		|||
        table.integer('release_id', 16)
 | 
			
		||||
            .references('id')
 | 
			
		||||
            .inTable('releases');
 | 
			
		||||
 | 
			
		||||
        table.unique(['release_id', 'tag_id']);
 | 
			
		||||
        table.unique(['site_id', 'tag_id']);
 | 
			
		||||
    }));
 | 
			
		||||
 | 
			
		||||
exports.down = knex => Promise.resolve()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,6 +12,7 @@ async function curateActor(actor) {
 | 
			
		|||
 | 
			
		||||
    return {
 | 
			
		||||
        id: actor.id,
 | 
			
		||||
        gender: actor.gender,
 | 
			
		||||
        name: actor.name,
 | 
			
		||||
        description: actor.description,
 | 
			
		||||
        birthdate: actor.birthdate && new Date(actor.birthdate),
 | 
			
		||||
| 
						 | 
				
			
			@ -43,10 +44,13 @@ function curateActors(releases) {
 | 
			
		|||
    return Promise.all(releases.map(async release => curateActor(release)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function curateScrapedActor(actor) {
 | 
			
		||||
    return {
 | 
			
		||||
function curateActorEntry(actor, scraped, scrapeSuccess) {
 | 
			
		||||
    const curatedActor = {
 | 
			
		||||
        id: actor.id,
 | 
			
		||||
        name: actor.name,
 | 
			
		||||
        name: actor.name
 | 
			
		||||
            .split(' ')
 | 
			
		||||
            .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
 | 
			
		||||
            .join(' '),
 | 
			
		||||
        slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
 | 
			
		||||
        birthdate: actor.birthdate,
 | 
			
		||||
        description: actor.description,
 | 
			
		||||
| 
						 | 
				
			
			@ -65,6 +69,16 @@ function curateScrapedActor(actor) {
 | 
			
		|||
        tattoos: actor.tattoos,
 | 
			
		||||
        piercings: actor.piercings,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    if (scraped) {
 | 
			
		||||
        return {
 | 
			
		||||
            ...curatedActor,
 | 
			
		||||
            scraped_at: new Date(),
 | 
			
		||||
            scrape_success: scrapeSuccess,
 | 
			
		||||
        };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return curatedActor;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function fetchActors(queryObject) {
 | 
			
		||||
| 
						 | 
				
			
			@ -82,8 +96,8 @@ async function fetchActors(queryObject) {
 | 
			
		|||
    return curateActors(releases);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function storeActor(actor) {
 | 
			
		||||
    const curatedActor = curateScrapedActor(actor);
 | 
			
		||||
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
 | 
			
		||||
    const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);
 | 
			
		||||
 | 
			
		||||
    const actorEntries = await knex('actors')
 | 
			
		||||
        .insert(curatedActor)
 | 
			
		||||
| 
						 | 
				
			
			@ -102,8 +116,8 @@ async function storeActor(actor) {
 | 
			
		|||
    return null;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function updateActor(actorEntry, actor) {
 | 
			
		||||
    const curatedActor = curateScrapedActor(actor);
 | 
			
		||||
async function updateActor(actorEntry, actor, scraped = false, scrapeSuccess = false) {
 | 
			
		||||
    const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);
 | 
			
		||||
 | 
			
		||||
    const actorEntries = await knex('actors')
 | 
			
		||||
        .where({ id: actorEntry.id })
 | 
			
		||||
| 
						 | 
				
			
			@ -117,39 +131,59 @@ async function updateActor(actorEntry, actor) {
 | 
			
		|||
 | 
			
		||||
/**
 * Scrape a profile for each actor name and persist the outcome.
 *
 * For every name the existing database entry is looked up by slug (lower-cased
 * name with whitespace runs replaced by dashes). The scrapers are queried with
 * the stored name when an entry exists, otherwise with the name as given.
 * Only the first scraper's result is used.
 *
 * - profile found, entry exists: the entry is updated and flagged as scraped successfully
 * - profile found, no entry:     a new entry is stored and flagged as scraped successfully
 * - no profile, entry exists:    the entry is flagged as scraped without success
 * - no profile, no entry:        nothing is written
 *
 * @param {string[]} [actorNames] - names to scrape; falls back to argv.actors
 * @returns {Promise<void>}
 */
async function scrapeActors(actorNames) {
    await Promise.map(actorNames || argv.actors, async (actorName) => {
        const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

        const [actorEntry] = await fetchActors({ slug: actorSlug });
        const lookupName = actorEntry ? actorEntry.name : actorName;
        const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(lookupName)));

        // covers both a scraper returning null and no scraper being configured at all
        const profile = profiles[0] || null;

        if (!profile) {
            console.log(`Could not find profile for actor '${actorName}'`);

            if (!actorEntry) {
                // there is no stored entry to flag as failed, and updateActor cannot handle a missing entry
                return null;
            }

            return updateActor(actorEntry, actorEntry, true, false);
        }

        if (actorEntry) {
            return updateActor(actorEntry, profile, true, true);
        }

        return storeActor(profile, true, true);
    }, {
        concurrency: 1,
    });
}
 | 
			
		||||
 | 
			
		||||
async function storeActors(release, releaseEntry) {
 | 
			
		||||
/**
 * Scrape profiles for all actors whose `scraped_at` column is still null,
 * i.e. entries that have not been through scrapeActors yet.
 *
 * @returns {Promise<void>} resolves once scrapeActors has finished
 */
async function scrapeBasicActors() {
    const unscrapedActors = await knex('actors').where('scraped_at', null);
    const unscrapedNames = unscrapedActors.map(({ name }) => name);

    return scrapeActors(unscrapedNames);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Link the actors named on a release to that release in `actors_associated`,
 * creating basic actor entries for names that are not in the database yet.
 *
 * Names are trimmed and de-duplicated before the lookup. Actors that are
 * already linked to the release are skipped, so the function can be run again
 * for the same release without inserting duplicate associations.
 *
 * @param {Object} release - scraped release; `release.actors` is a list of actor names and may be absent
 * @param {number} releaseId - id of the release row to associate the actors with
 * @returns {Promise<void>}
 */
async function associateActors(release, releaseId) {
    const actorNames = [...new Set((release.actors || [])
        .map(actorName => actorName.trim())
        .filter(Boolean))];

    if (actorNames.length === 0) {
        return;
    }

    const [actorEntries, associatedActors] = await Promise.all([
        knex('actors').whereIn('name', actorNames),
        knex('actors_associated').where('release_id', releaseId),
    ]);

    const knownNames = new Set(actorEntries.map(actor => actor.name));
    const newActors = actorNames.filter(actorName => !knownNames.has(actorName));

    const newActorEntries = await Promise.all(newActors.map(actorName => storeActor({ name: actorName })));

    // association rows carry their own id; the actor is referenced through actor_id
    const associatedActorIds = new Set(associatedActors.map(association => association.actor_id));

    const newlyAssociatedActors = actorEntries
        .concat(newActorEntries)
        // storeActor resolves to null when the entry could not be stored
        .filter(actor => actor && !associatedActorIds.has(actor.id))
        .map(actor => ({
            release_id: releaseId,
            actor_id: actor.id,
        }));

    if (newlyAssociatedActors.length === 0) {
        return;
    }

    await knex('actors_associated')
        .insert(newlyAssociatedActors);
}
 | 
			
		||||
 | 
			
		||||
module.exports = {
 | 
			
		||||
    associateActors,
 | 
			
		||||
    fetchActors,
 | 
			
		||||
    scrapeActors,
 | 
			
		||||
    storeActors,
 | 
			
		||||
    scrapeBasicActors,
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										11
									
								
								src/app.js
								
								
								
								
							
							
						
						
									
										11
									
								
								src/app.js
								
								
								
								
							| 
						 | 
				
			
			@ -6,7 +6,7 @@ const initServer = require('./web/server');
 | 
			
		|||
 | 
			
		||||
const scrapeSites = require('./scrape-sites');
 | 
			
		||||
const scrapeRelease = require('./scrape-release');
 | 
			
		||||
const { scrapeActors } = require('./actors');
 | 
			
		||||
const { scrapeActors, scrapeBasicActors } = require('./actors');
 | 
			
		||||
 | 
			
		||||
async function init() {
 | 
			
		||||
    if (argv.url) {
 | 
			
		||||
| 
						 | 
				
			
			@ -24,13 +24,20 @@ async function init() {
 | 
			
		|||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (argv.actors) {
 | 
			
		||||
    if (argv.actors && argv.actors.length > 0) {
 | 
			
		||||
        await scrapeActors();
 | 
			
		||||
        knex.destroy();
 | 
			
		||||
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (argv.actors) {
 | 
			
		||||
        await scrapeBasicActors();
 | 
			
		||||
        knex.destroy();
 | 
			
		||||
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    await initServer();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										40
									
								
								src/media.js
								
								
								
								
							
							
						
						
									
										40
									
								
								src/media.js
								
								
								
								
							| 
						 | 
				
			
			@ -37,13 +37,13 @@ async function createMediaDirectory(release, releaseId) {
 | 
			
		|||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function storePoster(release, releaseEntry) {
 | 
			
		||||
async function storePoster(release, releaseId) {
 | 
			
		||||
    if (!release.poster) {
 | 
			
		||||
        console.warn(`No poster available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
 | 
			
		||||
        console.warn(`No poster available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
 | 
			
		||||
    console.log(`Storing poster for (${release.site.name}, ${releaseId}) "${release.title}"`);
 | 
			
		||||
 | 
			
		||||
    const res = await bhttp.get(release.poster);
 | 
			
		||||
    const thumbnail = await getThumbnail(res.body);
 | 
			
		||||
| 
						 | 
				
			
			@ -53,8 +53,8 @@ async function storePoster(release, releaseEntry) {
 | 
			
		|||
        const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
 | 
			
		||||
        const extension = mime.getExtension(mimetype);
 | 
			
		||||
 | 
			
		||||
        const filepath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `poster.${extension}`);
 | 
			
		||||
        const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `poster_thumb.${extension}`);
 | 
			
		||||
        const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `poster.${extension}`);
 | 
			
		||||
        const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `poster_thumb.${extension}`);
 | 
			
		||||
        const hash = getHash(res.body);
 | 
			
		||||
 | 
			
		||||
        await Promise.all([
 | 
			
		||||
| 
						 | 
				
			
			@ -69,23 +69,23 @@ async function storePoster(release, releaseEntry) {
 | 
			
		|||
            hash,
 | 
			
		||||
            source: release.poster,
 | 
			
		||||
            domain: 'releases',
 | 
			
		||||
            target_id: releaseEntry.id,
 | 
			
		||||
            target_id: releaseId,
 | 
			
		||||
            role: 'poster',
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.warn(`Failed to store poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}": ${res.statusCode}`);
 | 
			
		||||
    console.warn(`Failed to store poster for (${release.site.name}, ${releaseId}) "${release.title}": ${res.statusCode}`);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function storePhotos(release, releaseEntry) {
 | 
			
		||||
    if (release.photos.length === 0) {
 | 
			
		||||
        console.warn(`No photos available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
 | 
			
		||||
async function storePhotos(release, releaseId) {
 | 
			
		||||
    if (!release.photos || release.photos.length === 0) {
 | 
			
		||||
        console.warn(`No photos available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
 | 
			
		||||
    console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`);
 | 
			
		||||
 | 
			
		||||
    const files = await Promise.map(release.photos, async (photoUrl, index) => {
 | 
			
		||||
        const { pathname } = new URL(photoUrl);
 | 
			
		||||
| 
						 | 
				
			
			@ -98,8 +98,8 @@ async function storePhotos(release, releaseEntry) {
 | 
			
		|||
            if (res.statusCode === 200) {
 | 
			
		||||
                const extension = mime.getExtension(mimetype);
 | 
			
		||||
 | 
			
		||||
                const filepath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `${index + 1}.${extension}`);
 | 
			
		||||
                const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `${index + 1}_thumb.${extension}`);
 | 
			
		||||
                const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}.${extension}`);
 | 
			
		||||
                const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}_thumb.${extension}`);
 | 
			
		||||
                const hash = getHash(res.body);
 | 
			
		||||
 | 
			
		||||
                await Promise.all([
 | 
			
		||||
| 
						 | 
				
			
			@ -118,7 +118,7 @@ async function storePhotos(release, releaseEntry) {
 | 
			
		|||
 | 
			
		||||
            throw new Error(`Response ${res.statusCode} not OK`);
 | 
			
		||||
        } catch (error) {
 | 
			
		||||
            console.warn(`Failed to store photo ${index + 1} for "${release.title}" (${photoUrl}, ${release.url}, ${release.site.name}, ${releaseEntry.id}): ${error}`);
 | 
			
		||||
            console.warn(`Failed to store photo ${index + 1} for "${release.title}" (${photoUrl}, ${release.url}, ${release.site.name}, ${releaseId}): ${error}`);
 | 
			
		||||
 | 
			
		||||
            return null;
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			@ -136,24 +136,24 @@ async function storePhotos(release, releaseEntry) {
 | 
			
		|||
                source: file.source,
 | 
			
		||||
                index,
 | 
			
		||||
                domain: 'releases',
 | 
			
		||||
                target_id: releaseEntry.id,
 | 
			
		||||
                target_id: releaseId,
 | 
			
		||||
                role: 'photo',
 | 
			
		||||
            })));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function storeTrailer(release, releaseEntry) {
 | 
			
		||||
async function storeTrailer(release, releaseId) {
 | 
			
		||||
    if (!release.trailer || !release.trailer.src) {
 | 
			
		||||
        console.warn(`No trailer available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
 | 
			
		||||
        console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`Storing trailer for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
 | 
			
		||||
    console.log(`Storing trailer for (${release.site.name}, ${releaseId}) "${release.title}"`);
 | 
			
		||||
 | 
			
		||||
    const { pathname } = new URL(release.trailer.src);
 | 
			
		||||
    const mimetype = release.trailer.type || mime.getType(pathname);
 | 
			
		||||
 | 
			
		||||
    const res = await bhttp.get(release.trailer.src);
 | 
			
		||||
    const filepath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
 | 
			
		||||
    const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
 | 
			
		||||
 | 
			
		||||
    await Promise.all([
 | 
			
		||||
        fs.writeFile(path.join(config.media.path, filepath), res.body),
 | 
			
		||||
| 
						 | 
				
			
			@ -162,7 +162,7 @@ async function storeTrailer(release, releaseEntry) {
 | 
			
		|||
            mime: mimetype,
 | 
			
		||||
            source: release.trailer.src,
 | 
			
		||||
            domain: 'releases',
 | 
			
		||||
            target_id: releaseEntry.id,
 | 
			
		||||
            target_id: releaseId,
 | 
			
		||||
            role: 'trailer',
 | 
			
		||||
            quality: release.trailer.quality || null,
 | 
			
		||||
        }),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,8 +4,8 @@ const Promise = require('bluebird');
 | 
			
		|||
const knex = require('./knex');
 | 
			
		||||
const argv = require('./argv');
 | 
			
		||||
const whereOr = require('./utils/where-or');
 | 
			
		||||
const { storeTags } = require('./tags');
 | 
			
		||||
const { storeActors } = require('./actors');
 | 
			
		||||
const { associateTags } = require('./tags');
 | 
			
		||||
const { associateActors } = require('./actors');
 | 
			
		||||
const {
 | 
			
		||||
    createMediaDirectory,
 | 
			
		||||
    storePoster,
 | 
			
		||||
| 
						 | 
				
			
			@ -141,15 +141,6 @@ async function fetchReleases(queryObject = {}, options = {}) {
 | 
			
		|||
    return curateReleases(releases);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function fetchReleasesByEntryIds(entryIds, queryObject = {}, options = {}) {
 | 
			
		||||
    const releases = await knex('releases')
 | 
			
		||||
        .modify(commonQuery, options)
 | 
			
		||||
        .whereIn('entry_id', entryIds)
 | 
			
		||||
        .andWhere(builder => whereOr(queryObject, 'releases', builder));
 | 
			
		||||
 | 
			
		||||
    return curateReleases(releases);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function fetchSiteReleases(queryObject, options = {}) {
 | 
			
		||||
    const releases = await knex('releases')
 | 
			
		||||
        .modify(commonQuery, options)
 | 
			
		||||
| 
						 | 
				
			
			@ -192,41 +183,52 @@ async function fetchTagReleases(queryObject, options = {}) {
 | 
			
		|||
    return curateReleases(releases);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Store everything that hangs off a release row: actor and tag associations,
 * photos, poster and trailer.
 *
 * The media directory is created first; the remaining steps are started
 * together and awaited as a group.
 *
 * @param {Object} release - scraped release
 * @param {number} releaseId - id of the release row the assets belong to
 * @returns {Promise<void>}
 */
async function storeReleaseAssets(release, releaseId) {
    await createMediaDirectory(release, releaseId);

    const assetTasks = [
        associateActors,
        associateTags,
        storePhotos,
        storePoster,
        storeTrailer,
    ].map(storeAsset => storeAsset(release, releaseId));

    await Promise.all(assetTasks);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Store a scraped release and its assets.
 *
 * - A release whose `entry_id` is already in the database is left untouched
 *   unless `argv.redownload` is set, in which case the row is refreshed with
 *   the newly scraped data and its assets are stored again.
 * - A release that is not in the database yet is inserted and its assets stored.
 *
 * @param {Object} release - scraped release; `release.entryId` identifies it
 * @returns {Promise<number|null>} id of the release row, or null when the
 *     insert did not return a row
 */
async function storeRelease(release) {
    const existingRelease = await knex('releases').where('entry_id', release.entryId).first();
    const curatedRelease = curateScrapedRelease(release);

    if (existingRelease && !argv.redownload) {
        return existingRelease.id;
    }

    if (existingRelease) {
        // address the row by its primary key; entry_id holds the scraped identifier, not the row id
        await knex('releases')
            .where('id', existingRelease.id)
            .update({
                ...existingRelease,
                ...curatedRelease,
            });

        await storeReleaseAssets(release, existingRelease.id);
        console.log(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);

        return existingRelease.id;
    }

    const [releaseEntry] = await knex('releases')
        .insert(curatedRelease)
        .returning('*');

    if (!releaseEntry) {
        console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);

        return null;
    }

    await storeReleaseAssets(release, releaseEntry.id);
    console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);

    // always hand back the row id so callers get the same shape on every path
    return releaseEntry.id;
}
 | 
			
		||||
 | 
			
		||||
async function storeReleases(releases) {
 | 
			
		||||
    const existingReleases = await fetchReleasesByEntryIds(releases.map(release => release.entryId));
 | 
			
		||||
 | 
			
		||||
    console.log(existingReleases);
 | 
			
		||||
 | 
			
		||||
    return Promise.map(releases, async (release) => {
 | 
			
		||||
        try {
 | 
			
		||||
            const releaseId = await storeRelease(release);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,7 @@ const scrapers = require('./scrapers/scrapers');
 | 
			
		|||
const { storeReleases } = require('./releases');
 | 
			
		||||
const { findSiteByUrl } = require('./sites');
 | 
			
		||||
const { findNetworkByUrl } = require('./networks');
 | 
			
		||||
const { scrapeBasicActors } = require('./actors');
 | 
			
		||||
 | 
			
		||||
async function findSite(url, release) {
 | 
			
		||||
    const site = (release && release.site) || await findSiteByUrl(url);
 | 
			
		||||
| 
						 | 
				
			
			@ -48,7 +49,10 @@ async function scrapeRelease(url, release, deep = false) {
 | 
			
		|||
 | 
			
		||||
    if (!deep && argv.save) {
 | 
			
		||||
        // don't store release when called by site scraper
 | 
			
		||||
        const releaseId = await storeReleases([scene]);
 | 
			
		||||
        const releaseId = await Promise.all([
 | 
			
		||||
            storeReleases([scene]),
 | 
			
		||||
            scrapeBasicActors(),
 | 
			
		||||
        ]);
 | 
			
		||||
 | 
			
		||||
        console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`);
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,6 +9,7 @@ const { fetchIncludedSites } = require('./sites');
 | 
			
		|||
const scrapers = require('./scrapers/scrapers');
 | 
			
		||||
const scrapeRelease = require('./scrape-release');
 | 
			
		||||
const { storeReleases } = require('./releases');
 | 
			
		||||
const { scrapeBasicActors } = require('./actors');
 | 
			
		||||
 | 
			
		||||
function getAfterDate() {
 | 
			
		||||
    return moment
 | 
			
		||||
| 
						 | 
				
			
			@ -58,7 +59,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
 | 
			
		|||
 | 
			
		||||
async function scrapeUpcomingReleases(scraper, site) {
 | 
			
		||||
    if (scraper.fetchUpcoming) {
 | 
			
		||||
        const upcomingReleases = scraper.fetchUpcoming(site);
 | 
			
		||||
        const upcomingReleases = await scraper.fetchUpcoming(site);
 | 
			
		||||
 | 
			
		||||
        return upcomingReleases.map(release => ({ ...release, upcoming: true }));
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -131,6 +132,8 @@ async function scrapeReleases() {
 | 
			
		|||
    }, {
 | 
			
		||||
        concurrency: 2,
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    await scrapeBasicActors();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
module.exports = scrapeReleases;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -19,7 +19,9 @@ async function scrapeActorFrontpage(html, url, name) {
 | 
			
		|||
    const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
 | 
			
		||||
 | 
			
		||||
    const birthdateString = bio['Date of Birth:'];
 | 
			
		||||
    const birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
 | 
			
		||||
    const birthdate = birthdateString && birthdateString !== 'Unknown (Add)'
 | 
			
		||||
        ? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
 | 
			
		||||
        : null;
 | 
			
		||||
 | 
			
		||||
    const boobsSizeString = bio['Measurements:'];
 | 
			
		||||
    const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
 | 
			
		||||
| 
						 | 
				
			
			@ -74,8 +76,9 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
 | 
			
		|||
    const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
 | 
			
		||||
 | 
			
		||||
    const birthdateString = bio['Date of Birth:'];
 | 
			
		||||
    const birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
 | 
			
		||||
    const active = bio['Career Status:'].trim() === 'Active';
 | 
			
		||||
    const birthdate = birthdateString && birthdateString !== 'Unknown'
 | 
			
		||||
        ? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
 | 
			
		||||
        : null;
 | 
			
		||||
 | 
			
		||||
    const boobsSizeString = bio['Measurements:'];
 | 
			
		||||
    const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
 | 
			
		||||
| 
						 | 
				
			
			@ -114,7 +117,6 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
 | 
			
		|||
        eyes,
 | 
			
		||||
        piercings,
 | 
			
		||||
        tattoos,
 | 
			
		||||
        active,
 | 
			
		||||
        social,
 | 
			
		||||
    };
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -124,11 +126,16 @@ async function fetchActor(actorName) {
 | 
			
		|||
    const frontpageUrl = `https://freeones.com/html/v_links/${slug}`;
 | 
			
		||||
 | 
			
		||||
    const resFrontpage = await bhttp.get(frontpageUrl);
 | 
			
		||||
    const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
 | 
			
		||||
 | 
			
		||||
    const resBio = await bhttp.get(url);
 | 
			
		||||
    if (resFrontpage.statusCode === 200) {
 | 
			
		||||
        const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
 | 
			
		||||
 | 
			
		||||
    return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
 | 
			
		||||
        const resBio = await bhttp.get(url);
 | 
			
		||||
 | 
			
		||||
        return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return null;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
module.exports = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -24,15 +24,15 @@ function curateTags(tags) {
 | 
			
		|||
    return Promise.all(tags.map(async tag => curateTag(tag)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Link the tags of a release to that release in `tags_associated`.
 *
 * Tags that are already linked to the release are skipped and duplicate ids
 * are collapsed, so the function can be run again for the same release
 * without inserting a (release_id, tag_id) pair twice.
 *
 * @param {Object} release - scraped release; `release.tags` is a list of tag ids and may be absent
 * @param {number} releaseId - id of the release row to associate the tags with
 * @returns {Promise<void>}
 */
async function associateTags(release, releaseId) {
    if (!release.tags || release.tags.length === 0) {
        console.warn(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
        return;
    }

    const associatedTags = await knex('tags_associated').where('release_id', releaseId);
    const associatedTagIds = new Set(associatedTags.map(association => association.tag_id));

    const newlyAssociatedTags = [...new Set(release.tags)]
        .filter(tagId => !associatedTagIds.has(tagId))
        .map(tagId => ({
            tag_id: tagId,
            release_id: releaseId,
        }));

    if (newlyAssociatedTags.length === 0) {
        return;
    }

    await knex('tags_associated').insert(newlyAssociatedTags);
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -74,7 +74,7 @@ async function matchTags(rawTags) {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
module.exports = {
 | 
			
		||||
    storeTags,
 | 
			
		||||
    associateTags,
 | 
			
		||||
    fetchTags,
 | 
			
		||||
    matchTags,
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue