Scraping actor profiles from FreeOnes.

This commit is contained in:
2019-11-17 03:56:45 +01:00
parent abcdb52335
commit e8130c3634
268 changed files with 19161 additions and 102 deletions

View File

@@ -100,52 +100,6 @@ function curateScrapedRelease(release) {
};
}
/**
 * Insert a scraped release into the `releases` table and store its
 * associated data through the store* helpers.
 *
 * @param {Object} release - Scraped release; `release.title` and
 *   `release.site.name` are read for logging, the rest is passed on to
 *   `curateScrapedRelease` and the store* helpers.
 * @returns {Promise<?number>} The `id` of the inserted row, or `null` when
 *   the insert returned no rows.
 */
async function storeRelease(release) {
    const curated = curateScrapedRelease(release);
    const insertedRows = await knex('releases')
        .insert(curated)
        .returning('*');

    // Nothing came back from the insert: report it and bail out early.
    if (!insertedRows.length) {
        console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
        return null;
    }

    const entry = insertedRows[0];

    console.log(`Stored (${release.site.name}, ${entry.id}) "${release.title}"`);

    // The media directory is created before any of the store helpers run.
    await createMediaDirectory(release, entry.id);

    // All helpers are started together and awaited as a group.
    const storeSteps = [storeActors, storeTags, storePhotos, storePoster, storeTrailer];
    await Promise.all(storeSteps.map(storeStep => storeStep(release, entry)));

    return entry.id;
}
/**
 * Store a batch of scraped releases, at most two at a time.
 *
 * A release that fails to store does not abort the batch: the error is
 * logged and `null` takes that release's place in the result.
 *
 * NOTE(review): `Promise.map` is not a native Promise method; presumably
 * `Promise` is Bluebird here - confirm against the file's imports.
 *
 * @param {Array<Object>} releases - Scraped releases to store.
 * @returns {Promise<Array<?number>>} One entry per release: whatever
 *   `storeRelease` resolved with, or `null` if it threw.
 */
async function storeReleases(releases) {
    const mapOptions = { concurrency: 2 };

    // Turn a rejection from storeRelease into a logged error and a null result.
    const storeOrNull = async (release) => {
        try {
            return await storeRelease(release);
        } catch (error) {
            console.error(error);
            return null;
        }
    };

    return Promise.map(releases, storeOrNull, mapOptions);
}
function commonQuery(queryBuilder, {
filter = [],
after = new Date(0), // January 1970
@@ -187,6 +141,15 @@ async function fetchReleases(queryObject = {}, options = {}) {
return curateReleases(releases);
}
/**
 * Fetch the releases whose `entry_id` is in the given list and pass them
 * through `curateReleases`.
 *
 * @param {Array} entryIds - Values matched against the `entry_id` column.
 * @param {Object} [queryObject={}] - Extra conditions, handed to `whereOr`
 *   for the `releases` table.
 * @param {Object} [options={}] - Options forwarded to `commonQuery`.
 * @returns {Promise<*>} Whatever `curateReleases` returns for the rows.
 */
async function fetchReleasesByEntryIds(entryIds, queryObject = {}, options = {}) {
    // Conditions from the query object are grouped in their own where clause.
    const applyQueryObject = builder => whereOr(queryObject, 'releases', builder);

    const query = knex('releases')
        .modify(commonQuery, options)
        .whereIn('entry_id', entryIds)
        .andWhere(applyQueryObject);

    const rows = await query;

    return curateReleases(rows);
}
async function fetchSiteReleases(queryObject, options = {}) {
const releases = await knex('releases')
.modify(commonQuery, options)
@@ -229,6 +192,56 @@ async function fetchTagReleases(queryObject, options = {}) {
return curateReleases(releases);
}
/**
 * Save one scraped release: insert its curated form into `releases`,
 * create its media directory, then run the actor, tag, photo, poster and
 * trailer store helpers.
 *
 * @param {Object} release - Scraped release; `release.title` and
 *   `release.site.name` are used in the log messages.
 * @returns {Promise<?number>} `id` of the inserted row, or `null` if the
 *   insert returned no rows.
 */
async function storeRelease(release) {
    const releaseRow = curateScrapedRelease(release);
    const inserted = await knex('releases')
        .insert(releaseRow)
        .returning('*');

    // An empty result means nothing was saved; log and give up on this release.
    if (!inserted.length) {
        console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
        return null;
    }

    const savedEntry = inserted[0];
    console.log(`Stored (${release.site.name}, ${savedEntry.id}) "${release.title}"`);

    await createMediaDirectory(release, savedEntry.id);

    // Start every store helper, then wait for all of them together.
    const pending = [
        storeActors(release, savedEntry),
        storeTags(release, savedEntry),
        storePhotos(release, savedEntry),
        storePoster(release, savedEntry),
        storeTrailer(release, savedEntry),
    ];
    await Promise.all(pending);

    return savedEntry.id;
}
/**
 * Store a batch of scraped releases, at most two at a time.
 *
 * Before storing, releases matching the batch's `entryId` values are looked
 * up and logged. A release that fails to store does not abort the batch:
 * the error is logged and `null` takes its place in the result.
 *
 * NOTE(review): `Promise.map` is not a native Promise method; presumably
 * `Promise` is Bluebird here - confirm against the file's imports.
 *
 * @param {Array<Object>} [releases=[]] - Scraped releases to store; each
 *   one's `entryId` is used for the lookup.
 * @returns {Promise<Array<?number>>} One entry per release: whatever
 *   `storeRelease` resolved with, or `null` if it threw. Empty for an empty
 *   or omitted batch.
 */
async function storeReleases(releases = []) {
    // Nothing to store: skip the lookup query and the log output entirely.
    if (releases.length === 0) {
        return [];
    }

    const entryIds = releases.map(release => release.entryId);
    const existingReleases = await fetchReleasesByEntryIds(entryIds);

    // NOTE(review): the lookup result is only logged and never used to skip
    // releases that already exist - confirm whether filtering was intended.
    console.log(existingReleases);

    return Promise.map(releases, async (release) => {
        try {
            return await storeRelease(release);
        } catch (error) {
            // One failing release must not abort the rest of the batch.
            console.error(error);
            return null;
        }
    }, {
        concurrency: 2,
    });
}
module.exports = {
fetchReleases,
fetchActorReleases,