diff --git a/assets/components/actor/actor.vue b/assets/components/actor/actor.vue
index 0cb23c53..c3442704 100644
--- a/assets/components/actor/actor.vue
+++ b/assets/components/actor/actor.vue
@@ -20,6 +20,11 @@
{{ actor.aliases.join(', ') }}
+
+ Gender
+ {{ actor.gender }}
+
+
Date of birth
{{ formatDate(actor.birthdate, 'MMMM D, YYYY') }} ({{ age }})
@@ -51,7 +56,7 @@
{{ actor.residencePlace }}
-
+
Ethnicity
{{ actor.ethnicity }}
@@ -61,7 +66,7 @@
{{ actor.height }}
-
+
Boobs
{{ actor.boobSize }}
{{ actor.boobsNatural ? 'Natural' : 'Enhanced' }}
diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js
index 5ff4ff1c..297d82a9 100644
--- a/migrations/20190325001339_releases.js
+++ b/migrations/20190325001339_releases.js
@@ -53,6 +53,9 @@ exports.up = knex => Promise.resolve()
table.datetime('created_at')
.defaultTo(knex.fn.now());
+
+ table.datetime('scraped_at');
+ table.boolean('scrape_success');
}))
.then(() => knex.schema.createTable('directors', (table) => {
table.increments('id', 12);
@@ -229,6 +232,8 @@ exports.up = knex => Promise.resolve()
.notNullable()
.references('id')
.inTable('actors');
+
+ table.unique(['release_id', 'actor_id']);
}))
.then(() => knex.schema.createTable('directors_associated', (table) => {
table.increments('id', 16);
@@ -242,6 +247,8 @@ exports.up = knex => Promise.resolve()
.notNullable()
.references('id')
.inTable('directors');
+
+ table.unique(['release_id', 'director_id']);
}))
.then(() => knex.schema.createTable('tags_associated', (table) => {
table.integer('tag_id', 12)
@@ -256,6 +263,9 @@ exports.up = knex => Promise.resolve()
table.integer('release_id', 16)
.references('id')
.inTable('releases');
+
+ table.unique(['release_id', 'tag_id']);
+ table.unique(['site_id', 'tag_id']);
}));
exports.down = knex => Promise.resolve()
diff --git a/src/actors.js b/src/actors.js
index 3f20eaec..319e65d8 100644
--- a/src/actors.js
+++ b/src/actors.js
@@ -12,6 +12,7 @@ async function curateActor(actor) {
return {
id: actor.id,
+ gender: actor.gender,
name: actor.name,
description: actor.description,
birthdate: actor.birthdate && new Date(actor.birthdate),
@@ -43,10 +44,13 @@ function curateActors(releases) {
return Promise.all(releases.map(async release => curateActor(release)));
}
-function curateScrapedActor(actor) {
- return {
+function curateActorEntry(actor, scraped, scrapeSuccess) {
+ const curatedActor = {
id: actor.id,
- name: actor.name,
+ name: actor.name
+ .split(' ')
+ .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
+ .join(' '),
slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
birthdate: actor.birthdate,
description: actor.description,
@@ -65,6 +69,16 @@ function curateScrapedActor(actor) {
tattoos: actor.tattoos,
piercings: actor.piercings,
};
+
+ if (scraped) {
+ return {
+ ...curatedActor,
+ scraped_at: new Date(),
+ scrape_success: scrapeSuccess,
+ };
+ }
+
+ return curatedActor;
}
async function fetchActors(queryObject) {
@@ -82,8 +96,8 @@ async function fetchActors(queryObject) {
return curateActors(releases);
}
-async function storeActor(actor) {
- const curatedActor = curateScrapedActor(actor);
+async function storeActor(actor, scraped = false, scrapeSuccess = false) {
+ const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);
const actorEntries = await knex('actors')
.insert(curatedActor)
@@ -102,8 +116,8 @@ async function storeActor(actor) {
return null;
}
-async function updateActor(actorEntry, actor) {
- const curatedActor = curateScrapedActor(actor);
+async function updateActor(actorEntry, actor, scraped = false, scrapeSuccess = false) {
+ const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);
const actorEntries = await knex('actors')
.where({ id: actorEntry.id })
@@ -117,39 +131,59 @@ async function updateActor(actorEntry, actor) {
async function scrapeActors(actorNames) {
await Promise.map(actorNames || argv.actors, async (actorName) => {
- const [actorEntry] = await fetchActors({ name: actorName });
- const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(actorName)));
+ const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
- if (actorEntry) {
- return updateActor(actorEntry, profiles[0]);
+ const [actorEntry] = await fetchActors({ slug: actorSlug });
+ const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(actorEntry ? actorEntry.name : actorName)));
+
+ if (!profiles[0]) { // covers both null and undefined scraper results
+ console.log(`Could not find profile for actor '${actorName}'`);
+ return actorEntry ? updateActor(actorEntry, actorEntry, true, false) : null; // without a stored entry there is no row to flag as a failed scrape
}
- return storeActor(profiles[0]);
+ if (actorEntry && profiles[0]) {
+ return updateActor(actorEntry, profiles[0], true, true);
+ }
+
+ return storeActor(profiles[0], true, true);
}, {
- concurrency: 5,
+ concurrency: 1,
});
}
-async function storeActors(release, releaseEntry) {
+async function scrapeBasicActors() {
+ const basicActors = await knex('actors').where('scraped_at', null);
+
+ return scrapeActors(basicActors.map(actor => actor.name));
+}
+
+async function associateActors(release, releaseId) {
const actorEntries = await knex('actors').whereIn('name', release.actors);
const newActors = release.actors
.map(actorName => actorName.trim())
.filter(actorName => !actorEntries.some(actor => actor.name === actorName));
- const newActorEntries = await Promise.all(newActors.map(async actorName => storeActor({ name: actorName })));
+ const [newActorEntries, associatedActors] = await Promise.all([
+ Promise.all(newActors.map(async actorName => storeActor({ name: actorName }))),
+ knex('actors_associated').where('release_id', releaseId),
+ ]);
+
+ const newlyAssociatedActors = actorEntries
+ .concat(newActorEntries.filter(Boolean)) // storeActor can resolve to null; skip those instead of crashing on actor.id
+ .filter(actorEntry => !associatedActors.some(association => association.actor_id === actorEntry.id)) // association rows point at actors through actor_id, not through their own id
+ .map(actor => ({
+ release_id: releaseId,
+ actor_id: actor.id,
+ }));
await knex('actors_associated')
- .insert(actorEntries.concat(newActorEntries).map(actor => ({
- release_id: releaseEntry.id,
- actor_id: actor.id,
- })), '*');
-
- scrapeActors(newActorEntries.map(actor => actor.name));
+ .insert(newlyAssociatedActors);
}
module.exports = {
+ associateActors,
fetchActors,
scrapeActors,
- storeActors,
+ scrapeBasicActors,
};
diff --git a/src/app.js b/src/app.js
index 3d8442da..d43224bb 100644
--- a/src/app.js
+++ b/src/app.js
@@ -6,7 +6,7 @@ const initServer = require('./web/server');
const scrapeSites = require('./scrape-sites');
const scrapeRelease = require('./scrape-release');
-const { scrapeActors } = require('./actors');
+const { scrapeActors, scrapeBasicActors } = require('./actors');
async function init() {
if (argv.url) {
@@ -24,13 +24,20 @@ async function init() {
return;
}
- if (argv.actors) {
+ if (argv.actors && argv.actors.length > 0) {
await scrapeActors();
knex.destroy();
return;
}
+ if (argv.actors) {
+ await scrapeBasicActors();
+ knex.destroy();
+
+ return;
+ }
+
await initServer();
}
diff --git a/src/media.js b/src/media.js
index e75c1693..9fe243a0 100644
--- a/src/media.js
+++ b/src/media.js
@@ -37,13 +37,13 @@ async function createMediaDirectory(release, releaseId) {
}
}
-async function storePoster(release, releaseEntry) {
+async function storePoster(release, releaseId) {
if (!release.poster) {
- console.warn(`No poster available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+ console.warn(`No poster available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
return;
}
- console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
+ console.log(`Storing poster for (${release.site.name}, ${releaseId}) "${release.title}"`);
const res = await bhttp.get(release.poster);
const thumbnail = await getThumbnail(res.body);
@@ -53,8 +53,8 @@ async function storePoster(release, releaseEntry) {
const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
const extension = mime.getExtension(mimetype);
- const filepath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `poster.${extension}`);
- const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `poster_thumb.${extension}`);
+ const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `poster.${extension}`);
+ const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `poster_thumb.${extension}`);
const hash = getHash(res.body);
await Promise.all([
@@ -69,23 +69,23 @@ async function storePoster(release, releaseEntry) {
hash,
source: release.poster,
domain: 'releases',
- target_id: releaseEntry.id,
+ target_id: releaseId,
role: 'poster',
});
return;
}
- console.warn(`Failed to store poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}": ${res.statusCode}`);
+ console.warn(`Failed to store poster for (${release.site.name}, ${releaseId}) "${release.title}": ${res.statusCode}`);
}
-async function storePhotos(release, releaseEntry) {
- if (release.photos.length === 0) {
- console.warn(`No photos available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+async function storePhotos(release, releaseId) {
+ if (!release.photos || release.photos.length === 0) {
+ console.warn(`No photos available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
return;
}
- console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
+ console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`);
const files = await Promise.map(release.photos, async (photoUrl, index) => {
const { pathname } = new URL(photoUrl);
@@ -98,8 +98,8 @@ async function storePhotos(release, releaseEntry) {
if (res.statusCode === 200) {
const extension = mime.getExtension(mimetype);
- const filepath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `${index + 1}.${extension}`);
- const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `${index + 1}_thumb.${extension}`);
+ const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}.${extension}`);
+ const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}_thumb.${extension}`);
const hash = getHash(res.body);
await Promise.all([
@@ -118,7 +118,7 @@ async function storePhotos(release, releaseEntry) {
throw new Error(`Response ${res.statusCode} not OK`);
} catch (error) {
- console.warn(`Failed to store photo ${index + 1} for "${release.title}" (${photoUrl}, ${release.url}, ${release.site.name}, ${releaseEntry.id}): ${error}`);
+ console.warn(`Failed to store photo ${index + 1} for "${release.title}" (${photoUrl}, ${release.url}, ${release.site.name}, ${releaseId}): ${error}`);
return null;
}
@@ -136,24 +136,24 @@ async function storePhotos(release, releaseEntry) {
source: file.source,
index,
domain: 'releases',
- target_id: releaseEntry.id,
+ target_id: releaseId,
role: 'photo',
})));
}
-async function storeTrailer(release, releaseEntry) {
+async function storeTrailer(release, releaseId) {
if (!release.trailer || !release.trailer.src) {
- console.warn(`No trailer available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+ console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
return;
}
- console.log(`Storing trailer for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
+ console.log(`Storing trailer for (${release.site.name}, ${releaseId}) "${release.title}"`);
const { pathname } = new URL(release.trailer.src);
const mimetype = release.trailer.type || mime.getType(pathname);
const res = await bhttp.get(release.trailer.src);
- const filepath = path.join(release.site.network.slug, release.site.slug, releaseEntry.id.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
+ const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
await Promise.all([
fs.writeFile(path.join(config.media.path, filepath), res.body),
@@ -162,7 +162,7 @@ async function storeTrailer(release, releaseEntry) {
mime: mimetype,
source: release.trailer.src,
domain: 'releases',
- target_id: releaseEntry.id,
+ target_id: releaseId,
role: 'trailer',
quality: release.trailer.quality || null,
}),
diff --git a/src/releases.js b/src/releases.js
index 925a7e6e..be7c3d21 100644
--- a/src/releases.js
+++ b/src/releases.js
@@ -4,8 +4,8 @@ const Promise = require('bluebird');
const knex = require('./knex');
const argv = require('./argv');
const whereOr = require('./utils/where-or');
-const { storeTags } = require('./tags');
-const { storeActors } = require('./actors');
+const { associateTags } = require('./tags');
+const { associateActors } = require('./actors');
const {
createMediaDirectory,
storePoster,
@@ -141,15 +141,6 @@ async function fetchReleases(queryObject = {}, options = {}) {
return curateReleases(releases);
}
-async function fetchReleasesByEntryIds(entryIds, queryObject = {}, options = {}) {
- const releases = await knex('releases')
- .modify(commonQuery, options)
- .whereIn('entry_id', entryIds)
- .andWhere(builder => whereOr(queryObject, 'releases', builder));
-
- return curateReleases(releases);
-}
-
async function fetchSiteReleases(queryObject, options = {}) {
const releases = await knex('releases')
.modify(commonQuery, options)
@@ -192,41 +183,52 @@ async function fetchTagReleases(queryObject, options = {}) {
return curateReleases(releases);
}
+async function storeReleaseAssets(release, releaseId) {
+ await createMediaDirectory(release, releaseId);
+
+ await Promise.all([
+ associateActors(release, releaseId),
+ associateTags(release, releaseId),
+ storePhotos(release, releaseId),
+ storePoster(release, releaseId),
+ storeTrailer(release, releaseId),
+ ]);
+}
+
async function storeRelease(release) {
+ const existingRelease = await knex('releases').where('entry_id', release.entryId).first();
const curatedRelease = curateScrapedRelease(release);
- const releaseEntries = await knex('releases')
+ if (existingRelease && !argv.redownload) {
+ return existingRelease.id;
+ }
+
+ if (existingRelease && argv.redownload) {
+ const [updatedRelease] = await knex('releases')
+ .where('id', existingRelease.id) // existingRelease.id is the row's own id, so it must be matched against the id column, not entry_id
+ .update({
+ ...existingRelease,
+ ...curatedRelease,
+ })
+ .returning('*');
+
+ await storeReleaseAssets(release, existingRelease.id);
+ console.log(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);
+
+ return (updatedRelease || existingRelease).id; // every path resolves to the release id the caller reads
+ }
+
+ const [releaseEntry] = await knex('releases')
.insert(curatedRelease)
.returning('*');
- if (releaseEntries.length) {
- const releaseEntry = releaseEntries[0];
-
- console.log(`Stored (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
-
- await createMediaDirectory(release, releaseEntry.id);
-
- await Promise.all([
- storeActors(release, releaseEntry),
- storeTags(release, releaseEntry),
- storePhotos(release, releaseEntry),
- storePoster(release, releaseEntry),
- storeTrailer(release, releaseEntry),
- ]);
-
- return releaseEntry.id;
- }
-
- console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
+ await storeReleaseAssets(release, releaseEntry.id);
+ console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);
- return null;
+ return releaseEntry.id; // a successful insert must report the new id rather than null
}
async function storeReleases(releases) {
- const existingReleases = await fetchReleasesByEntryIds(releases.map(release => release.entryId));
-
- console.log(existingReleases);
-
return Promise.map(releases, async (release) => {
try {
const releaseId = await storeRelease(release);
diff --git a/src/scrape-release.js b/src/scrape-release.js
index 81be87c4..db5f5453 100644
--- a/src/scrape-release.js
+++ b/src/scrape-release.js
@@ -7,6 +7,7 @@ const scrapers = require('./scrapers/scrapers');
const { storeReleases } = require('./releases');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
+const { scrapeBasicActors } = require('./actors');
async function findSite(url, release) {
const site = (release && release.site) || await findSiteByUrl(url);
@@ -48,7 +49,10 @@ async function scrapeRelease(url, release, deep = false) {
if (!deep && argv.save) {
// don't store release when called by site scraper
+ // store first: actors created while storing must exist before scrapeBasicActors queries for unscraped ones
const releaseId = await storeReleases([scene]);
+
+ await scrapeBasicActors();
console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`);
}
diff --git a/src/scrape-sites.js b/src/scrape-sites.js
index 410261de..961772b3 100644
--- a/src/scrape-sites.js
+++ b/src/scrape-sites.js
@@ -9,6 +9,7 @@ const { fetchIncludedSites } = require('./sites');
const scrapers = require('./scrapers/scrapers');
const scrapeRelease = require('./scrape-release');
const { storeReleases } = require('./releases');
+const { scrapeBasicActors } = require('./actors');
function getAfterDate() {
return moment
@@ -58,7 +59,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
async function scrapeUpcomingReleases(scraper, site) {
if (scraper.fetchUpcoming) {
- const upcomingReleases = scraper.fetchUpcoming(site);
+ const upcomingReleases = await scraper.fetchUpcoming(site);
return upcomingReleases.map(release => ({ ...release, upcoming: true }));
}
@@ -131,6 +132,8 @@ async function scrapeReleases() {
}, {
concurrency: 2,
});
+
+ await scrapeBasicActors();
}
module.exports = scrapeReleases;
diff --git a/src/scrapers/freeones.js b/src/scrapers/freeones.js
index 6bdf1324..47f7f005 100644
--- a/src/scrapers/freeones.js
+++ b/src/scrapers/freeones.js
@@ -19,7 +19,9 @@ async function scrapeActorFrontpage(html, url, name) {
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
const birthdateString = bio['Date of Birth:'];
- const birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
+ const birthdate = birthdateString && birthdateString !== 'Unknown (Add)'
+ ? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
+ : null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
@@ -74,8 +76,9 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
const birthdateString = bio['Date of Birth:'];
- const birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
- const active = bio['Career Status:'].trim() === 'Active';
+ const birthdate = birthdateString && birthdateString !== 'Unknown'
+ ? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
+ : null;
const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
@@ -114,7 +117,6 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
eyes,
piercings,
tattoos,
- active,
social,
};
}
@@ -124,11 +126,16 @@ async function fetchActor(actorName) {
const frontpageUrl = `https://freeones.com/html/v_links/${slug}`;
const resFrontpage = await bhttp.get(frontpageUrl);
- const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
- const resBio = await bhttp.get(url);
+ if (resFrontpage.statusCode === 200) {
+ const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
- return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
+ const resBio = await bhttp.get(url);
+
+ return scrapeActorBio(resBio.body.toString(), bio, url, actorName);
+ }
+
+ return null;
}
module.exports = {
diff --git a/src/tags.js b/src/tags.js
index 77d23490..ce0d4cab 100644
--- a/src/tags.js
+++ b/src/tags.js
@@ -24,15 +24,18 @@ function curateTags(tags) {
return Promise.all(tags.map(async tag => curateTag(tag)));
}
-async function storeTags(release, releaseEntry) {
+async function associateTags(release, releaseId) {
if (!release.tags || release.tags.length === 0) {
- console.warn(`No tags available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
+ console.warn(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
return;
}
-await knex('tags_associated').insert(release.tags.map(tagId => ({
+ // only insert pairs that are not stored yet, so re-storing a release does not violate the (release_id, tag_id) unique constraint
+ const associatedTags = await knex('tags_associated').where('release_id', releaseId);
+ const newTagIds = [...new Set(release.tags)].filter(tagId => !associatedTags.some(tag => tag.tag_id === tagId));
+ await knex('tags_associated').insert(newTagIds.map(tagId => ({
tag_id: tagId,
- release_id: releaseEntry.id,
+ release_id: releaseId,
})));
}
@@ -74,7 +74,7 @@ async function matchTags(rawTags) {
}
module.exports = {
- storeTags,
+ associateTags,
fetchTags,
matchTags,
};