Improved actor extraction for fcuk scraper. Changed 'copyright' to 'credit'. Reduced entity page favicon size.

This commit is contained in:
DebaucheryLibrarian 2020-07-15 05:12:29 +02:00
parent c62df2228b
commit 5b886b3917
8 changed files with 18 additions and 10 deletions

View File

@ -43,7 +43,7 @@
> >
<img <img
:src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`" :src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`"
:title="actor.avatar.copyright && `© ${actor.avatar.copyright}`" :title="actor.avatar.credit && `© ${actor.avatar.credit}`"
class="avatar" class="avatar"
> >
</a> </a>

View File

@ -18,7 +18,7 @@
:src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`" :src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`"
:data-src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`" :data-src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`"
:data-loading="sfw ? `/img/${actor.avatar.sfw.lazy}` : `/media/${actor.avatar.lazy}`" :data-loading="sfw ? `/img/${actor.avatar.sfw.lazy}` : `/media/${actor.avatar.lazy}`"
:title="actor.avatar.copyright && `© ${actor.avatar.copyright}`" :title="actor.avatar.credit && `© ${actor.avatar.credit}`"
class="avatar photo" class="avatar photo"
@load="$parent.$emit('load')" @load="$parent.$emit('load')"
> >
@ -36,7 +36,7 @@
:src="sfw ? `/img/${photo.sfw.thumbnail}` : `/media/${photo.thumbnail}`" :src="sfw ? `/img/${photo.sfw.thumbnail}` : `/media/${photo.thumbnail}`"
:data-src="sfw ? `/img/${photo.sfw.thumbnail}` : `/media/${photo.thumbnail}`" :data-src="sfw ? `/img/${photo.sfw.thumbnail}` : `/media/${photo.thumbnail}`"
:data-loading="sfw ? `/img/${photo.sfw.lazy}` : `/media/${photo.lazy}`" :data-loading="sfw ? `/img/${photo.sfw.lazy}` : `/media/${photo.lazy}`"
:title="`© ${photo.copyright || photo.entity.name}`" :title="`© ${photo.credit || photo.entity.name}`"
class="photo" class="photo"
@load="$parent.$emit('load')" @load="$parent.$emit('load')"
> >

View File

@ -215,11 +215,14 @@ export default {
height: 2.5rem; height: 2.5rem;
} }
.logo-parent, .logo-parent {
.favicon {
height: 1.5rem; height: 1.5rem;
} }
.favicon {
height: 1rem;
}
.name { .name {
color: var(--text-light); color: var(--text-light);
display: flex; display: flex;

View File

@ -116,7 +116,7 @@ function initUiActions(_store, _router) {
thumbnail thumbnail
lazy lazy
comment comment
copyright credit
} }
birthCountry: countryByBirthCountryAlpha2 { birthCountry: countryByBirthCountryAlpha2 {
alpha2 alpha2
@ -135,7 +135,7 @@ function initUiActions(_store, _router) {
thumbnail thumbnail
lazy lazy
comment comment
copyright credit
} }
birthCountry: countryByBirthCountryAlpha2 { birthCountry: countryByBirthCountryAlpha2 {
alpha2 alpha2

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

View File

@ -180,6 +180,7 @@ const tags = [
{ {
name: 'behind the scenes', name: 'behind the scenes',
slug: 'behind-the-scenes', slug: 'behind-the-scenes',
priority: 6,
}, },
{ {
name: 'big dick', name: 'big dick',

View File

@ -2,6 +2,8 @@
const qu = require('../utils/qu'); const qu = require('../utils/qu');
// TODO: profile scraping
function scrapeLatestBlog(scenes, channel) { function scrapeLatestBlog(scenes, channel) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
@ -18,7 +20,10 @@ function scrapeLatestBlog(scenes, channel) {
release.description = query.text('p'); release.description = query.text('p');
release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/); release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);
if (!/\band\b/.test(release.title) && new RegExp(release.title).test(release.description)) { // remove common patterns so only the name is left
const curatedTitle = release.title.replace(/\b(part \d|\banal|bts)\b/gi, '').trim();
if (!/\band\b/.test(curatedTitle) && new RegExp(curatedTitle).test(release.description)) {
// scene title is probably the actor name // scene title is probably the actor name
release.actors = [release.title]; release.actors = [release.title];
} }
@ -63,7 +68,7 @@ function scrapeSceneBlog({ query }, url, channel) {
release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/).slice(1, 3).join('-'); release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/).slice(1, 3).join('-');
release.title = query.q('h4 strong, .videos h3', true); release.title = query.text('h4 strong, .videos h3');
release.description = query.q('#about p, .videos p', true); release.description = query.q('#about p, .videos p', true);
const actors = query.urls('a[href*="/girl/"]').map(actorUrl => actorUrl.match(/video-([\w\s]+)/)?.[1]).filter(Boolean); const actors = query.urls('a[href*="/girl/"]').map(actorUrl => actorUrl.match(/video-([\w\s]+)/)?.[1]).filter(Boolean);
@ -98,7 +103,6 @@ function scrapeScene({ query, html }, url, channel) {
}; };
} }
console.log(release);
return release; return release;
} }