Generating and using URL slugs for releases, improved slugify module. Added 'extract' parameter to MindGeek scraper to get scenes not associated with a channel (see Digital Playground). Added various high-res logos.

This commit is contained in:
ThePendulum 2020-02-04 03:12:09 +01:00
parent ca33704f51
commit f921bb4ae9
30 changed files with 132 additions and 22 deletions

View File

@ -48,7 +48,7 @@
</span>
<a
:href="`/${release.type || 'scene'}/${release.id}`"
:href="`/${release.type || 'scene'}/${release.id}/${release.slug || ''}`"
target="_blank"
rel="noopener noreferrer"
class="link"

View File

@ -112,6 +112,7 @@ const releaseFields = `
id
title
date
slug
createdAt
url
${releaseActorsFragment}

View File

@ -21,7 +21,7 @@ const routes = [
name: 'home',
},
{
path: '/scene/:releaseId',
path: '/scene/:releaseId/:releaseTitle?',
component: Release,
name: 'scene',
},

View File

@ -54,4 +54,5 @@ module.exports = {
trailerQuality: [480, 540],
limit: 25, // max number of photos per release
},
titleSlugLength: 50,
};

View File

@ -332,6 +332,7 @@ exports.up = knex => Promise.resolve()
table.string('url', 1000);
table.string('title');
table.string('slug');
table.date('date');
table.text('description');

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.2 KiB

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 377 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.2 KiB

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 23.0.2, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 1308 112" style="enable-background:new 0 0 1308 112;" xml:space="preserve">
<style type="text/css">
.st0{clip-path:url(#SVGID_2_);}
.st1{fill:#EE2A34;}
.st2{fill:#FFFFFF;}
</style>
<g>
<defs>
<rect id="SVGID_1_" width="1308" height="112"/>
</defs>
<clipPath id="SVGID_2_">
<use xlink:href="#SVGID_1_" style="overflow:visible;"/>
</clipPath>
<g class="st0">
<path class="st1" d="M87.5,93.5c3.7,0.1,7.4-0.2,11-0.8c2.8-0.5,5.8-1.3,9.1-2.2L98.3,112H0c1.8-2.4,3.4-5,4.6-7.8
c1.5-3.7,2.7-7.5,3.6-11.5l18.3-73.5c1.2-4.3,1.9-8.7,2.1-13.1c0-2.1-0.3-4.1-0.7-6.1h96.6l-1.9,21.5l-0.3-0.2
c-2.5-0.9-5.2-1.5-7.8-2c-3.4-0.5-6.9-0.8-10.4-0.7H69.2l-6.4,26.2h30.3c3.5,0.1,6.9-0.2,10.4-0.7c3.1-0.6,6.2-1.3,9.2-2.3h0.3
l-6.1,24.5l-0.2-0.2c-2.6-0.9-5.4-1.6-8.1-2.1c-3.4-0.5-6.8-0.8-10.2-0.7H58l-7.6,30H87.5z"/>
<path class="st1" d="M171,112L159.4,10.3c-0.3-3.8-1.8-7.4-4.2-10.3h53c-1.1,1.9-1.9,4-2.4,6.1c-0.6,2.1-0.9,4.3-0.9,6.5
c0.1,2.7,0.3,5.4,0.7,8l7.7,67.5l42.9-67.5c2.1-3.4,3.7-6.1,4.7-8.1c1-2,1.8-4.2,2.4-6.4c0.4-2,0.6-4.1,0.7-6.1h28.8
c-2.7,2.3-5.2,4.8-7.4,7.6c-3.2,4-6.1,8.3-8.7,12.7L218.5,112H171z"/>
<path class="st1" d="M297.4,112c1.8-2.4,3.4-5,4.6-7.8c1.5-3.7,2.7-7.5,3.6-11.5l18.3-73.5c1.2-4.3,1.9-8.7,2.1-13.1
c0-2.1-0.3-4.1-0.7-6.1h49.4c-1.8,2.4-3.4,5-4.6,7.8c-1.5,3.8-2.6,7.6-3.5,11.6l-18.3,73.5c-1.1,4.3-1.8,8.7-1.9,13.1
c0,2.1,0.3,4.1,0.7,6.1L297.4,112z"/>
<path class="st1" d="M484.9,93.5c3.4,0,6.9-0.3,10.3-0.9c3.2-0.5,6.2-1.5,9.1-2.9l-9.3,22.3h-96.2c1.8-2.4,3.4-5,4.6-7.8
c1.5-3.7,2.7-7.5,3.6-11.5l18.2-73.4c1.2-4.3,1.9-8.7,2.1-13.1c0-2.1-0.3-4.1-0.7-6.1H476c-1.7,2.4-3.2,4.9-4.3,7.6
c-1.5,3.7-2.8,7.5-3.7,11.5l-18.6,74.3H484.9z"/>
<path class="st2" d="M575,81.9l-11.3,15.4c-1.6,1.9-2.6,4.2-3.1,6.7c-0.5,1.3-0.7,2.7-0.7,4.1c0.1,1.4,0.4,2.7,1,4h-31.5
c2.2-2.1,4.1-3.9,5.6-5.5c1.5-1.6,2.9-3.1,4.2-4.5c1.3-1.5,2.5-2.9,3.7-4.4s2.6-3.4,4.4-5.8L609,6.3L605.7,0h50.4l15.7,96.6
c0.9,5.5,2.9,10.7,5.8,15.4h-55.8c1.2-1.4,2.3-2.9,3.2-4.5c0.5-0.9,0.9-1.9,1.1-2.9c0.5-1.9,0.7-3.9,0.7-5.8
c0-0.5-0.1-0.9-0.2-1.4c-0.1-0.4-0.2-0.8-0.2-1.2L624,81.8L575,81.9z M621.2,63.4l-6.4-37.9l-26.5,37.9H621.2z"/>
<path class="st2" d="M760.6,19.4h-0.3l-18,72.6c-1.2,4.4-2.1,8.9-2.4,13.5c0.1,2.3,0.6,4.6,1.6,6.6h-26.8c1.1-1.1,2.2-2.3,3.1-3.6
c0.9-1.2,1.6-2.4,2.2-3.8c0.7-1.6,1.3-3.3,1.8-5.1c0.6-1.9,1.3-4.5,2.1-7.7l17.8-71.8c0.8-3.5,1.4-6.3,1.8-8.4
c0.4-1.7,0.6-3.4,0.6-5.1c-0.1-2.3-0.6-4.6-1.6-6.6h59.8l22.5,91.3h0.5l17.8-71.2c0.8-3.5,1.4-6.3,1.8-8.4
c0.4-1.7,0.6-3.4,0.6-5.1c-0.1-2.3-0.6-4.6-1.6-6.6h26.8c-1.1,1.1-2.2,2.3-3.1,3.6c-0.8,1.2-1.6,2.4-2.2,3.7
c-0.7,1.6-1.4,3.3-1.8,5.1c-0.6,1.9-1.3,4.5-2.1,7.8l-22.8,91.9h-55.2L760.6,19.4z"/>
<path class="st2" d="M1028.7,44.8c-2.4,2.6-4.6,5.5-6.5,8.6c-1.4,2.8-2.4,5.8-3,8.9l-7.1,27.7c-0.8,3.6-1.5,6.6-1.9,8.9
c-0.4,1.8-0.6,3.7-0.7,5.6c0.1,2.6,0.7,5.1,1.7,7.5h-37.5l1.2-10.3h-0.3c-1.5,3.2-3.8,5.9-6.5,8c-3.6,1.8-7.6,2.6-11.6,2.3h-37.8
c-7.3,0-12.8-1.5-16.5-4.6c-3.7-3-5.5-7.9-5.5-14.6c0.1-4.7,0.7-9.3,1.9-13.8l11.6-46.2c2.9-11.7,7.3-20.1,13.2-25.2
c6.5-5.4,14.8-8.1,23.2-7.7h81.6l-2.1,21.5c-2.3-1-4.7-1.8-7.2-2.3c-3-0.5-6.1-0.8-9.1-0.7h-45.1c-2.4-0.1-4.7,0.4-6.8,1.7
c-1.8,1.3-2.9,3.3-3.3,5.5l-15,60.5c-0.2,0.9-0.3,1.7-0.3,2.6c0,3,2.3,4.6,6.8,4.6h14.3c2.4,0.1,4.7-0.4,6.8-1.7
c1.8-1.4,3-3.3,3.5-5.5l5.6-22.9h-15.9c-2.5,0.1-5,0.8-7.3,1.9l-0.3,0.2l5.2-20.7L1028.7,44.8z"/>
<path class="st2" d="M1143.6,93.5c3.7,0.1,7.3-0.2,11-0.8c2.8-0.5,5.8-1.3,9.1-2.2l-9.2,21.5h-98.3c1.8-2.4,3.4-5,4.6-7.8
c1.5-3.7,2.7-7.5,3.6-11.5l18.3-73.5c1.2-4.3,1.9-8.7,2.1-13.1c0-2.1-0.2-4.1-0.7-6.1h96.6l-1.9,21.5l-0.3-0.2
c-2.6-0.9-5.2-1.6-7.9-2.1c-3.4-0.5-6.9-0.8-10.4-0.7h-34.6l-6.4,26.2h30.3c3.5,0.1,6.9-0.2,10.4-0.7c3.1-0.6,6.1-1.3,9.1-2.3h0.2
l-6.1,24.5l-0.2-0.2c-2.7-0.9-5.4-1.6-8.1-2.1c-3.4-0.5-6.8-0.8-10.2-0.7h-30.2l-7.5,30.1H1143.6z"/>
<path class="st2" d="M1288.6,93.5c3.4,0,6.9-0.3,10.3-0.9c3.2-0.5,6.2-1.5,9.1-2.9l-9.2,22.2h-96.2c1.8-2.4,3.4-5,4.6-7.8
c1.5-3.7,2.7-7.5,3.6-11.5l18.3-73.5c1.2-4.3,1.9-8.7,2.1-13.1c0-2.1-0.2-4.1-0.7-6.1h49.4c-1.8,2.4-3.4,5-4.6,7.8
c-1.5,3.7-2.8,7.5-3.8,11.5l-18.2,74.3H1288.6z"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 4.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.6 KiB

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.6 KiB

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.7 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.4 KiB

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -1007,7 +1007,15 @@ const sites = [
description: 'Fantasy Blowjobs & POV Cock Sucking Videos and Photos Produced in VR, 4K and full HD featuring Sexy European Pornstars',
network: 'ddfnetwork',
},
// FAKE HUB
// DIGITAL PLAYGROUND
{
slug: 'digitalplayground',
name: 'Digital Playground',
url: 'https://www.digitalplayground.com/scenes',
description: '',
parameters: { extract: true },
network: 'digitalplayground',
},
{
slug: 'episodes',
name: 'Episodes',

View File

@ -367,6 +367,7 @@ async function scrapeActors(actorNames) {
} catch (error) {
if (error.warn !== false) {
logger.warn(`Error in scraper ${source}: ${error.message}`);
logger.error(error.stack);
}
}

View File

@ -39,7 +39,7 @@ const { argv } = yargs
describe: 'Scrape profiles for new actors after fetching scenes',
type: 'boolean',
alias: 'with-actors',
default: true,
default: false,
})
.option('scene', {
describe: 'Scrape scene info from URL',

View File

@ -1,5 +1,6 @@
'use strict';
const config = require('config');
const Promise = require('bluebird');
const moment = require('moment');
@ -16,6 +17,7 @@ const {
storeTrailer,
} = require('./media');
const { fetchSites, findSiteByUrl } = require('./sites');
const slugify = require('./utils/slugify');
function commonQuery(queryBuilder, {
filter = [],
@ -204,6 +206,11 @@ async function attachStudio(release) {
}
async function curateReleaseEntry(release) {
const slug = slugify(release.title, {
encode: true,
limit: config.titleSlugLength,
});
const curatedRelease = {
site_id: release.site.id,
studio_id: release.studio ? release.studio.id : null,
@ -213,6 +220,7 @@ async function curateReleaseEntry(release) {
type: release.type,
url: release.url,
title: release.title,
slug,
date: release.date,
description: release.description,
// director: release.director,
@ -397,7 +405,7 @@ async function storeRelease(release) {
logger.info(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);
return releaseEntry.id;
return releaseEntry;
}
async function storeReleases(releases) {
@ -405,10 +413,11 @@ async function storeReleases(releases) {
try {
const releaseWithChannelSite = await attachChannelSite(release);
const releaseWithStudio = await attachStudio(releaseWithChannelSite);
const releaseId = await storeRelease(releaseWithStudio);
const { id, slug } = await storeRelease(releaseWithStudio);
return {
id: releaseId,
id,
slug,
...releaseWithChannelSite,
};
} catch (error) {

View File

@ -97,7 +97,7 @@ async function scrapeReleases(sources, release = null, type = 'scene') {
const { releases: storedReleases } = await storeReleases(curatedReleases);
if (storedReleases) {
console.log(storedReleases.map(storedRelease => `http://${config.web.host}:${config.web.port}/scene/${storedRelease.id}`).join('\n'));
logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
}
}
}

View File

@ -58,7 +58,7 @@ async function scrapeScene(scene, site, tokens) {
},
};
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, true)}`;
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
release.date = new Date(scene.sites.collection[scene.id].publishDate);
release.poster = scene._resources.primary[0].url;

View File

@ -26,6 +26,16 @@ function getThumbs(scene) {
}
function scrapeLatestX(data, site) {
if (site.parameters?.extract === true && data.collections.length > 0) {
// release should not belong to any channel
return null;
}
if (typeof site.parameters?.extract === 'string' && !data.collections.some(collection => collection.shortName === site.parameters.extract)) {
// release should belong to specific channel
return null;
}
const { id: entryId, title, description } = data;
const hostname = site.parameters?.native ? site.url : site.network.url;
const url = `${hostname}/scene/${entryId}/`;
@ -58,7 +68,9 @@ function scrapeLatestX(data, site) {
}
async function scrapeLatest(items, site) {
return Promise.all(items.map(async data => scrapeLatestX(data, site)));
const latestReleases = await Promise.all(items.map(async data => scrapeLatestX(data, site)));
return latestReleases.filter(Boolean);
}
function scrapeScene(data, url, _site) {
@ -85,10 +97,10 @@ function scrapeScene(data, url, _site) {
};
}
const siteName = data.collections[0].name;
const siteName = data.collections[0]?.name || data.brand;
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
release.url = url || `https://www.realitykings.com/scene/${entryId}/`;
release.url = url || `https://www.${data.brand}.com/scene/${entryId}/`;
return release;
}
@ -104,6 +116,9 @@ function getUrl(site) {
return `${site.url}/scenes`;
}
if (site.parameters?.extract) {
return `${site.url}/scenes`;
}
if (site.parameters?.siteId) {
return `${site.network.url}/scenes?site=${site.parameters.siteId}`;
@ -144,7 +159,7 @@ function scrapeProfile(data, html, releases = []) {
if (data.height) profile.height = inchesToCm(data.height);
if (data.weight) profile.weight = lbsToKg(data.weight);
if (data.images.card_main_rect && data.images.card_main_rect[0]) {
if (data.images.card_main_rect?.[0]) {
profile.avatar = data.images.card_main_rect[0].xl?.url
|| data.images.card_main_rect[0].lg?.url
|| data.images.card_main_rect[0].md?.url
@ -169,7 +184,7 @@ async function fetchLatest(site, page = 1) {
const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
const limit = 10;
const apiUrl = site.parameters?.native
const apiUrl = site.parameters?.native || site.parameters?.extract
? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

View File

@ -77,7 +77,7 @@ async function scrapeScene(html, url, site) {
release.actors = qa('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);
if (release.actors.length === 0) {
const actorEl = qa('.stat').find(stat => /Featuring/.test(stat.textContent))
const actorEl = qa('.stat').find(stat => /Featuring/.test(stat.textContent));
const actorString = qtext(actorEl);
console.log(actorString);
@ -147,7 +147,7 @@ function scrapeProfile(html) {
const bio = qa('.stat').reduce((acc, el) => {
const prop = q(el, '.label', true).slice(0, -1);
const key = slugify(prop, false, '_');
const key = slugify(prop, { delimiter: '_' });
const value = q(el, '.value', true);
return {

View File

@ -60,7 +60,7 @@ function destructConfigNetworks(networks = []) {
}
async function findSiteByUrl(url) {
const { origin, pathname } = new URL(url);
const { origin, hostname, pathname } = new URL(url);
// const domain = hostname.replace(/www.|tour./, '');
const dirUrl = `${origin}${pathname.split('/').slice(0, 2).join('/')}`; // allow for sites on URI directory
@ -72,6 +72,9 @@ async function findSiteByUrl(url) {
)
.where('sites.url', url)
.orWhere('sites.url', origin)
.orWhere('sites.url', origin.replace(/www\.|tour\./, ''))
.orWhere('sites.url', `https://www.${hostname}`)
.orWhere('sites.url', `http://www.${hostname}`)
.orWhere('sites.url', dirUrl)
// .orWhere('sites.url', 'like', `%${domain}`)
.first();

View File

@ -9,16 +9,18 @@ const knex = require('../knex');
async function init() {
const posters = await knex('actors')
.select('actors.name', 'releases.title', 'media.path')
.whereIn('name', argv.actors)
.select('actors.name as actor_name', 'releases.title', 'media.path', 'sites.name as site_name', 'networks.name as network_name')
.whereIn('actors.name', argv.actors)
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
.join('releases', 'releases_actors.release_id', 'releases.id')
.join('releases_posters', 'releases_posters.release_id', 'releases.id')
.join('sites', 'sites.id', 'releases.site_id')
.join('networks', 'networks.id', 'sites.network_id')
.join('media', 'releases_posters.media_id', 'media.id');
await Promise.all(posters.map(async (poster) => {
const source = path.join(config.media.path, poster.path);
const target = path.join(config.media.path, 'posters', `${poster.title.replace('/', '_')}.${poster.name}.jpeg`);
const target = path.join(config.media.path, 'posters', `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')}.jpeg`);
const file = await fs.readFile(source);
await fs.writeFile(target, file);

View File

@ -1,7 +1,21 @@
'use strict';
function slugify(string, encode = false, delimiter = '-') {
const slug = string.trim().toLowerCase().match(/\w+/g).join(delimiter);
/**
 * Turn free-form text into a lowercase slug made of its word-character runs.
 *
 * The input is trimmed and lowercased, split into runs matching `\w+`, and the
 * runs are joined with `delimiter`. Whole components are appended only while
 * the slug stays strictly shorter than `limit` characters; the slug is cut at
 * the first component that does not fit, so it is always a prefix of the full
 * slug (no skipped middle words, no leading delimiter).
 *
 * @param {string} string - Text to slugify; non-string or empty input yields ''.
 * @param {object} [options]
 * @param {boolean} [options.encode=false] - Pass the result through `encodeURI`.
 * @param {string} [options.delimiter='-'] - Separator placed between components.
 * @param {number} [options.limit=1000] - Exclusive upper bound on slug length.
 * @returns {string} The slug, or '' when the input has no word characters.
 */
function slugify(string, {
	encode = false,
	delimiter = '-',
	limit = 1000,
} = {}) {
	// Missing titles would otherwise throw on .trim().
	if (typeof string !== 'string') {
		return '';
	}

	// match() returns null (not an empty array) when nothing matches.
	const slugComponents = string.trim().toLowerCase().match(/\w+/g);

	if (!slugComponents) {
		return '';
	}

	let slug = '';

	for (const component of slugComponents) {
		const candidate = slug === '' ? component : `${slug}${delimiter}${component}`;

		// Stop at the first overflow instead of skipping it and appending
		// later, shorter components.
		if (candidate.length >= limit) {
			break;
		}

		slug = candidate;
	}

	return encode ? encodeURI(slug) : slug;
}