Major refactor, cleaned up site scrape module, fixed and cleaned up release scrape module. Removed old CLI code

This commit is contained in:
ThePendulum 2019-11-16 03:33:36 +01:00
parent b07f88d023
commit b489c8fc33
35 changed files with 595 additions and 847 deletions

View File

@ -48,23 +48,17 @@
</li> </li>
</ul> </ul>
<h3 class="heading">Latest releases</h3> <Releases
:releases="releases"
<ul class="nolist scenes"> :context="actor.name"
<li />
v-for="release in releases"
:key="`release-${release.id}`"
>
<ReleaseTile :release="release" />
</li>
</ul>
</div> </div>
</div> </div>
</template> </template>
<script> <script>
import FilterBar from '../header/filter-bar.vue'; import FilterBar from '../header/filter-bar.vue';
import ReleaseTile from '../tile/release.vue'; import Releases from '../releases/releases.vue';
async function fetchReleases() { async function fetchReleases() {
this.releases = await this.$store.dispatch('fetchActorReleases', this.$route.params.actorSlug); this.releases = await this.$store.dispatch('fetchActorReleases', this.$route.params.actorSlug);
@ -82,7 +76,7 @@ async function mounted() {
export default { export default {
components: { components: {
FilterBar, FilterBar,
ReleaseTile, Releases,
}, },
data() { data() {
return { return {

View File

@ -3,22 +3,16 @@
<FilterBar :fetch-releases="fetchReleases" /> <FilterBar :fetch-releases="fetchReleases" />
<div class="content-inner"> <div class="content-inner">
<ul class="scenes nolist"> <Releases
<li :releases="releases"
v-for="release in releases" />
:key="release.id"
class="scene"
>
<ReleaseTile :release="release" />
</li>
</ul>
</div> </div>
</div> </div>
</template> </template>
<script> <script>
import FilterBar from '../header/filter-bar.vue'; import FilterBar from '../header/filter-bar.vue';
import ReleaseTile from '../tile/release.vue'; import Releases from '../releases/releases.vue';
async function fetchReleases() { async function fetchReleases() {
this.releases = await this.$store.dispatch('fetchReleases'); this.releases = await this.$store.dispatch('fetchReleases');
@ -33,7 +27,7 @@ async function mounted() {
export default { export default {
components: { components: {
FilterBar, FilterBar,
ReleaseTile, Releases,
}, },
data() { data() {
return { return {

View File

@ -41,23 +41,17 @@
</ul> </ul>
</template> </template>
<h3 class="heading">Latest releases</h3> <Releases
:releases="releases"
<ul class="nolist scenes"> :context="network.name"
<li />
v-for="release in releases"
:key="`release-${release.id}`"
>
<ReleaseTile :release="release" />
</li>
</ul>
</div> </div>
</div> </div>
</template> </template>
<script> <script>
import FilterBar from '../header/filter-bar.vue'; import FilterBar from '../header/filter-bar.vue';
import ReleaseTile from '../tile/release.vue'; import Releases from '../releases/releases.vue';
import SiteTile from '../tile/site.vue'; import SiteTile from '../tile/site.vue';
async function fetchReleases() { async function fetchReleases() {
@ -80,7 +74,7 @@ async function mounted() {
export default { export default {
components: { components: {
FilterBar, FilterBar,
ReleaseTile, Releases,
SiteTile, SiteTile,
}, },
data() { data() {

View File

@ -0,0 +1,69 @@
<!--
    Releases: renders a grid of release tiles, optionally preceded by a heading
    of the form "<range> releases for '<context>'". The heading is only shown
    when a context string is passed in.
-->
<template>
    <div>
        <h3
            v-if="context"
            class="heading"
        ><span class="range">{{ range }}</span> releases for '{{ context }}'</h3>

        <ul class="nolist releases">
            <li
                v-for="release in releases"
                :key="`release-${release.id}`"
            >
                <ReleaseTile :release="release" />
            </li>
        </ul>
    </div>
</template>

<script>
import ReleaseTile from '../tile/release.vue';

export default {
    components: { ReleaseTile },
    props: {
        // name shown in the heading; the heading is hidden when this is not set
        context: {
            type: String,
            default: null,
        },
        // releases to render, one ReleaseTile each
        releases: {
            type: Array,
            default: () => [],
        },
    },
    computed: {
        // range value read from the UI store, displayed capitalized in the heading
        range() {
            return this.$store.state.ui.range;
        },
    },
};
</script>

<style lang="scss" scoped>
@import 'theme';

.heading {
    padding: 0;
    margin: 0 0 1rem 0;

    .range {
        text-transform: capitalize;
    }
}

.releases {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(20rem, .5fr));
    grid-gap: 1rem;
}

@media(max-width: $breakpoint) {
    .releases {
        grid-template-columns: repeat(auto-fit, minmax(20rem, 1fr));
    }
}
</style>

View File

@ -47,23 +47,17 @@
<p class="description">{{ site.description }}</p> <p class="description">{{ site.description }}</p>
<h3 class="heading">Latest releases</h3> <Releases
:releases="releases"
<ul class="nolist scenes"> :context="site.name"
<li />
v-for="release in releases"
:key="`release-${release.id}`"
>
<ReleaseTile :release="release" />
</li>
</ul>
</div> </div>
</div> </div>
</template> </template>
<script> <script>
import FilterBar from '../header/filter-bar.vue'; import FilterBar from '../header/filter-bar.vue';
import ReleaseTile from '../tile/release.vue'; import Releases from '../releases/releases.vue';
async function fetchReleases() { async function fetchReleases() {
this.releases = await this.$store.dispatch('fetchSiteReleases', this.$route.params.siteSlug); this.releases = await this.$store.dispatch('fetchSiteReleases', this.$route.params.siteSlug);
@ -81,7 +75,7 @@ async function mounted() {
export default { export default {
components: { components: {
FilterBar, FilterBar,
ReleaseTile, Releases,
}, },
data() { data() {
return { return {

View File

@ -17,23 +17,17 @@
</div> </div>
<div class="content-inner"> <div class="content-inner">
<h3 class="heading">Latest releases</h3> <Releases
:releases="releases"
<ul class="nolist scenes"> :context="tag.name"
<li />
v-for="release in releases"
:key="`release-${release.id}`"
>
<ReleaseTile :release="release" />
</li>
</ul>
</div> </div>
</div> </div>
</template> </template>
<script> <script>
import FilterBar from '../header/filter-bar.vue'; import FilterBar from '../header/filter-bar.vue';
import ReleaseTile from '../tile/release.vue'; import Releases from '../releases/releases.vue';
async function fetchReleases() { async function fetchReleases() {
this.releases = await this.$store.dispatch('fetchTagReleases', this.$route.params.tagSlug); this.releases = await this.$store.dispatch('fetchTagReleases', this.$route.params.tagSlug);
@ -51,7 +45,7 @@ async function mounted() {
export default { export default {
components: { components: {
FilterBar, FilterBar,
ReleaseTile, Releases,
}, },
data() { data() {
return { return {
@ -86,19 +80,4 @@ export default {
height: 1.25rem; height: 1.25rem;
} }
} }
.heading {
padding: 0;
margin: 0 0 1rem 0;
}
.bio-heading {
display: inline-block;
font-weight: bold;
margin: .5rem 0 0 0;
&::after {
content: ':';
}
}
</style> </style>

View File

@ -37,15 +37,3 @@ body {
fill: $primary; fill: $primary;
} }
} }
.scenes {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(20rem, .5fr));
grid-gap: 1rem;
}
@media(max-width: $breakpoint) {
.scenes {
grid-template-columns: repeat(auto-fit, minmax(20rem, 1fr));
}
}

View File

@ -6,6 +6,7 @@ module.exports = {
database: 'traxxx', database: 'traxxx',
}, },
web: { web: {
host: '0.0.0.0',
port: 5000, port: 5000,
}, },
include: [ include: [
@ -86,15 +87,4 @@ module.exports = {
path: './', path: './',
thumbnailSize: 320, // width for 16:9 will be exactly 576px thumbnailSize: 320, // width for 16:9 will be exactly 576px
}, },
filename: {
dateFormat: 'DD-MM-YYYY',
actorsJoin: ', ',
slash: '_',
subpatterns: {
siteName: '{siteName} - ',
sceneDate: ', {sceneDate}',
sceneId: ' {sceneId}',
},
pattern: '{siteName}{sceneTitle} ({sceneActors}{sceneDate}{sceneId})',
},
}; };

50
package-lock.json generated
View File

@ -1586,11 +1586,6 @@
"resolved": "https://registry.npmjs.org/aproba/-/aproba-1.2.0.tgz", "resolved": "https://registry.npmjs.org/aproba/-/aproba-1.2.0.tgz",
"integrity": "sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==" "integrity": "sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw=="
}, },
"arch": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/arch/-/arch-2.1.1.tgz",
"integrity": "sha512-BLM56aPo9vLLFVa8+/+pJLnrZ7QGGTVHWsCwieAWT9o9K8UeGaQbzZbGoabWLOo2ksBCztoXdqBZBplqLDDCSg=="
},
"are-we-there-yet": { "are-we-there-yet": {
"version": "1.1.5", "version": "1.1.5",
"resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-1.1.5.tgz", "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-1.1.5.tgz",
@ -2954,46 +2949,6 @@
"integrity": "sha1-/xnt6Kml5XkyQUewwR8PvLq+1jk=", "integrity": "sha1-/xnt6Kml5XkyQUewwR8PvLq+1jk=",
"dev": true "dev": true
}, },
"clipboardy": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/clipboardy/-/clipboardy-1.2.3.tgz",
"integrity": "sha512-2WNImOvCRe6r63Gk9pShfkwXsVtKCroMAevIbiae021mS850UkWPbevxsBz3tnvjZIEGvlwaqCPsw+4ulzNgJA==",
"requires": {
"arch": "^2.1.0",
"execa": "^0.8.0"
},
"dependencies": {
"cross-spawn": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-5.1.0.tgz",
"integrity": "sha1-6L0O/uWPz/b4+UUQoKVUu/ojVEk=",
"requires": {
"lru-cache": "^4.0.1",
"shebang-command": "^1.2.0",
"which": "^1.2.9"
}
},
"execa": {
"version": "0.8.0",
"resolved": "https://registry.npmjs.org/execa/-/execa-0.8.0.tgz",
"integrity": "sha1-2NdrvBtVIX7RkP1t1J08d07PyNo=",
"requires": {
"cross-spawn": "^5.0.1",
"get-stream": "^3.0.0",
"is-stream": "^1.1.0",
"npm-run-path": "^2.0.0",
"p-finally": "^1.0.0",
"signal-exit": "^3.0.0",
"strip-eof": "^1.0.0"
}
},
"get-stream": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-3.0.0.tgz",
"integrity": "sha1-jpQ9E1jcN1VQVOy+LtsFqhdO3hQ="
}
}
},
"cliui": { "cliui": {
"version": "4.1.0", "version": "4.1.0",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz",
@ -7451,11 +7406,6 @@
"integrity": "sha512-iyam8fBuCUpWeKPGpaNMetEocMt364qkCsfL9JuhjXX6dRnguRVOfk2GZaDpPjcOKiiXCPINZC1GczQ7iTq3Zw==", "integrity": "sha512-iyam8fBuCUpWeKPGpaNMetEocMt364qkCsfL9JuhjXX6dRnguRVOfk2GZaDpPjcOKiiXCPINZC1GczQ7iTq3Zw==",
"dev": true "dev": true
}, },
"neo-blessed": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/neo-blessed/-/neo-blessed-0.2.0.tgz",
"integrity": "sha512-C2kC4K+G2QnNQFXUIxTQvqmrdSIzGTX1ZRKeDW6ChmvPRw8rTkTEJzbEQHiHy06d36PCl/yMOCjquCRV8SpSQw=="
},
"nice-try": { "nice-try": {
"version": "1.0.5", "version": "1.0.5",
"resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz", "resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz",

View File

@ -69,7 +69,6 @@
"body-parser": "^1.19.0", "body-parser": "^1.19.0",
"cheerio": "^1.0.0-rc.2", "cheerio": "^1.0.0-rc.2",
"cli-confirm": "^1.0.1", "cli-confirm": "^1.0.1",
"clipboardy": "^1.2.3",
"config": "^3.0.1", "config": "^3.0.1",
"dayjs": "^1.8.14", "dayjs": "^1.8.14",
"express": "^4.16.4", "express": "^4.16.4",
@ -81,7 +80,6 @@
"knex-migrate": "^1.7.1", "knex-migrate": "^1.7.1",
"mime": "^2.4.4", "mime": "^2.4.4",
"moment": "^2.24.0", "moment": "^2.24.0",
"neo-blessed": "^0.2.0",
"opn": "^5.4.0", "opn": "^5.4.0",
"pg": "^7.9.0", "pg": "^7.9.0",
"prop-types": "^15.7.2", "prop-types": "^15.7.2",

View File

@ -226,6 +226,25 @@
color: #ff6c88; color: #ff6c88;
} }
/* $primary: #ff886c; */
.heading[data-v-22ffe3e4] {
padding: 0;
margin: 0 0 1rem 0;
}
.heading .range[data-v-22ffe3e4] {
text-transform: capitalize;
}
.releases[data-v-22ffe3e4] {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(20rem, 0.5fr));
grid-gap: 1rem;
}
@media (max-width: 720px) {
.releases[data-v-22ffe3e4] {
grid-template-columns: repeat(auto-fit, minmax(20rem, 1fr));
}
}
/* $primary: #ff886c; */ /* $primary: #ff886c; */
.actor[data-v-6989dc6f] { .actor[data-v-6989dc6f] {
background: #fff; background: #fff;
@ -571,18 +590,6 @@
width: 1.25rem; width: 1.25rem;
height: 1.25rem; height: 1.25rem;
} }
.heading[data-v-80991bcc] {
padding: 0;
margin: 0 0 1rem 0;
}
.bio-heading[data-v-80991bcc] {
display: inline-block;
font-weight: bold;
margin: .5rem 0 0 0;
}
.bio-heading[data-v-80991bcc]::after {
content: ':';
}
/* $primary: #ff886c; */ /* $primary: #ff886c; */
.errorpage[data-v-29109daf] { .errorpage[data-v-29109daf] {
@ -724,15 +731,6 @@ body {
.icon.icon-href :hover { .icon.icon-href :hover {
fill: #ff6c88; } fill: #ff6c88; }
.scenes {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(20rem, 0.5fr));
grid-gap: 1rem; }
@media (max-width: 720px) {
.scenes {
grid-template-columns: repeat(auto-fit, minmax(20rem, 1fr)); } }
/* $primary: #ff886c; */ /* $primary: #ff886c; */
.header[data-v-10b7ec04] { .header[data-v-10b7ec04] {
background: #fff; background: #fff;

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 7.4 KiB

View File

@ -1,80 +1,30 @@
'use strict'; 'use strict';
const blessed = require('neo-blessed');
const clipboard = require('clipboardy');
const argv = require('./argv'); const argv = require('./argv');
const { renderReleases, renderScene } = require('./tui/render'); const knex = require('./knex');
const initServer = require('./web/server'); const initServer = require('./web/server');
const fetchReleases = require('./fetch-releases'); const scrapeSites = require('./scrape-sites');
const fetchScene = require('./fetch-scene'); const scrapeRelease = require('./scrape-release');
function initScreen() {
const screen = blessed.screen({
title: `traxxx ${new Date().getTime()}`,
smartCSR: true,
mouse: false,
});
screen.enableInput();
screen.key(['escape', 'q', 'C-c'], () => {
screen.render();
screen.destroy();
});
return screen;
}
function getMethod() {
if (argv.scene) {
return {
fetch: () => fetchScene(argv.scene),
render: renderScene,
};
}
if (argv.fetch) {
return {
fetch: () => fetchReleases(),
render: renderReleases,
};
}
return initServer();
}
async function init() { async function init() {
const screen = argv.render && !argv.filename && initScreen(); if (argv.url) {
await scrapeRelease(argv.url);
knex.destroy();
try {
const method = getMethod();
if (method) {
const result = await method.fetch();
if (result) {
if (argv.copy && result.copy) {
clipboard.writeSync(result.copy);
console.log(`Result copied to clipboard: ${result.copy}`);
}
if (argv.filename && result.filename) {
console.log(result.filename);
// setTimeout(() => log(), 5000);
return; return;
} }
if (argv.render) {
method.render(result, screen); if (argv.networks || argv.sites) {
} await scrapeSites();
} knex.destroy();
}
} catch (error) { return;
console.error(argv.debug ? error : error.message);
} }
await initServer();
} }
init(); init();

View File

@ -5,25 +5,25 @@ const yargs = require('yargs');
const { argv } = yargs const { argv } = yargs
.command('npm start') .command('npm start')
.option('fetch', { .option('networks', {
describe: 'Fetch latest releases', describe: 'Networks to scrape (overrides config)',
type: 'boolean', type: 'array',
default: false, alias: 'network',
})
.option('sites', {
describe: 'Sites to scrape (overrides config)',
type: 'array',
alias: 'site',
}) })
.option('deep', { .option('deep', {
describe: 'Fetch details for all releases', describe: 'Fetch details for all releases',
type: 'boolean', type: 'boolean',
default: true, default: true,
}) })
.option('networks', { .option('url', {
describe: 'Networks to include (overrides config)', describe: 'Scrape scene info from URL',
type: 'array', type: 'string',
alias: 'network', alias: 'fetch',
})
.option('sites', {
describe: 'Sites to include (overrides config)',
type: 'array',
alias: 'site',
}) })
.option('after', { .option('after', {
describe: 'Don\'t fetch scenes older than', describe: 'Don\'t fetch scenes older than',
@ -40,32 +40,9 @@ const { argv } = yargs
type: 'boolean', type: 'boolean',
default: true, default: true,
}) })
.option('render', {
describe: 'Fetch data without rendering interface',
type: 'boolean',
default: false,
})
.option('scene', {
describe: 'Fetch scene info from URL',
type: 'string',
})
.option('copy', {
describe: 'Copy relevant result to clipboard',
type: 'boolean',
alias: 'c',
})
.option('filename', {
describe: 'Only output the suggested filename of a scene',
type: 'boolean',
})
.option('debug', { .option('debug', {
describe: 'Show error stack traces', describe: 'Show error stack traces',
type: 'boolean', type: 'boolean',
})
.option('quit', {
describe: 'Exit after fetching data. Usually used with --copy.',
type: 'boolean',
alias: 'q',
}); });
module.exports = argv; module.exports = argv;

View File

View File

@ -1,270 +0,0 @@
'use strict';
const config = require('config');
const fs = require('fs-extra');
const path = require('path');
const Promise = require('bluebird');
const moment = require('moment');
const argv = require('./argv');
const knex = require('./knex');
const scrapers = require('./scrapers');
const fetchScene = require('./fetch-scene');
const { storeTags } = require('./tags');
const { storeActors } = require('./actors');
const { storePoster, storePhotos, storeTrailer } = require('./media');
/**
 * Split a configured include list into network entries and site entries.
 *
 * An array entry contributes the items of its second element to `sites`;
 * any other entry is added to `networks` as-is.
 *
 * @param {Array} networks - include list entries
 * @returns {{ networks: Array, sites: Array }} the separated entries
 */
function destructConfigNetworks(networks) {
    const included = { networks: [], sites: [] };

    networks.forEach((entry) => {
        if (Array.isArray(entry)) {
            // network specifies sites
            included.sites.push(...entry[1]);
            return;
        }

        included.networks.push(entry);
    });

    return included;
}
/**
 * Map raw site rows (joined with their network columns) to nested site objects.
 *
 * The `parameters` and `network_parameters` columns are parsed as JSON. A
 * missing or empty column yields `null` instead of throwing, because
 * `JSON.parse(undefined)` raises a SyntaxError when a column was not selected.
 *
 * @param {Array<Object>} sites - rows with site columns and `network_*` aliases
 * @returns {Array<Object>} curated sites with a nested `network` object
 */
function curateSites(sites) {
    const parseJson = value => (value ? JSON.parse(value) : null);

    return sites.map(site => ({
        id: site.id,
        name: site.name,
        slug: site.slug,
        description: site.description,
        url: site.url,
        network: {
            id: site.network_id,
            name: site.network_name,
            slug: site.network_slug,
            parameters: parseJson(site.network_parameters),
        },
        parameters: parseJson(site.parameters),
    }));
}
/**
 * Resolve the sites to scrape, either from the `--networks`/`--sites`
 * arguments or, when neither is given, from `config.include`.
 *
 * Both sources run the same query, so every site row carries the network
 * name, slug and parameters that `curateSites` reads. Previously the config
 * branch selected only the network name, leaving `network.slug` undefined
 * and making `curateSites` parse an undefined `network_parameters`.
 *
 * @returns {Promise<Array<Object>>} curated sites matching the selected
 *   site slugs or belonging to the selected networks
 */
async function accumulateIncludedSites() {
    // command line arguments override the configured include list
    const included = argv.networks || argv.sites
        ? { networks: argv.networks || [], sites: argv.sites || [] }
        : destructConfigNetworks(config.include);

    const networks = await knex('networks').select('id').whereIn('slug', included.networks || []);
    const networkIds = networks.map(network => network.id);

    const rawSites = await knex('sites')
        .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
        .whereIn('sites.slug', included.sites || [])
        .orWhereIn('network_id', networkIds)
        .leftJoin('networks', 'sites.network_id', 'networks.id');

    return curateSites(rawSites);
}
/**
 * Query stored releases whose `shoot_id` or `entry_id` matches any of the
 * given scraped releases.
 *
 * @param {Array<Object>} latestReleases - scraped releases with optional
 *   `shootId` and `entryId`
 * @param {*} _siteId - accepted but not used by the query
 * @returns {Promise<Array<Object>>} result of the `releases` query
 */
async function findDuplicateReleases(latestReleases, _siteId) {
    // collect one property across all releases, skipping releases that lack it
    const collectDefined = key => latestReleases
        .map(release => release[key])
        .filter(value => value !== undefined);

    const shootIds = collectDefined('shootId');
    const entryIds = collectDefined('entryId');

    const query = knex('releases');

    return query
        .whereIn('shoot_id', shootIds)
        .orWhereIn('entry_id', entryIds);
}
/**
 * Insert a scraped release into the `releases` table, then store whichever
 * of its actors, tags, photos, poster and trailer are present.
 *
 * A media directory is created first when the release has a poster or at
 * least one photo. When the insert returns no row, an error is logged and
 * nothing else is stored.
 *
 * @param {Object} release - scraped release, including its `site`
 * @returns {Promise<undefined>}
 */
async function storeRelease(release) {
    const { site, rating } = release;

    const row = {
        site_id: site.id,
        studio_id: release.studio ? release.studio.id : null,
        shoot_id: release.shootId || null,
        entry_id: release.entryId || null,
        url: release.url,
        title: release.title,
        date: release.date,
        description: release.description,
        // director: release.director,
        duration: release.duration,
        likes: rating && rating.likes,
        dislikes: rating && rating.dislikes,
        rating: rating && rating.stars && Math.floor(rating.stars),
        deep: Boolean(argv.deep && release.url && !release.upcoming),
    };

    const insertedRows = await knex('releases')
        .insert(row)
        .returning('*');

    if (!insertedRows.length) {
        console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
        return;
    }

    const releaseEntry = insertedRows[0];

    console.log(`Stored (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

    const hasPhotos = Boolean(release.photos && release.photos.length);

    if (release.poster || hasPhotos) {
        const mediaDirectory = path.join(config.media.path, release.site.network.slug, release.site.slug, releaseEntry.id.toString());

        await fs.mkdir(mediaDirectory, { recursive: true });
    }

    // start every applicable store operation, then wait for all of them
    const pending = [];

    if (release.actors && release.actors.length > 0) {
        pending.push(storeActors(release, releaseEntry));
    }

    if (release.tags && release.tags.length > 0) {
        pending.push(storeTags(release, releaseEntry));
    }

    if (release.photos && release.photos.length > 0) {
        pending.push(storePhotos(release, releaseEntry));
    }

    if (release.poster) {
        pending.push(storePoster(release, releaseEntry));
    }

    if (release.trailer && release.trailer.src) {
        pending.push(storeTrailer(release, releaseEntry));
    }

    await Promise.all(pending);
}
/**
 * Store a list of scraped releases, two at a time.
 *
 * A release that fails to store is logged and yields `null`, so one failure
 * does not reject the whole batch. `Promise.map` with a `concurrency` option
 * is Bluebird's API; the file binds `Promise` to Bluebird.
 *
 * @param {Array<Object>} [releases=[]] - scraped releases to store
 * @returns {Promise<Array>} one entry per release: the result of
 *   `storeRelease`, or `null` when storing that release failed
 */
async function storeReleases(releases = []) {
    return Promise.map(releases, async (release) => {
        try {
            // await inside the try block: returning the bare promise would let a
            // rejection bypass the catch below and reject the whole batch
            return await storeRelease(release);
        } catch (error) {
            console.error(error);

            return null;
        }
    }, {
        concurrency: 2,
    });
}
/**
 * Page through a scraper's latest releases and accumulate those that are
 * neither stored already nor older than `afterDate`.
 *
 * Fixes over the previous version:
 * - an empty page now returns the releases accumulated so far instead of `[]`;
 * - known identifiers are normalized to strings, matching the `String(...)`
 *   lookups below, so non-string ids are recognised as duplicates and the
 *   guard against repeated pages actually stops the recursion.
 *
 * @param {Object} scraper - scraper exposing `fetchLatest(site, page)`
 * @param {Object} site - site being scraped
 * @param {Date} afterDate - releases dated on or before this are dropped
 * @param {Array<Object>} [accReleases=[]] - releases gathered on previous pages
 * @param {number} [page=1] - page to fetch
 * @returns {Promise<Array<Object>>} accumulated unique recent releases
 */
async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
    const latestReleases = await scraper.fetchLatest(site, page);

    if (latestReleases.length === 0) {
        // ran out of pages; keep what earlier pages yielded
        return accReleases;
    }

    const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);

    const duplicateReleasesIds = new Set(
        duplicateReleases
            .map(release => release.shoot_id)
            .concat(duplicateReleases.map(release => release.entry_id))
            // exclude accumulated releases to prevent an infinite loop if the next page contains the same releases as the previous
            .concat(accReleases.map(release => release.shootId || release.entryId))
            // drop missing ids before stringifying, so String(undefined) never enters the set
            .filter(Boolean)
            .map(String),
    );

    const uniqueReleases = latestReleases.filter(release => !duplicateReleasesIds.has(String(release.shootId))
        && !duplicateReleasesIds.has(String(release.entryId))
        && moment(release.date).isAfter(afterDate));

    console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);

    const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;

    if (uniqueReleases.length > 0 && moment(oldestReleaseOnPage).isAfter(afterDate) && (oldestReleaseOnPage || page < argv.pages)) {
        return fetchNewReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
    }

    return accReleases.concat(uniqueReleases);
}
/**
 * Scrape recent and upcoming releases for every included site, optionally
 * fetch scene details (`argv.deep`) and store them (`argv.save`), and return
 * all releases sorted by date, newest first.
 *
 * Changes over the previous version:
 * - in deep mode the upcoming releases were returned twice (once with scene
 *   details, once without); each release is now returned once;
 * - the "Cound not find scraper" message typo is corrected;
 * - per-site results are flattened with a single concat.
 *
 * Which releases get stored is unchanged: in deep mode the recent and
 * upcoming releases, otherwise only the recent ones.
 *
 * Note: the knex connection is destroyed before returning.
 *
 * @returns {Promise<Array<Object>>} releases of all sites, each with its
 *   site's `network` attached
 */
async function fetchReleases() {
    const sites = await accumulateIncludedSites();

    if (sites.length === 0) {
        console.error('None of the specified sites are in the database');
        return [];
    }

    const scenesPerSite = await Promise.map(sites, async (site) => {
        // a site-specific scraper takes precedence over its network's scraper
        const scraper = scrapers[site.slug] || scrapers[site.network.slug];

        if (!scraper) {
            console.error(`Could not find scraper for '${site.name}' (${site.slug})`);

            return [];
        }

        try {
            // argv.after is split on spaces and passed to moment's subtract()
            const afterDate = moment.utc().subtract(...argv.after.split(' ')).toDate();

            const [newReleases, upcomingReleases] = await Promise.all([
                fetchNewReleases(scraper, site, afterDate),
                scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [],
            ]);

            console.log(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);

            const markedUpcomingReleases = upcomingReleases.map(release => ({ ...release, upcoming: true }));
            const allReleases = [...newReleases, ...markedUpcomingReleases];

            const finalReleases = argv.deep
                ? await Promise.map(allReleases, async (release) => {
                    if (release.url) {
                        const scene = await fetchScene(release.url, release);

                        return {
                            ...release,
                            ...scene,
                        };
                    }

                    return release;
                }, {
                    concurrency: 2,
                })
                : newReleases;

            if (argv.save) {
                await storeReleases(finalReleases);
            }

            // in deep mode finalReleases already includes the upcoming releases
            const returnedReleases = argv.deep ? finalReleases : allReleases;

            return returnedReleases.map(release => ({
                ...release,
                network: site.network,
            }));
        } catch (error) {
            if (argv.debug) {
                console.error(`${site.id}: Failed to fetch releases`, error);
            } else {
                console.log(`${site.id}: Failed to fetch releases`);
            }

            return [];
        }
    }, {
        concurrency: 2,
    });

    const accumulatedScenes = [].concat(...scenesPerSite);
    const sortedScenes = accumulatedScenes.sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));

    knex.destroy();

    return sortedScenes;
}
module.exports = fetchReleases;

View File

@ -1,126 +0,0 @@
'use strict';
const config = require('config');
const moment = require('moment');
const knex = require('./knex');
const argv = require('./argv');
const scrapers = require('./scrapers');
/**
 * Find the site a scene URL belongs to, falling back to a network whose URL
 * matches when no site does.
 *
 * The hostname is matched without a leading `www.`. The dot in that prefix
 * is now escaped; the previous `/^www./` stripped any four characters that
 * started with `www`.
 *
 * @param {string} url - scene URL
 * @returns {Promise<Object>} the site, with `isFallback` set to true when a
 *   network row was used in place of a site row
 * @throws {Error} when neither a site nor a network matches the URL
 */
async function findSite(url) {
    const { hostname } = new URL(url);
    const domain = hostname.replace(/^www\./, '');

    const site = await knex('sites')
        .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
        .where('sites.url', 'like', `%${domain}`)
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .first()
        // scene might use generic network URL, let network scraper determine channel site
        || await knex('networks')
            .where('url', 'like', `%${domain}`)
            .first();

    if (!site) {
        // fail with a clear message rather than a TypeError on site.id below
        throw new Error(`Could not find site or network for URL '${url}'`);
    }

    return {
        id: site.id,
        name: site.name,
        slug: site.slug,
        description: site.description,
        url: site.url,
        network: {
            // a network row has no network_* columns, so it acts as its own network
            id: site.network_id || site.id,
            slug: site.network_slug || site.slug,
            parameters: site.network_parameters && JSON.parse(site.network_parameters),
        },
        parameters: site.parameters && JSON.parse(site.parameters),
        isFallback: site.network_id === undefined,
    };
}
/**
 * Build a suggested filename for a scene from `config.filename.pattern`.
 *
 * Each `{prop}` placeholder in the pattern is replaced by the matching scene
 * value, wrapped in `config.filename.subpatterns[prop]` when one is
 * configured. Placeholders without a value are removed. Slashes are replaced
 * by `config.filename.slash`.
 *
 * Fixes over the previous version:
 * - values are inserted literally; `$` sequences in a site name or id are no
 *   longer interpreted as replacement patterns by `String.prototype.replace`;
 * - a scene without `actors` no longer throws;
 * - a scene without `date` gets no date, where `moment.utc(undefined)` would
 *   have formatted the current time;
 * - non-string values (e.g. a numeric shoot id) are stringified before use.
 *
 * @param {Object} scene - scraped scene with `site`, `title` and optionally
 *   `shootId`, `actors` and `date`
 * @returns {string} the derived filename
 */
function deriveFilename(scene) {
    const settings = config.filename;

    // function replacers insert their result verbatim
    const replaceSlashes = text => String(text).replace(/\//g, () => settings.slash);

    const props = {
        siteName: scene.site.name,
        sceneId: scene.shootId,
        sceneTitle: scene.title,
        sceneActors: (scene.actors || []).join(settings.actorsJoin),
        sceneDate: scene.date ? moment.utc(scene.date).format(settings.dateFormat) : null,
    };

    return settings.pattern.replace(/\{\w+\}/g, (match) => {
        const prop = match.slice(1, -1);
        const value = props[prop];

        if (!value) {
            return '';
        }

        const subpattern = settings.subpatterns[prop];

        if (subpattern) {
            return replaceSlashes(subpattern.replace(/\{\w+\}/, () => String(value)));
        }

        return replaceSlashes(value);
    });
}
/**
 * Upsert a scraped release into the `releases` table.
 *
 * The insert statement is rendered to SQL and extended with an
 * `ON CONFLICT (site_id, shoot_id)` clause that updates the description,
 * likes, dislikes and rating of an existing row.
 *
 * @param {Object} release - scraped release, including its `site`
 * @returns {Promise<Object>} the release that was passed in
 */
async function storeRelease(release) {
    const { rating } = release;

    const row = {
        site_id: release.site.id,
        shoot_id: release.shootId || null,
        entry_id: release.entryId || null,
        url: release.url,
        title: release.title,
        date: release.date,
        description: release.description,
        // director: release.director,
        duration: release.duration,
        // only the number of photos is stored here
        photos: release.photos ? release.photos.length : 0,
        likes: rating && rating.likes,
        dislikes: rating && rating.dislikes,
        rating: rating && rating.stars,
    };

    console.log('Saving release to database');

    const insertSql = knex('releases').insert(row).toString();

    const upsertSql = [
        `${insertSql} ON CONFLICT (site_id, shoot_id) DO UPDATE SET`,
        'description = EXCLUDED.description,',
        'likes = EXCLUDED.likes,',
        'dislikes = EXCLUDED.dislikes,',
        'rating = EXCLUDED.rating',
        '',
    ].join('\n');

    await knex.raw(upsertSql);

    return release;
}
/**
 * Scrape a single scene from its URL.
 *
 * The site is taken from `release.site` when a release is passed, and looked
 * up from the URL otherwise. `release` now defaults to an empty object: the
 * function is also called with only a URL, which used to throw a TypeError
 * on `release.site`.
 *
 * The scene is stored only when both `argv.scene` and `argv.save` are set.
 *
 * @param {string} url - scene URL
 * @param {Object} [release={}] - previously scraped release, if any
 * @returns {Promise<Object>} the scraped scene plus `filename` and `copy`,
 *   both set to the derived filename
 * @throws {Error} when no scraper exists for the site or its network, or the
 *   scraper has no `fetchScene` method
 */
async function fetchScene(url, release = {}) {
    const site = release.site || await findSite(url);
    // a site-specific scraper takes precedence over its network's scraper
    const scraper = scrapers[site.slug] || scrapers[site.network.slug];

    if (!scraper) {
        throw new Error('Could not find scraper for URL');
    }

    if (!scraper.fetchScene) {
        throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
    }

    const scene = await scraper.fetchScene(url, site);
    const filename = deriveFilename(scene);

    if (argv.scene && argv.save) {
        await storeRelease(scene);
    }

    return {
        ...scene,
        filename,
        copy: filename,
    };
}
module.exports = fetchScene;

View File

@ -28,7 +28,21 @@ async function getThumbnail(buffer) {
.toBuffer(); .toBuffer();
} }
/**
 * Create the media directory for a release when it has a poster or at least
 * one photo. The directory is
 * `<config.media.path>/<network slug>/<site slug>/<release id>`, and parent
 * directories are created as needed.
 *
 * @param {Object} release - scraped release, including its `site`
 * @param {number|string} releaseId - id of the stored release
 * @returns {Promise<undefined>}
 */
async function createMediaDirectory(release, releaseId) {
    const hasPhotos = Boolean(release.photos && release.photos.length);

    if (!release.poster && !hasPhotos) {
        // nothing to store, so no directory is needed
        return;
    }

    const { site } = release;
    const directory = path.join(config.media.path, site.network.slug, site.slug, releaseId.toString());

    await fs.mkdir(directory, { recursive: true });
}
async function storePoster(release, releaseEntry) { async function storePoster(release, releaseEntry) {
if (!release.poster) {
console.warn(`No poster available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
return;
}
console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`); console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
const res = await bhttp.get(release.poster); const res = await bhttp.get(release.poster);
@ -66,6 +80,11 @@ async function storePoster(release, releaseEntry) {
} }
async function storePhotos(release, releaseEntry) { async function storePhotos(release, releaseEntry) {
if (release.photos.length === 0) {
console.warn(`No photos available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
return;
}
console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`); console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
const files = await Promise.map(release.photos, async (photoUrl, index) => { const files = await Promise.map(release.photos, async (photoUrl, index) => {
@ -123,6 +142,11 @@ async function storePhotos(release, releaseEntry) {
} }
async function storeTrailer(release, releaseEntry) { async function storeTrailer(release, releaseEntry) {
if (!release.trailer || !release.trailer.src) {
console.warn(`No trailer available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
return;
}
console.log(`Storing trailer for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`); console.log(`Storing trailer for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
const { pathname } = new URL(release.trailer.src); const { pathname } = new URL(release.trailer.src);
@ -146,6 +170,7 @@ async function storeTrailer(release, releaseEntry) {
} }
module.exports = { module.exports = {
createMediaDirectory,
storePoster, storePoster,
storePhotos, storePhotos,
storeTrailer, storeTrailer,

View File

@ -4,7 +4,7 @@ const knex = require('./knex');
const whereOr = require('./utils/where-or'); const whereOr = require('./utils/where-or');
const { fetchSites } = require('./sites'); const { fetchSites } = require('./sites');
async function curateNetwork(network) { async function curateNetwork(network, includeParameters = false) {
const [sites, studios] = await Promise.all([ const [sites, studios] = await Promise.all([
fetchSites({ network_id: network.id }), fetchSites({ network_id: network.id }),
knex('studios') knex('studios')
@ -18,6 +18,7 @@ async function curateNetwork(network) {
description: network.description, description: network.description,
slug: network.slug, slug: network.slug,
sites, sites,
parameters: includeParameters ? JSON.parse(network.parameters) : null,
studios: studios.map(studio => ({ studios: studios.map(studio => ({
id: studio.id, id: studio.id,
name: studio.name, name: studio.name,
@ -32,6 +33,21 @@ function curateNetworks(releases) {
return Promise.all(releases.map(async release => curateNetwork(release))); return Promise.all(releases.map(async release => curateNetwork(release)));
} }
/**
 * Find the network whose URL ends with the hostname of the given URL.
 *
 * The hostname is matched without a leading `www.`. The dot in that prefix
 * is now escaped; the previous `/^www./` stripped any four characters that
 * started with `www`.
 *
 * @param {string} url - URL to match against `networks.url`
 * @returns {Promise<Object|null>} the curated network including its
 *   parameters, or `null` when no network matches
 */
async function findNetworkByUrl(url) {
    const { hostname } = new URL(url);
    const domain = hostname.replace(/^www\./, '');

    const network = await knex('networks')
        .where('networks.url', 'like', `%${domain}`)
        .first();

    if (network) {
        // true: include the parsed network parameters in the result
        return curateNetwork(network, true);
    }

    return null;
}
async function fetchNetworks(queryObject) { async function fetchNetworks(queryObject) {
const releases = await knex('networks') const releases = await knex('networks')
.where(builder => whereOr(queryObject, 'networks', builder)) .where(builder => whereOr(queryObject, 'networks', builder))
@ -54,4 +70,5 @@ async function fetchNetworksFromReleases() {
module.exports = { module.exports = {
fetchNetworks, fetchNetworks,
fetchNetworksFromReleases, fetchNetworksFromReleases,
findNetworkByUrl,
}; };

View File

@ -1,7 +1,17 @@
'use strict'; 'use strict';
const Promise = require('bluebird');
const knex = require('./knex'); const knex = require('./knex');
const argv = require('./argv');
const whereOr = require('./utils/where-or'); const whereOr = require('./utils/where-or');
const { storeTags } = require('./tags');
const { storeActors } = require('./actors');
const {
createMediaDirectory,
storePoster,
storePhotos,
storeTrailer,
} = require('./media');
async function curateRelease(release) { async function curateRelease(release) {
const [actors, tags, media] = await Promise.all([ const [actors, tags, media] = await Promise.all([
@ -71,6 +81,69 @@ function curateReleases(releases) {
return Promise.all(releases.map(async release => curateRelease(release))); return Promise.all(releases.map(async release => curateRelease(release)));
} }
/**
 * Translate a scraped release into a row for the `releases` table.
 *
 * @param {Object} release - Release as returned by a scraper; `site` is required.
 * @returns {Object} Row with snake_case columns; missing shoot/entry IDs and
 *   a missing studio become null.
 */
function curateScrapedRelease(release) {
    const { site, studio, rating } = release;

    return {
        site_id: site.id,
        studio_id: studio ? studio.id : null,
        shoot_id: release.shootId || null,
        entry_id: release.entryId || null,
        url: release.url,
        title: release.title,
        date: release.date,
        description: release.description,
        // director: release.director,
        duration: release.duration,
        likes: rating && rating.likes,
        dislikes: rating && rating.dislikes,
        // star ratings are stored rounded down to a whole number
        rating: rating && rating.stars && Math.floor(rating.stars),
        // flagged deep only when a deep run can follow the URL of a released scene
        deep: Boolean(argv.deep && release.url && !release.upcoming),
    };
}
/**
 * Insert a scraped release and store its actors, tags and media.
 *
 * @param {Object} release - Scraped release, including its `site`.
 * @returns {Promise<number|null>} ID of the stored release, or null when the
 *   insert returned no rows.
 */
async function storeRelease(release) {
    const row = curateScrapedRelease(release);
    const entries = await knex('releases')
        .insert(row)
        .returning('*');

    if (!entries.length) {
        console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);

        return null;
    }

    const entry = entries[0];

    console.log(`Stored (${release.site.name}, ${entry.id}) "${release.title}"`);

    // the media directory is created before the media itself is stored
    await createMediaDirectory(release, entry.id);

    const associations = [
        storeActors(release, entry),
        storeTags(release, entry),
        storePhotos(release, entry),
        storePoster(release, entry),
        storeTrailer(release, entry),
    ];

    await Promise.all(associations);

    return entry.id;
}
/**
 * Store a list of scraped releases, two at a time.
 *
 * A release that fails to store is logged and yields null, so one failure
 * does not abort the rest of the batch.
 *
 * @param {Object[]} releases - Scraped releases.
 * @returns {Promise<Array<number|null>>} Stored release IDs, null for each failure.
 */
async function storeReleases(releases) {
    return Promise.map(releases, async (release) => {
        try {
            // await inside the try block; returning the bare promise would let
            // a rejection escape the catch and reject the whole batch
            return await storeRelease(release);
        } catch (error) {
            console.error(error);

            return null;
        }
    }, {
        concurrency: 2,
    });
}
function commonQuery(queryBuilder, { function commonQuery(queryBuilder, {
filter = [], filter = [],
after = new Date(0), // January 1970 after = new Date(0), // January 1970
@ -160,4 +233,6 @@ module.exports = {
fetchSiteReleases, fetchSiteReleases,
fetchNetworkReleases, fetchNetworkReleases,
fetchTagReleases, fetchTagReleases,
storeRelease,
storeReleases,
}; };

58
src/scrape-release.js Normal file
View File

@ -0,0 +1,58 @@
'use strict';
const config = require('config');
const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const { storeRelease } = require('./releases');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
/**
 * Resolve the site a release URL belongs to.
 *
 * Order of preference: the site already attached to the release, a site
 * matching the URL, then the network matching the URL flagged as a fallback.
 *
 * @param {string} url - URL of the release.
 * @param {Object} [release] - Release that may already carry a `site`.
 * @returns {Promise<Object|null>} The site, a network with `isFallback: true`,
 *   or null when nothing matches.
 */
async function findSite(url, release) {
    const knownSite = release && release.site;

    if (knownSite) {
        return knownSite;
    }

    const matchedSite = await findSiteByUrl(url);

    if (matchedSite) {
        return matchedSite;
    }

    const matchedNetwork = await findNetworkByUrl(url);

    return matchedNetwork
        ? { ...matchedNetwork, isFallback: true }
        : null;
}
/**
 * Scrape a single release from its URL.
 *
 * @param {string} url - URL of the release page.
 * @param {Object} [release] - Base release, used to resolve the site when it
 *   already carries one.
 * @param {boolean} [deep=false] - True when called by the site scraper, which
 *   stores releases itself.
 * @returns {Promise<Object>} The scene returned by the scraper's `fetchScene`.
 * @throws {Error} When no site, no scraper, or no `fetchScene` is available.
 */
async function scrapeRelease(url, release, deep = false) {
    const site = await findSite(url, release);

    // check before touching site.slug, findSite resolves to null for unknown URLs
    if (!site) {
        throw new Error('Could not find site in database');
    }

    // a fallback site is built from a network and may carry no nested `network`
    const scraper = scrapers[site.slug] || (site.network && scrapers[site.network.slug]);

    if (!scraper) {
        throw new Error('Could not find scraper for URL');
    }

    if (!scraper.fetchScene) {
        throw new Error(`The '${site.name}'-scraper cannot fetch individual releases`);
    }

    const scene = await scraper.fetchScene(url, site);

    if (!deep && argv.save) {
        // don't store release when called by site scraper
        const releaseId = await storeRelease(scene);

        console.log(`http://${config.web.host}:${config.web.port}/scene/${releaseId}`);
    }

    return scene;
}
module.exports = scrapeRelease;

136
src/scrape-sites.js Normal file
View File

@ -0,0 +1,136 @@
'use strict';
const Promise = require('bluebird');
const moment = require('moment');
const argv = require('./argv');
const knex = require('./knex');
const { fetchIncludedSites } = require('./sites');
const scrapers = require('./scrapers/scrapers');
const scrapeRelease = require('./scrape-release');
const { storeReleases } = require('./releases');
/**
 * Compute the date limit for scraping from the `after` argument.
 *
 * The space-separated parts of `argv.after` are passed as arguments to
 * moment's `subtract` on the current UTC time.
 *
 * @returns {Date} The resulting date.
 */
function getAfterDate() {
    const now = moment.utc();
    const interval = argv.after.split(' ');

    return now.subtract(...interval).toDate();
}
/**
 * Collect the entry IDs that must be treated as duplicates for a scraped page.
 *
 * @param {Object[]} latestReleases - Releases scraped from the current page.
 * @param {Object[]} accReleases - Releases accumulated from earlier pages.
 * @returns {Promise<Set<string>>} Entry IDs, as strings, that are already in
 *   the database or were already collected.
 */
async function findDuplicateReleaseIds(latestReleases, accReleases) {
    const duplicateReleases = await knex('releases')
        .whereIn('entry_id', latestReleases.map(({ entryId }) => entryId));

    // include accumulated releases as duplicates to prevent an infinite
    // loop when the next page contains the same releases as the previous
    const rawIds = [
        ...duplicateReleases.map(release => release.entry_id),
        ...accReleases.map(release => release.entryId),
    ];

    // the caller looks IDs up as strings, so normalize here; missing IDs are
    // left out so they are not matched as the literal 'undefined' or 'null'
    return new Set(rawIds
        .filter(id => id !== undefined && id !== null)
        .map(id => String(id)));
}
/**
 * Scrape a site's overview pages, collecting releases that are new and recent.
 *
 * The function recurses to the next page while the current page still yields
 * unique releases and its oldest release is newer than the date limit. When
 * the oldest release has no date, it keeps going until `argv.pages` is reached.
 *
 * @param {Object} scraper - Scraper exposing `fetchLatest(site, page)`.
 * @param {Object} site - Site to scrape.
 * @param {Date} [afterDate] - Releases on or before this date are ignored.
 * @param {Object[]} [accReleases=[]] - Releases collected on earlier pages.
 * @param {number} [page=1] - Page to fetch.
 * @returns {Promise<Object[]>} All unique recent releases collected so far.
 */
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
    const latestReleases = await scraper.fetchLatest(site, page);

    if (latestReleases.length === 0) {
        // an empty page ends the run; keep what earlier pages collected
        return accReleases;
    }

    // read only after the empty check, an empty page has no last release
    const oldestReleaseOnPage = latestReleases[latestReleases.length - 1].date;
    const duplicateReleaseIds = await findDuplicateReleaseIds(latestReleases, accReleases);

    const uniqueReleases = latestReleases.filter(release => (
        !duplicateReleaseIds.has(String(release.entryId)) // not yet in database or collected
        && moment(release.date).isAfter(afterDate) // not older than specified date limit
    ));

    console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);

    if (
        uniqueReleases.length > 0
        && (oldestReleaseOnPage || page < argv.pages)
        && moment(oldestReleaseOnPage).isAfter(afterDate)
    ) {
        // oldest release on page is newer than specified limit, fetch next page
        return scrapeUniqueReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
    }

    return accReleases.concat(uniqueReleases);
}
/**
 * Fetch a site's upcoming releases, if its scraper supports them.
 *
 * @param {Object} scraper - Scraper that may expose `fetchUpcoming(site)`.
 * @param {Object} site - Site to scrape.
 * @returns {Promise<Object[]>} Upcoming releases flagged with `upcoming: true`,
 *   or an empty list when the scraper has no `fetchUpcoming`.
 */
async function scrapeUpcomingReleases(scraper, site) {
    if (!scraper.fetchUpcoming) {
        return [];
    }

    // fetchUpcoming may return a promise; await it before mapping the releases
    const upcomingReleases = await scraper.fetchUpcoming(site);

    return upcomingReleases.map(release => ({ ...release, upcoming: true }));
}
/**
 * Follow each release's URL and merge the full release into the base release.
 *
 * Releases without a URL are passed through untouched. At most two releases
 * are fetched at a time.
 *
 * @param {Object[]} baseReleases - Releases scraped from an overview page.
 * @returns {Promise<Object[]>} Releases enriched with their scene page data.
 */
async function deepFetchReleases(baseReleases) {
    const fetchFullRelease = async (release) => {
        if (!release.url) {
            return release;
        }

        const fullRelease = await scrapeRelease(release.url, release, true);

        // properties of the full release win over those of the base release
        return { ...release, ...fullRelease };
    };

    return Promise.map(baseReleases, fetchFullRelease, { concurrency: 2 });
}
/**
 * Scrape the recent and upcoming releases of one site.
 *
 * @param {Object} scraper - Scraper for the site.
 * @param {Object} site - Site to scrape.
 * @returns {Promise<Object[]>} Recent releases followed by upcoming releases,
 *   each enriched from its own page when `argv.deep` is set.
 */
async function scrapeSiteReleases(scraper, site) {
    // both overviews are independent, so fetch them side by side
    const overviews = [
        scrapeUniqueReleases(scraper, site), // basic release info from scene overview
        scrapeUpcomingReleases(scraper, site), // basic release info from upcoming overview
    ];
    const [newReleases, upcomingReleases] = await Promise.all(overviews);

    console.log(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);

    const baseReleases = [...newReleases, ...upcomingReleases];

    // a deep run follows the URL of every release
    return argv.deep
        ? deepFetchReleases(baseReleases)
        : baseReleases;
}
/**
 * Scrape every included site and, when `argv.save` is set, store the results.
 *
 * Sites are handled two at a time. A site without a scraper is skipped with a
 * warning, and a failing site is logged without stopping the others.
 *
 * @returns {Promise<void>}
 */
async function scrapeReleases() {
    const sites = await fetchIncludedSites();

    console.log(`Found ${sites.length} sites in database`);

    const scrapeSite = async (site) => {
        // prefer a site-specific scraper, fall back to the network's scraper
        const scraper = scrapers[site.slug] || scrapers[site.network.slug];

        if (!scraper) {
            console.warn(`No scraper found for '${site.name}' (${site.slug})`);
            return;
        }

        try {
            const siteReleases = await scrapeSiteReleases(scraper, site);

            if (argv.save) {
                await storeReleases(siteReleases);
            }
        } catch (error) {
            // the full error is only shown in debug mode
            if (argv.debug) {
                console.error(`${site.id}: Failed to fetch releases`, error);
            } else {
                console.warn(`${site.id}: Failed to fetch releases`);
            }
        }
    };

    await Promise.map(sites, scrapeSite, { concurrency: 2 });
}
module.exports = scrapeReleases;

View File

@ -5,7 +5,7 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const moment = require('moment'); const moment = require('moment');
const fetchSites = require('../sites'); const { fetchSites } = require('../sites');
const { matchTags } = require('../tags'); const { matchTags } = require('../tags');
function scrape(html, site, upcoming) { function scrape(html, site, upcoming) {
@ -23,7 +23,7 @@ function scrape(html, site, upcoming) {
const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`; const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title'); const title = sceneLinkElement.attr('title');
const shootId = url.split('/').slice(-3, -2)[0]; const entryId = url.split('/').slice(-3, -2)[0];
const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate(); const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray(); const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();
@ -36,7 +36,7 @@ function scrape(html, site, upcoming) {
return acc.concat({ return acc.concat({
url, url,
shootId, entryId,
title, title,
actors, actors,
date, date,
@ -56,7 +56,7 @@ async function scrapeScene(html, url, site) {
const videoJson = $('script:contains("window.videoUiOptions")').html(); const videoJson = $('script:contains("window.videoUiOptions")').html();
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('"},') + 2)); const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('"},') + 2));
const shootId = url.split('/').slice(-3, -2)[0]; const entryId = url.split('/').slice(-3, -2)[0];
const title = $('.scene-title[itemprop="name"]').text(); const title = $('.scene-title[itemprop="name"]').text();
const description = $('#scene-description p[itemprop="description"]') const description = $('#scene-description p[itemprop="description"]')
@ -83,20 +83,20 @@ async function scrapeScene(html, url, site) {
const trailer = `https:${videoData.stream_info.http.paths.mp4_480_1500}`; const trailer = `https:${videoData.stream_info.http.paths.mp4_480_1500}`;
const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray(); const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
const [tags, channelSite] = await Promise.all([ const [tags, [channelSite]] = await Promise.all([
matchTags(rawTags), matchTags(rawTags),
site.isFallback site.isFallback
? [fetchSites({ ? fetchSites({
slug: siteSlug, slug: siteSlug,
name: siteName, name: siteName,
url: siteUrl, url: siteUrl,
})] })
: site, : [site],
]); ]);
return { return {
url, url,
shootId, entryId,
title, title,
description, description,
actors, actors,

View File

@ -5,8 +5,8 @@ const Promise = require('bluebird');
const bhttp = require('bhttp'); const bhttp = require('bhttp');
const { JSDOM } = require('jsdom'); const { JSDOM } = require('jsdom');
const moment = require('moment'); const moment = require('moment');
const knex = require('knex');
const knex = require('../knex');
const { matchTags } = require('../tags'); const { matchTags } = require('../tags');
const pluckPhotos = require('../utils/pluck-photos'); const pluckPhotos = require('../utils/pluck-photos');

View File

@ -3,8 +3,8 @@
const bhttp = require('bhttp'); const bhttp = require('bhttp');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const moment = require('moment'); const moment = require('moment');
const knex = require('knex');
const { fetchSites } = require('../sites');
const { matchTags } = require('../tags'); const { matchTags } = require('../tags');
function scrapeLatest(html, site) { function scrapeLatest(html, site) {
@ -71,13 +71,17 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
const { average: stars } = ratingRes.body; const { average: stars } = ratingRes.body;
const sitename = $('.shoot-logo a').attr('href').split('/')[2]; const siteName = $('.shoot-logo a').attr('href').split('/')[2];
const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();
const rawTags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); const rawTags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const [channelSite, tags] = await Promise.all([ const [[channelSite], tags] = await Promise.all([
site.isFallback site.isFallback
? knex('sites').where({ slug: sitename }).first() ? fetchSites({
: site, slug: siteSlug,
name: siteName,
})
: [site],
matchTags(rawTags), matchTags(rawTags),
]); ]);

View File

@ -7,7 +7,7 @@ const moment = require('moment');
function scrape(html, site) { function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const shootId = $('li').attr('id'); const entryId = $('li').attr('id');
const sceneLinkElement = $('#scene_title_border a'); const sceneLinkElement = $('#scene_title_border a');
const url = `${site.url}/${sceneLinkElement.attr('href')}`; const url = `${site.url}/${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
@ -22,7 +22,7 @@ function scrape(html, site) {
return { return {
url, url,
shootId, entryId,
title, title,
actors, actors,
date, date,

View File

@ -8,10 +8,10 @@ const moment = require('moment');
const fetchSites = require('../sites'); const fetchSites = require('../sites');
const { matchTags } = require('../tags'); const { matchTags } = require('../tags');
async function getPhotos(shootId, site) { async function getPhotos(entryId, site) {
const { hostname } = new URL(site.url); const { hostname } = new URL(site.url);
const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${shootId}`); const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
const html = res.body.toString(); const html = res.body.toString();
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
@ -50,7 +50,7 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href'); const url = sceneLinkElement.attr('href');
const title = sceneLinkElement.text(); const title = sceneLinkElement.text();
const shootId = url.split('/').slice(-1)[0]; const entryId = url.split('/').slice(-1)[0];
const date = moment.utc($(element).find('.scene-date'), 'MM/DD/YYYY').toDate(); const date = moment.utc($(element).find('.scene-date'), 'MM/DD/YYYY').toDate();
@ -64,7 +64,7 @@ function scrapeLatest(html, site) {
const scene = { const scene = {
url, url,
shootId, entryId,
title, title,
actors, actors,
date, date,
@ -83,7 +83,7 @@ function scrapeLatest(html, site) {
async function scrapeScene(html, url, site) { async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const shootId = url.split('/').slice(-1)[0]; const entryId = url.split('/').slice(-1)[0];
const title = $('.video-wrapper meta[itemprop="name"]').attr('content'); const title = $('.video-wrapper meta[itemprop="name"]').attr('content');
const date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate(); const date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
@ -93,8 +93,12 @@ async function scrapeScene(html, url, site) {
const [minutes, seconds] = $('.video-wrapper meta[itemprop="duration"]').attr('content').match(/\d+/g); const [minutes, seconds] = $('.video-wrapper meta[itemprop="duration"]').attr('content').match(/\d+/g);
const duration = Number(minutes) * 60 + Number(seconds); const duration = Number(minutes) * 60 + Number(seconds);
const poster = $('meta[property="og:image"]').attr('content'); const posterScript = $('script:contains(poster)').html();
const trailer = $('meta[property="og:video"]').attr('content'); const posterLink = posterScript.slice(posterScript.indexOf('https://'), posterScript.indexOf('.jpg') + 4);
const poster = $('meta[property="og:image"]').attr('content') || posterLink;
const trailerElementSrc = $('#videojs-trailer source').attr('src');
const trailer = $('meta[property="og:video"]').attr('content') || trailerElementSrc;
const likes = Number($('.content-desc #social-actions #likes').text()); const likes = Number($('.content-desc #social-actions #likes').text());
@ -102,13 +106,13 @@ async function scrapeScene(html, url, site) {
const [tags, photos, channelSite] = await Promise.all([ const [tags, photos, channelSite] = await Promise.all([
matchTags(rawTags), matchTags(rawTags),
getPhotos(shootId, site), getPhotos(entryId, site),
getChannelSite($, site), getChannelSite($, site),
]); ]);
const scene = { const scene = {
url, url,
shootId, entryId,
title, title,
date, date,
actors, actors,

View File

@ -23,7 +23,7 @@ function scrapeLatest(html, site) {
const { videos: scenes } = JSON.parse(stateScript.slice(stateScript.indexOf('{'), stateScript.indexOf('};') + 1)); const { videos: scenes } = JSON.parse(stateScript.slice(stateScript.indexOf('{'), stateScript.indexOf('};') + 1));
return scenes.map((scene) => { return scenes.map((scene) => {
const shootId = String(scene.newId); const entryId = String(scene.newId);
const { const {
title, title,
@ -40,7 +40,7 @@ function scrapeLatest(html, site) {
return { return {
url, url,
shootId, entryId,
title, title,
actors, actors,
date, date,
@ -65,8 +65,8 @@ async function scrapeScene(html, url, site) {
const stateObject = $('script:contains("INITIAL_STATE")'); const stateObject = $('script:contains("INITIAL_STATE")');
const data = JSON.parse(stateObject.html().trim().slice(27, -1)); const data = JSON.parse(stateObject.html().trim().slice(27, -1));
const shootId = data.page.data[`${pathname}${search}`].data.video; const entryId = data.page.data[`${pathname}${search}`].data.video;
const scene = data.videos.find(video => video.newId === shootId); const scene = data.videos.find(video => video.newId === entryId);
const [poster, ...photos] = scene.rotatingThumbsUrlSizes.map(photo => photo['1040w']); const [poster, ...photos] = scene.rotatingThumbsUrlSizes.map(photo => photo['1040w']);
const trailer = scene.previews.listing.find(preview => preview.height === 353) || null; const trailer = scene.previews.listing.find(preview => preview.height === 353) || null;
@ -86,7 +86,7 @@ async function scrapeScene(html, url, site) {
return { return {
url, url,
shootId, entryId,
title, title,
description, description,
actors, actors,

View File

@ -3,9 +3,9 @@
const Promise = require('bluebird'); const Promise = require('bluebird');
const bhttp = require('bhttp'); const bhttp = require('bhttp');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const knex = require('knex');
const moment = require('moment'); const moment = require('moment');
const knex = require('../knex');
const { matchTags } = require('../tags'); const { matchTags } = require('../tags');
const defaultTags = { const defaultTags = {

View File

@ -1,9 +1,12 @@
'use strict'; 'use strict';
const config = require('config');
const argv = require('./argv');
const knex = require('./knex'); const knex = require('./knex');
const whereOr = require('./utils/where-or'); const whereOr = require('./utils/where-or');
async function curateSite(site) { async function curateSite(site, includeParameters = false) {
const parameters = JSON.parse(site.parameters); const parameters = JSON.parse(site.parameters);
return { return {
@ -13,12 +16,14 @@ async function curateSite(site) {
description: site.description, description: site.description,
slug: site.slug, slug: site.slug,
independent: !!parameters && parameters.independent, independent: !!parameters && parameters.independent,
parameters: includeParameters ? JSON.parse(site.parameters) : null,
network: { network: {
id: site.network_id, id: site.network_id,
name: site.network_name, name: site.network_name,
description: site.network_description, description: site.network_description,
slug: site.network_slug, slug: site.network_slug,
url: site.network_url, url: site.network_url,
parameters: includeParameters ? JSON.parse(site.network_parameters) : null,
}, },
}; };
} }
@ -27,12 +32,85 @@ function curateSites(sites) {
return Promise.all(sites.map(async site => curateSite(site))); return Promise.all(sites.map(async site => curateSite(site)));
} }
/**
 * Split the configured include list into network entries and site entries.
 *
 * A plain entry is collected as a network. An array entry specifies sites:
 * the items of its second element are collected as sites.
 *
 * @param {Array} networks - Configured include list.
 * @returns {{ networks: Array, sites: Array }} The separated entries.
 */
function destructConfigNetworks(networks) {
    const included = {
        networks: [],
        sites: [],
    };

    for (const network of networks) {
        if (Array.isArray(network)) {
            // network specifies sites
            included.sites.push(...network[1]);
        } else {
            included.networks.push(network);
        }
    }

    return included;
}
/**
 * Look up the site whose stored URL ends with the host of the given URL.
 *
 * @param {string} url - Absolute URL of a page on the site.
 * @returns {Promise<Object|null>} The site curated with its parameters
 *   included, or null when no site matches.
 * @throws {TypeError} When `url` cannot be parsed by the URL constructor.
 */
async function findSiteByUrl(url) {
    const { hostname } = new URL(url);
    // the dot is escaped so only a literal "www." prefix is stripped;
    // unescaped it also ate the first character of hosts like "wwwx.example.com"
    const domain = hostname.replace(/^www\./, '');

    const site = await knex('sites')
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .select(
            'sites.*',
            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
        )
        .where('sites.url', 'like', `%${domain}`)
        .first();

    if (site) {
        return curateSite(site, true);
    }

    return null;
}
/**
 * Fetch the sites selected through the `sites` and `networks` arguments.
 *
 * @returns {Promise<Object[]>} Matching sites, curated with their parameters
 *   included.
 */
async function fetchSitesFromArgv() {
    const rawSites = await knex('sites')
        .select(
            'sites.*',
            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
        )
        .whereIn('sites.slug', argv.sites || [])
        .orWhereIn('networks.slug', argv.networks || [])
        .leftJoin('networks', 'sites.network_id', 'networks.id');

    // curateSites() only accepts the site list and would drop the flag,
    // so curate each site directly to include the parameters
    return Promise.all(rawSites.map(site => curateSite(site, true)));
}
/**
 * Fetch the sites selected by the `include` list of the configuration.
 *
 * @returns {Promise<Object[]>} Sites that are included by slug or belong to an
 *   included network, curated with their parameters included.
 */
async function fetchSitesFromConfig() {
    const included = destructConfigNetworks(config.include);

    const networks = await knex('networks').select('id').whereIn('slug', included.networks || []);
    const networkIds = networks.map(network => network.id);

    const rawSites = await knex('sites')
        // select the same network columns as the other site queries; without
        // network_slug the curated site has no network slug for scraper lookup
        .select(
            'sites.*',
            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
        )
        .whereIn('sites.slug', included.sites || [])
        .orWhereIn('network_id', networkIds)
        .leftJoin('networks', 'sites.network_id', 'networks.id');

    // curateSites() only accepts the site list and would drop the flag,
    // so curate each site directly to include the parameters
    return Promise.all(rawSites.map(site => curateSite(site, true)));
}
/**
 * Fetch the sites to scrape: those named on the command line when a
 * `networks` or `sites` argument is present, otherwise those from the config.
 *
 * @returns {Promise<Object[]>} The selected sites.
 */
async function fetchIncludedSites() {
    const hasSelection = Boolean(argv.networks || argv.sites);

    return hasSelection
        ? fetchSitesFromArgv()
        : fetchSitesFromConfig();
}
async function fetchSites(queryObject) { async function fetchSites(queryObject) {
const sites = await knex('sites') const sites = await knex('sites')
.where(builder => whereOr(queryObject, 'sites', builder)) .where(builder => whereOr(queryObject, 'sites', builder))
.select( .select(
'sites.*', 'sites.*',
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as networks_description', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
) )
.leftJoin('networks', 'sites.network_id', 'networks.id') .leftJoin('networks', 'sites.network_id', 'networks.id')
.limit(100); .limit(100);
@ -51,6 +129,11 @@ async function fetchSitesFromReleases() {
} }
module.exports = { module.exports = {
curateSites,
fetchIncludedSites,
fetchSites, fetchSites,
fetchSitesFromConfig,
fetchSitesFromArgv,
fetchSitesFromReleases, fetchSitesFromReleases,
findSiteByUrl,
}; };

View File

@ -25,7 +25,12 @@ function curateTags(tags) {
} }
async function storeTags(release, releaseEntry) { async function storeTags(release, releaseEntry) {
return knex('tags_associated').insert(release.tags.map(tagId => ({ if (!release.tags || release.tags.length === 0) {
console.warn(`No tags available for (${release.site.name}, ${releaseEntry.id}}) "${release.title}"`);
return;
}
await knex('tags_associated').insert(release.tags.map(tagId => ({
tag_id: tagId, tag_id: tagId,
release_id: releaseEntry.id, release_id: releaseEntry.id,
}))); })));

View File

@ -1,29 +0,0 @@
'use strict';
const moment = require('moment');
// Formatters that turn a release property into display text, keyed by
// property name. Each receives the property value and may receive the
// column definition as its second argument.
const formatters = {
    site: site => site.name,
    network: network => network.name,
    // falls back to 'MMM DD, YYYY' when the column defines no format
    date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'),
    actors: actors => actors.join(', '),
    rating: (rating) => {
        const unrated = '\x1b[90mUnrated\x1b[0m';

        if ((rating.likes === 0 && rating.dislikes === 0) || rating.stars === 0) {
            return unrated;
        }

        // only likes are known
        if (rating.likes !== undefined && rating.dislikes === undefined) {
            return `\x1b[93m★\x1b[0m N/A \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)}`;
        }

        if (rating.stars) {
            return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
        }

        // neither stars nor votes are available; without this guard the
        // star value below would be null and calling toFixed on it would throw
        if (!rating.likes && !rating.dislikes) {
            return unrated;
        }

        // derive a star value from the votes, rounded down to two decimals
        const stars = Math.floor(((rating.likes * 5 + rating.dislikes) / (rating.likes + rating.dislikes)) * 100) / 100;

        return `\x1b[93m★\x1b[0m ${stars.toFixed(2)} \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)} \x1b[31m▼\x1b[0m ${String(rating.dislikes).padEnd(3)}`;
    },
};
module.exports = formatters;

View File

@ -1,111 +0,0 @@
'use strict';
const config = require('config');
const blessed = require('neo-blessed');
const opn = require('opn');
const formatters = require('./formatters');
/**
 * Render the given scenes as a selectable table on a blessed screen.
 *
 * The table layout comes from `config.columns`; each column contributes a
 * `width` and a `value` naming the scene property it shows. Selecting a row
 * opens the scene's URL with `opn`.
 *
 * @param {Object[]} scenes - Scenes to list, one row each.
 * @param {Object} screen - Screen the widgets are appended to.
 */
function renderReleases(scenes, screen) {
screen.realloc();
// top border, one segment per configured column
const tableTop = blessed.Text({
content: config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┬' : '┐\x1b[0m'}`, '\x1b[30m┌'),
});
// one list item per scene: a row of cells, followed by a separator line except for the last scene
const items = scenes.map((scene, sceneIndex) => {
const row = config.columns.reduce((acc, column) => {
// use the column's formatter when there is one, else the raw value; a falsy value shows as "Not available"
const value = (scene[column.value] && (formatters[column.value]
? formatters[column.value](scene[column.value], column)
: scene[column.value])
.toString()) || '\x1b[90mNot available\x1b[0m';
// visible length, ignoring color escape sequences
const realLength = value.replace(/\x1b\[\d+m/g, '').length; // eslint-disable-line no-control-regex
// number of characters taken up by escape sequences, added back when padding
const entityLength = value.length - realLength;
// NOTE(review): the slice counts escape sequences as characters, so colored values may be cut shorter than the column allows
const truncatedValue = realLength > column.width - 2 ? `${value.slice(0, column.width - 2 - 3)}...` : value;
const paddedValue = truncatedValue.padEnd(column.width + entityLength - 1).padStart(column.width + entityLength);
// upcoming scenes use a different color than released scenes
const coloredValue = scene.upcoming ? `\x1b[92m${paddedValue}\x1b[0m` : `\x1b[97m${paddedValue}\x1b[0m`;
return `${acc}${coloredValue}\x1b[90m│\x1b[0m`;
}, '\x1b[90m│\x1b[0m');
if (sceneIndex < scenes.length - 1) {
const line = config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┼' : '┤\x1b[0m'}`, '\n\x1b[30m├');
return `${row}${line}`;
}
// NOTE(review): the last row gets the scene index appended instead of a separator; this looks unintended, confirm
return `${row}${sceneIndex}`;
});
const menu = blessed.List({
style: {
selected: {
bold: true,
},
},
top: 1,
// leave room for the top border and the bottom border
height: screen.rows - 3,
// width: 161,
width: config.columns.reduce((acc, column) => acc + column.width, 0),
keys: true,
vi: true,
mouse: true,
scrollbar: {
style: {
bg: 'red',
},
track: {
bg: 'magenta',
},
},
items,
});
// replace the list's search handler with a textbox; submitting passes its value to the callback
menu.search = (cb) => {
const searchbox = blessed.Textbox({
inputOnFocus: true,
});
screen.append(searchbox);
searchbox.focus();
screen.render();
searchbox.on('submit', () => {
menu.focus();
cb(null, searchbox.value);
screen.append(menu);
screen.render();
});
};
// bottom border, placed below the list
const tableBottom = blessed.Text({
content: config.columns.reduce((acc, column, index) => `${acc}${'─'.repeat(column.width)}${index < config.columns.length - 1 ? '┴' : '┘\x1b[0m\n'}`, '\x1b[30m└'),
top: screen.rows - 2,
});
screen.append(tableTop);
screen.append(menu);
screen.append(tableBottom);
menu.focus();
// open the selected scene's URL
menu.on('select', (child) => {
const scene = scenes[menu.getItemIndex(child)];
opn(scene.url);
});
screen.render();
}
/**
 * Print a single scene to the console.
 *
 * @param {Object} scene - Scene to print.
 * @param {Object} _screen - Unused.
 */
function renderScene(scene, _screen) {
    // the scene is logged as-is, no formatting is applied
    console.log(scene);
}
module.exports = {
renderReleases,
renderScene,
};

View File

@ -63,8 +63,10 @@ function initServer() {
app.use(router); app.use(router);
app.listen(config.web.port, () => { const server = app.listen(config.web.port, config.web.host, () => {
console.log(`Web server listening on port ${config.web.port}`); const { address, port } = server.address();
console.log(`Web server listening on ${address}:${port}`);
}); });
} }