Compare commits
4 Commits
8ca98b394f
...
6bfc5e4378
Author | SHA1 | Date |
---|---|---|
|
6bfc5e4378 | |
|
5c55750c0c | |
|
61a795d634 | |
|
638757b6e4 |
|
@ -98,7 +98,7 @@
|
||||||
<div class="info column">
|
<div class="info column">
|
||||||
<h2 class="row title">{{ release.title }}</h2>
|
<h2 class="row title">{{ release.title }}</h2>
|
||||||
|
|
||||||
<div class="row">
|
<div class="row associations">
|
||||||
<ul class="actors nolist">
|
<ul class="actors nolist">
|
||||||
<li
|
<li
|
||||||
v-for="actor in release.actors"
|
v-for="actor in release.actors"
|
||||||
|
@ -107,24 +107,26 @@
|
||||||
<Actor :actor="actor" />
|
<Actor :actor="actor" />
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
|
<div
|
||||||
|
v-if="release.movies && release.movies.length > 0"
|
||||||
|
class="movies"
|
||||||
|
>
|
||||||
|
<Release :release="release.movies[0]" />
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-if="release.scenes && release.scenes.length > 0">
|
<div
|
||||||
|
v-if="release.scenes && release.scenes.length > 0"
|
||||||
|
class="scenes"
|
||||||
|
>
|
||||||
<h3>Scenes</h3>
|
<h3>Scenes</h3>
|
||||||
|
|
||||||
<Releases
|
<Releases
|
||||||
v-if="release.scenes && release.scenes.length > 0"
|
|
||||||
:releases="release.scenes"
|
:releases="release.scenes"
|
||||||
class="row"
|
class="row"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-if="release.movie">
|
|
||||||
<h3>Movie</h3>
|
|
||||||
|
|
||||||
<Release :release="release.movie" />
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div
|
<div
|
||||||
v-if="release.tags.length > 0"
|
v-if="release.tags.length > 0"
|
||||||
class="row"
|
class="row"
|
||||||
|
@ -254,8 +256,8 @@ export default {
|
||||||
components: {
|
components: {
|
||||||
Actor,
|
Actor,
|
||||||
Banner,
|
Banner,
|
||||||
Releases,
|
|
||||||
Release,
|
Release,
|
||||||
|
Releases,
|
||||||
},
|
},
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
|
@ -295,6 +297,10 @@ export default {
|
||||||
align-items: center;
|
align-items: center;
|
||||||
margin: 0 0 1rem 0;
|
margin: 0 0 1rem 0;
|
||||||
|
|
||||||
|
&.associations {
|
||||||
|
align-items: start;
|
||||||
|
}
|
||||||
|
|
||||||
.icon {
|
.icon {
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
width: 1rem;
|
width: 1rem;
|
||||||
|
@ -395,6 +401,7 @@ export default {
|
||||||
|
|
||||||
.actors {
|
.actors {
|
||||||
display: flex;
|
display: flex;
|
||||||
|
flex-grow: 1;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -403,6 +410,9 @@ export default {
|
||||||
margin: 0 1rem .5rem 0;
|
margin: 0 1rem .5rem 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.movies {
|
||||||
|
}
|
||||||
|
|
||||||
.filename {
|
.filename {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
padding: .5rem;
|
padding: .5rem;
|
||||||
|
|
|
@ -26,11 +26,15 @@ function curateRelease(release) {
|
||||||
};
|
};
|
||||||
|
|
||||||
if (release.site) curatedRelease.network = release.site.network;
|
if (release.site) curatedRelease.network = release.site.network;
|
||||||
|
if (release.scenes) curatedRelease.scenes = release.scenes.map(({ scene }) => curateRelease(scene));
|
||||||
|
if (release.movies) curatedRelease.movies = release.movies.map(({ movie }) => curateRelease(movie));
|
||||||
if (release.photos) curatedRelease.photos = release.photos.map(({ media }) => media);
|
if (release.photos) curatedRelease.photos = release.photos.map(({ media }) => media);
|
||||||
if (release.covers) curatedRelease.covers = release.covers.map(({ media }) => media);
|
if (release.covers) curatedRelease.covers = release.covers.map(({ media }) => media);
|
||||||
if (release.trailer) curatedRelease.trailer = release.trailer.media;
|
if (release.trailer) curatedRelease.trailer = release.trailer.media;
|
||||||
if (release.teaser) curatedRelease.teaser = release.teaser.media;
|
if (release.teaser) curatedRelease.teaser = release.teaser.media;
|
||||||
if (release.actors) curatedRelease.actors = release.actors.map(({ actor }) => curateActor(actor, curatedRelease));
|
if (release.actors) curatedRelease.actors = release.actors.map(({ actor }) => curateActor(actor, curatedRelease));
|
||||||
|
if (release.movieTags && release.movieTags.length > 0) curatedRelease.tags = release.movieTags.map(({ tag }) => tag);
|
||||||
|
if (release.movieActors && release.movieActors.length > 0) curatedRelease.actors = release.movieActors.map(({ actor }) => curateActor(actor, curatedRelease));
|
||||||
|
|
||||||
return curatedRelease;
|
return curatedRelease;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,9 +30,7 @@ const sitesFragment = `
|
||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const releaseActorsFragment = `
|
const actorFields = `
|
||||||
actors: releasesActors(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) {
|
|
||||||
actor {
|
|
||||||
id
|
id
|
||||||
name
|
name
|
||||||
slug
|
slug
|
||||||
|
@ -49,6 +47,12 @@ const releaseActorsFragment = `
|
||||||
thumbnail
|
thumbnail
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
const releaseActorsFragment = `
|
||||||
|
actors: releasesActors(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) {
|
||||||
|
actor {
|
||||||
|
${actorFields}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
@ -165,6 +169,18 @@ const releasesFragment = `
|
||||||
orderBy: $orderBy,
|
orderBy: $orderBy,
|
||||||
) {
|
) {
|
||||||
${releaseFields}
|
${releaseFields}
|
||||||
|
movieActors: movieActorsByMovieId(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) {
|
||||||
|
actor {
|
||||||
|
${actorFields}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
movieTags: movieTagsByMovieId(orderBy: TAG_BY_TAG_ID__PRIORITY_DESC) {
|
||||||
|
tag {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
slug
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
|
||||||
|
@ -186,6 +202,42 @@ const releaseFragment = `
|
||||||
${releaseTrailerFragment}
|
${releaseTrailerFragment}
|
||||||
${releaseTeaserFragment}
|
${releaseTeaserFragment}
|
||||||
${siteFragment}
|
${siteFragment}
|
||||||
|
movieActors: movieActorsByMovieId(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) {
|
||||||
|
actor {
|
||||||
|
${actorFields}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
movieTags: movieTagsByMovieId(orderBy: TAG_BY_TAG_ID__PRIORITY_DESC) {
|
||||||
|
tag {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
movies: releasesMoviesBySceneId {
|
||||||
|
movie {
|
||||||
|
id
|
||||||
|
title
|
||||||
|
date
|
||||||
|
slug
|
||||||
|
createdAt
|
||||||
|
url
|
||||||
|
${releaseCoversFragment}
|
||||||
|
${siteFragment}
|
||||||
|
actors: movieActorsByMovieId {
|
||||||
|
actor {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scenes: releasesMoviesByMovieId {
|
||||||
|
scene {
|
||||||
|
${releaseFields}
|
||||||
|
}
|
||||||
|
}
|
||||||
studio {
|
studio {
|
||||||
id
|
id
|
||||||
name
|
name
|
||||||
|
|
|
@ -66,6 +66,8 @@ module.exports = {
|
||||||
'21sextury',
|
'21sextury',
|
||||||
'julesjordan',
|
'julesjordan',
|
||||||
'naughtyamerica',
|
'naughtyamerica',
|
||||||
|
'cherrypimps',
|
||||||
|
'pimpxxx',
|
||||||
[
|
[
|
||||||
'hussiepass',
|
'hussiepass',
|
||||||
'hushpass',
|
'hushpass',
|
||||||
|
@ -75,16 +77,6 @@ module.exports = {
|
||||||
'seehimfuck',
|
'seehimfuck',
|
||||||
'eyeontheguy',
|
'eyeontheguy',
|
||||||
],
|
],
|
||||||
[
|
|
||||||
'cherrypimps',
|
|
||||||
'drilledxxx',
|
|
||||||
'wildoncam',
|
|
||||||
'bcmxxx',
|
|
||||||
'familyxxx',
|
|
||||||
'petitexxx',
|
|
||||||
'confessionsxxx',
|
|
||||||
'cuckedxxx',
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
// Full Porn Network
|
// Full Porn Network
|
||||||
'analized',
|
'analized',
|
||||||
|
|
|
@ -362,10 +362,6 @@ exports.up = knex => Promise.resolve()
|
||||||
table.integer('duration')
|
table.integer('duration')
|
||||||
.unsigned();
|
.unsigned();
|
||||||
|
|
||||||
table.integer('parent_id', 16)
|
|
||||||
.references('id')
|
|
||||||
.inTable('releases');
|
|
||||||
|
|
||||||
table.boolean('deep');
|
table.boolean('deep');
|
||||||
table.string('deep_url', 1000);
|
table.string('deep_url', 1000);
|
||||||
|
|
||||||
|
@ -392,6 +388,25 @@ exports.up = knex => Promise.resolve()
|
||||||
.inTable('actors');
|
.inTable('actors');
|
||||||
|
|
||||||
table.unique(['release_id', 'actor_id']);
|
table.unique(['release_id', 'actor_id']);
|
||||||
|
|
||||||
|
table.datetime('created_at')
|
||||||
|
.defaultTo(knex.fn.now());
|
||||||
|
}))
|
||||||
|
.then(() => knex.schema.createTable('releases_movies', (table) => {
|
||||||
|
table.integer('movie_id', 16)
|
||||||
|
.notNullable()
|
||||||
|
.references('id')
|
||||||
|
.inTable('releases');
|
||||||
|
|
||||||
|
table.integer('scene_id', 16)
|
||||||
|
.notNullable()
|
||||||
|
.references('id')
|
||||||
|
.inTable('releases');
|
||||||
|
|
||||||
|
table.unique(['movie_id', 'scene_id']);
|
||||||
|
|
||||||
|
table.datetime('created_at')
|
||||||
|
.defaultTo(knex.fn.now());
|
||||||
}))
|
}))
|
||||||
.then(() => knex.schema.createTable('releases_directors', (table) => {
|
.then(() => knex.schema.createTable('releases_directors', (table) => {
|
||||||
table.integer('release_id', 16)
|
table.integer('release_id', 16)
|
||||||
|
@ -526,6 +541,21 @@ exports.up = knex => Promise.resolve()
|
||||||
SELECT NOT EXISTS(SELECT true FROM batches WHERE batches.id = release.created_batch_id + 1 LIMIT 1);
|
SELECT NOT EXISTS(SELECT true FROM batches WHERE batches.id = release.created_batch_id + 1 LIMIT 1);
|
||||||
$$ LANGUAGE sql STABLE;
|
$$ LANGUAGE sql STABLE;
|
||||||
|
|
||||||
|
CREATE VIEW movie_actors AS
|
||||||
|
SELECT releases_movies.movie_id, releases_actors.actor_id FROM releases_movies
|
||||||
|
LEFT JOIN releases ON releases.id = releases_movies.scene_id
|
||||||
|
LEFT JOIN releases_actors ON releases_actors.release_id = releases.id
|
||||||
|
GROUP BY movie_id, actor_id;
|
||||||
|
|
||||||
|
CREATE VIEW movie_tags AS
|
||||||
|
SELECT releases_movies.movie_id, releases_tags.tag_id FROM releases_movies
|
||||||
|
LEFT JOIN releases ON releases.id = releases_movies.scene_id
|
||||||
|
LEFT JOIN releases_tags ON releases_tags.release_id = releases.id
|
||||||
|
GROUP BY movie_id, tag_id;
|
||||||
|
|
||||||
|
COMMENT ON VIEW movie_actors IS E'@foreignKey (movie_id) references releases (id)\n@foreignKey (actor_id) references actors (id)';
|
||||||
|
COMMENT ON VIEW movie_tags IS E'@foreignKey (movie_id) references releases (id)\n@foreignKey (tag_id) references tags (id)';
|
||||||
|
|
||||||
COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many';
|
COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many';
|
||||||
COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many';
|
COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many';
|
||||||
`));
|
`));
|
||||||
|
@ -534,9 +564,11 @@ exports.down = knex => knex.raw(`
|
||||||
DROP FUNCTION IF EXISTS releases_by_tag_slugs;
|
DROP FUNCTION IF EXISTS releases_by_tag_slugs;
|
||||||
DROP FUNCTION IF EXISTS search_sites;
|
DROP FUNCTION IF EXISTS search_sites;
|
||||||
|
|
||||||
DROP VIEW IF EXISTS releases_actors_view;
|
DROP VIEW IF EXISTS movie_actors;
|
||||||
|
DROP VIEW IF EXISTS movie_tags;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS releases_actors CASCADE;
|
DROP TABLE IF EXISTS releases_actors CASCADE;
|
||||||
|
DROP TABLE IF EXISTS releases_movies CASCADE;
|
||||||
DROP TABLE IF EXISTS releases_directors CASCADE;
|
DROP TABLE IF EXISTS releases_directors CASCADE;
|
||||||
DROP TABLE IF EXISTS releases_posters CASCADE;
|
DROP TABLE IF EXISTS releases_posters CASCADE;
|
||||||
DROP TABLE IF EXISTS releases_photos CASCADE;
|
DROP TABLE IF EXISTS releases_photos CASCADE;
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "traxxx",
|
"name": "traxxx",
|
||||||
"version": "1.109.0",
|
"version": "1.109.1",
|
||||||
"lockfileVersion": 1,
|
"lockfileVersion": 1,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "traxxx",
|
"name": "traxxx",
|
||||||
"version": "1.109.0",
|
"version": "1.109.1",
|
||||||
"description": "All the latest porn releases in one place",
|
"description": "All the latest porn releases in one place",
|
||||||
"main": "src/app.js",
|
"main": "src/app.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|
|
@ -118,7 +118,7 @@ const tags = [
|
||||||
{
|
{
|
||||||
name: 'asian',
|
name: 'asian',
|
||||||
slug: 'asian',
|
slug: 'asian',
|
||||||
priority: 7,
|
priority: 5,
|
||||||
group: 'ethnicity',
|
group: 'ethnicity',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -342,7 +342,7 @@ const tags = [
|
||||||
{
|
{
|
||||||
name: 'ebony',
|
name: 'ebony',
|
||||||
slug: 'ebony',
|
slug: 'ebony',
|
||||||
priority: 7,
|
priority: 5,
|
||||||
group: 'ethnicity',
|
group: 'ethnicity',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -427,7 +427,7 @@ const tags = [
|
||||||
{
|
{
|
||||||
name: 'gaping',
|
name: 'gaping',
|
||||||
slug: 'gaping',
|
slug: 'gaping',
|
||||||
priority: 7,
|
priority: 6,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'gay',
|
name: 'gay',
|
||||||
|
@ -481,7 +481,7 @@ const tags = [
|
||||||
{
|
{
|
||||||
name: 'Latina',
|
name: 'Latina',
|
||||||
slug: 'latina',
|
slug: 'latina',
|
||||||
priority: 7,
|
priority: 5,
|
||||||
group: 'ethnicity',
|
group: 'ethnicity',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -5,7 +5,7 @@ const knex = require('./knex');
|
||||||
const initServer = require('./web/server');
|
const initServer = require('./web/server');
|
||||||
|
|
||||||
const scrapeSites = require('./scrape-sites');
|
const scrapeSites = require('./scrape-sites');
|
||||||
const { scrapeReleases, deepFetchReleases } = require('./scrape-releases');
|
const { scrapeScenes, scrapeMovies, deepFetchReleases } = require('./scrape-releases');
|
||||||
const { storeReleases } = require('./releases');
|
const { storeReleases } = require('./releases');
|
||||||
const { scrapeActors, scrapeBasicActors } = require('./actors');
|
const { scrapeActors, scrapeBasicActors } = require('./actors');
|
||||||
|
|
||||||
|
@ -15,11 +15,11 @@ if (process.env.NODE_ENV === 'development') {
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
if (argv.scene) {
|
if (argv.scene) {
|
||||||
await scrapeReleases(argv.scene, null, 'scene');
|
await scrapeScenes(argv.scene);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argv.movie) {
|
if (argv.movie) {
|
||||||
await scrapeReleases(argv.movie, null, 'movie');
|
await scrapeMovies(argv.movie);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argv.scrape || argv.networks || argv.sites) {
|
if (argv.scrape || argv.networks || argv.sites) {
|
||||||
|
|
19
src/argv.js
19
src/argv.js
|
@ -29,12 +29,17 @@ const { argv } = yargs
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'actor',
|
alias: 'actor',
|
||||||
})
|
})
|
||||||
.option('with-releases', {
|
.option('with-scenes', {
|
||||||
describe: 'Fetch all releases for an actor',
|
describe: 'Fetch all scenes for an actor or movie',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
alias: 'with-scenes',
|
alias: 'with-releases',
|
||||||
default: false,
|
default: false,
|
||||||
})
|
})
|
||||||
|
.option('with-movies', {
|
||||||
|
describe: 'Fetch movies for scenes',
|
||||||
|
type: 'boolean',
|
||||||
|
default: true,
|
||||||
|
})
|
||||||
.option('with-profiles', {
|
.option('with-profiles', {
|
||||||
describe: 'Scrape profiles for new actors after fetching scenes',
|
describe: 'Scrape profiles for new actors after fetching scenes',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
|
@ -44,12 +49,12 @@ const { argv } = yargs
|
||||||
.option('scene', {
|
.option('scene', {
|
||||||
describe: 'Scrape scene info from URL',
|
describe: 'Scrape scene info from URL',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'release',
|
alias: 'scenes',
|
||||||
})
|
})
|
||||||
.option('movie', {
|
.option('movie', {
|
||||||
describe: 'Scrape movie info from URL',
|
describe: 'Scrape movie info from URL',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'dvd',
|
alias: 'movies',
|
||||||
})
|
})
|
||||||
.option('sources', {
|
.option('sources', {
|
||||||
describe: 'Use these scrapers for actor data',
|
describe: 'Use these scrapers for actor data',
|
||||||
|
@ -121,11 +126,13 @@ const { argv } = yargs
|
||||||
describe: 'Include release posters',
|
describe: 'Include release posters',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
default: true,
|
default: true,
|
||||||
|
alias: 'poster',
|
||||||
})
|
})
|
||||||
.option('covers', {
|
.option('covers', {
|
||||||
describe: 'Include release covers',
|
describe: 'Include release covers',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
default: true,
|
default: true,
|
||||||
|
alias: 'cover',
|
||||||
})
|
})
|
||||||
.option('photos', {
|
.option('photos', {
|
||||||
describe: 'Include release photos',
|
describe: 'Include release photos',
|
||||||
|
@ -136,11 +143,13 @@ const { argv } = yargs
|
||||||
describe: 'Include release trailers',
|
describe: 'Include release trailers',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
default: true,
|
default: true,
|
||||||
|
alias: 'trailer',
|
||||||
})
|
})
|
||||||
.option('teasers', {
|
.option('teasers', {
|
||||||
describe: 'Include release teasers',
|
describe: 'Include release teasers',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
default: true,
|
default: true,
|
||||||
|
alias: 'teaser',
|
||||||
})
|
})
|
||||||
.option('avatars', {
|
.option('avatars', {
|
||||||
describe: 'Include actor avatars',
|
describe: 'Include actor avatars',
|
||||||
|
|
98
src/media.js
98
src/media.js
|
@ -85,87 +85,91 @@ async function extractItem(source) {
|
||||||
const res = await get(source.src);
|
const res = await get(source.src);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
const { q } = ex(res.body.toString());
|
const { qu } = ex(res.body.toString());
|
||||||
|
|
||||||
return source.extract(q);
|
return source.extract(qu);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function fetchSource(source, domain, role, originalSource) {
|
||||||
|
logger.verbose(`Fetching ${domain} ${role} from ${source.src || source}`);
|
||||||
|
|
||||||
|
// const res = await bhttp.get(source.src || source);
|
||||||
|
const res = await get(source.src || source, {
|
||||||
|
headers: {
|
||||||
|
...(source.referer && { referer: source.referer }),
|
||||||
|
...(source.host && { host: source.host }),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (res.statusCode === 200) {
|
||||||
|
const { pathname } = new URL(source.src || source);
|
||||||
|
const mimetype = mime.getType(pathname);
|
||||||
|
const extension = mime.getExtension(mimetype);
|
||||||
|
const hash = getHash(res.body);
|
||||||
|
const { entropy, size, width, height } = /image/.test(mimetype) ? await getMeta(res.body) : {};
|
||||||
|
|
||||||
|
logger.verbose(`Fetched media item from ${source.src || source}`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
file: res.body,
|
||||||
|
mimetype,
|
||||||
|
extension,
|
||||||
|
hash,
|
||||||
|
entropy: entropy || null,
|
||||||
|
size: size || null,
|
||||||
|
width: width || null,
|
||||||
|
height: height || null,
|
||||||
|
quality: source.quality || null,
|
||||||
|
source: originalSource?.src || originalSource || source.src || source,
|
||||||
|
scraper: source.scraper,
|
||||||
|
copyright: source.copyright,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error(`Response ${res.statusCode} not OK`);
|
||||||
|
}
|
||||||
|
|
||||||
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
|
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
|
||||||
if (!source) return null;
|
if (!source) return null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (Array.isArray(source)) {
|
if (Array.isArray(source)) {
|
||||||
if (source.every(sourceX => !!sourceX.quality)) {
|
if (source.every(sourceX => sourceX.quality)) {
|
||||||
// various video qualities provided
|
// various video qualities provided
|
||||||
const selectedSource = pickQuality(source);
|
const selectedSource = pickQuality(source);
|
||||||
return fetchItem(selectedSource, index, existingItemsBySource, domain, role, attempt, originalSource);
|
return fetchItem(selectedSource, index, existingItemsBySource, domain, role, attempt, originalSource);
|
||||||
}
|
}
|
||||||
|
|
||||||
// fallbacks provided
|
// fallbacks provided
|
||||||
return source.reduce(
|
return source.reduce((outcome, sourceX, sourceIndexX) => outcome.catch(
|
||||||
(outcome, sourceX, sourceIndexX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource, sourceIndexX)),
|
async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, source, sourceIndexX),
|
||||||
Promise.reject(new Error()),
|
), Promise.reject(new Error()));
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (source.src && source.extract) {
|
if (source.src && source.extract) {
|
||||||
// source links to page containing a (presumably) tokenized photo
|
// source links to page containing a (presumably) tokenized photo
|
||||||
const itemSource = await extractItem(source);
|
const itemSource = await extractItem(source);
|
||||||
|
|
||||||
return fetchItem(itemSource, index, existingItemsBySource, domain, role, attempt, source);
|
return fetchItem(itemSource, index, existingItemsBySource, domain, role, attempt, source, sourceIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (existingItemsBySource[source]) {
|
if (existingItemsBySource[source]) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.verbose(`Fetching ${domain} ${role} from ${source.src || source}`);
|
return fetchSource(source, domain, role, originalSource);
|
||||||
|
|
||||||
// const res = await bhttp.get(source.src || source);
|
|
||||||
const res = await get(source.src || source, {
|
|
||||||
headers: {
|
|
||||||
...(source.referer && { referer: source.referer }),
|
|
||||||
...(source.host && { host: source.host }),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
|
||||||
const { pathname } = new URL(source.src || source);
|
|
||||||
const mimetype = mime.getType(pathname);
|
|
||||||
const extension = mime.getExtension(mimetype);
|
|
||||||
const hash = getHash(res.body);
|
|
||||||
const { entropy, size, width, height } = /image/.test(mimetype) ? await getMeta(res.body) : {};
|
|
||||||
|
|
||||||
logger.verbose(`Fetched media item from ${source.src || source}`);
|
|
||||||
|
|
||||||
return {
|
|
||||||
file: res.body,
|
|
||||||
mimetype,
|
|
||||||
extension,
|
|
||||||
hash,
|
|
||||||
entropy: entropy || null,
|
|
||||||
size: size || null,
|
|
||||||
width: width || null,
|
|
||||||
height: height || null,
|
|
||||||
quality: source.quality || null,
|
|
||||||
source: originalSource?.src || originalSource || source.src || source,
|
|
||||||
scraper: source.scraper,
|
|
||||||
copyright: source.copyright,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new Error(`Response ${res.statusCode} not OK`);
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`);
|
logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`);
|
||||||
|
|
||||||
|
/*
|
||||||
if (attempt < 3) {
|
if (attempt < 3) {
|
||||||
await Promise.delay(5000);
|
await Promise.delay(5000);
|
||||||
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource);
|
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource, sourceIndex);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
if (originalSource && sourceIndex < originalSource.length) {
|
if (originalSource && sourceIndex < originalSource.length) {
|
||||||
throw error;
|
throw error;
|
||||||
|
@ -351,7 +355,7 @@ function associateTargetMedia(targetId, sources, mediaBySource, domain, role, pr
|
||||||
if (!source) return null;
|
if (!source) return null;
|
||||||
|
|
||||||
const mediaItem = Array.isArray(source)
|
const mediaItem = Array.isArray(source)
|
||||||
? source.reduce((acc, sourceX) => acc || mediaBySource[sourceX.src || sourceX], null)
|
? mediaBySource[source.map(sourceX => sourceX.src || sourceX).toString()]
|
||||||
: mediaBySource[source.src || source];
|
: mediaBySource[source.src || source];
|
||||||
|
|
||||||
// return mediaItem && { [`${domain}_id`]: targetId, media_id: mediaItem.id };
|
// return mediaItem && { [`${domain}_id`]: targetId, media_id: mediaItem.id };
|
||||||
|
|
|
@ -214,7 +214,6 @@ async function curateReleaseEntry(release, batchId, existingRelease) {
|
||||||
studio_id: release.studio ? release.studio.id : null,
|
studio_id: release.studio ? release.studio.id : null,
|
||||||
shoot_id: release.shootId || null,
|
shoot_id: release.shootId || null,
|
||||||
entry_id: release.entryId || null,
|
entry_id: release.entryId || null,
|
||||||
parent_id: release.parentId,
|
|
||||||
type: release.type,
|
type: release.type,
|
||||||
url: release.url,
|
url: release.url,
|
||||||
title: release.title,
|
title: release.title,
|
||||||
|
@ -327,21 +326,6 @@ function accumulateActors(releases) {
|
||||||
}, {});
|
}, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
function accumulateMovies(releases) {
|
|
||||||
return releases.reduce((acc, release) => {
|
|
||||||
if (release.movie) {
|
|
||||||
if (acc[release.movie]) {
|
|
||||||
acc[release.movie] = acc[release.movie].concat(release.id);
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
|
|
||||||
acc[release.movie] = [release.id];
|
|
||||||
}
|
|
||||||
|
|
||||||
return acc;
|
|
||||||
}, {});
|
|
||||||
}
|
|
||||||
|
|
||||||
async function storeReleaseAssets(releases) {
|
async function storeReleaseAssets(releases) {
|
||||||
if (!argv.media) {
|
if (!argv.media) {
|
||||||
return;
|
return;
|
||||||
|
@ -385,24 +369,19 @@ async function storeReleaseAssets(releases) {
|
||||||
async function updateReleasesSearch(releaseIds) {
|
async function updateReleasesSearch(releaseIds) {
|
||||||
const documents = await knex.raw(`
|
const documents = await knex.raw(`
|
||||||
SELECT
|
SELECT
|
||||||
releases.id as release_id,
|
releases.id AS release_id,
|
||||||
to_tsvector(
|
TO_TSVECTOR(
|
||||||
'traxxx',
|
'traxxx',
|
||||||
releases.title || ' ' ||
|
releases.title || ' ' ||
|
||||||
sites.name || ' ' ||
|
sites.name || ' ' ||
|
||||||
sites.slug || ' ' ||
|
sites.slug || ' ' ||
|
||||||
networks.name || ' ' ||
|
networks.name || ' ' ||
|
||||||
networks.slug || ' ' ||
|
networks.slug || ' ' ||
|
||||||
coalesce(releases.shoot_id, '') || ' ' ||
|
COALESCE(releases.shoot_id, '') || ' ' ||
|
||||||
EXTRACT(YEAR FROM releases.date) || ' ' ||
|
COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMD'), '') || ' ' ||
|
||||||
CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR) || ' ' ||
|
STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
|
||||||
CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR) || ' ' ||
|
STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' ||
|
||||||
SUBSTRING(CAST(EXTRACT(YEAR FROM releases.date) AS VARCHAR) FROM 3 for 2) || ' ' ||
|
STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
|
||||||
LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
|
|
||||||
LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' ||
|
|
||||||
string_agg(coalesce(actors.name, ''), ' ') || ' ' ||
|
|
||||||
string_agg(coalesce(tags.name, ''), ' ') || ' ' ||
|
|
||||||
string_agg(coalesce(tags_aliases.name, ''), ' ')
|
|
||||||
) as document
|
) as document
|
||||||
FROM releases
|
FROM releases
|
||||||
LEFT JOIN sites ON releases.site_id = sites.id
|
LEFT JOIN sites ON releases.site_id = sites.id
|
||||||
|
@ -501,7 +480,6 @@ async function storeReleases(releases) {
|
||||||
logger.info(`Stored ${storedReleases.length} new releases`);
|
logger.info(`Stored ${storedReleases.length} new releases`);
|
||||||
|
|
||||||
const actors = accumulateActors(storedReleases);
|
const actors = accumulateActors(storedReleases);
|
||||||
const movies = accumulateMovies(storedReleases);
|
|
||||||
|
|
||||||
await associateActors(actors, storedReleases);
|
await associateActors(actors, storedReleases);
|
||||||
|
|
||||||
|
@ -518,7 +496,6 @@ async function storeReleases(releases) {
|
||||||
return {
|
return {
|
||||||
releases: storedReleases,
|
releases: storedReleases,
|
||||||
actors,
|
actors,
|
||||||
movies,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,8 @@ const Promise = require('bluebird');
|
||||||
|
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
const argv = require('./argv');
|
const argv = require('./argv');
|
||||||
|
const include = require('./utils/argv-include')(argv);
|
||||||
|
const knex = require('./knex');
|
||||||
const scrapers = require('./scrapers/scrapers');
|
const scrapers = require('./scrapers/scrapers');
|
||||||
const { findSiteByUrl } = require('./sites');
|
const { findSiteByUrl } = require('./sites');
|
||||||
const { findNetworkByUrl } = require('./networks');
|
const { findNetworkByUrl } = require('./networks');
|
||||||
|
@ -33,7 +35,7 @@ async function findSite(url, release) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeRelease(source, basicRelease = null, type = 'scene', preflight) {
|
async function scrapeRelease(source, basicRelease = null, type = 'scene', beforeFetchLatest) {
|
||||||
// profile scraper may return either URLs or pre-scraped scenes
|
// profile scraper may return either URLs or pre-scraped scenes
|
||||||
const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined;
|
const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined;
|
||||||
const url = sourceIsUrlOrEmpty ? source : source?.url;
|
const url = sourceIsUrlOrEmpty ? source : source?.url;
|
||||||
|
@ -72,8 +74,8 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
|
||||||
}
|
}
|
||||||
|
|
||||||
const scrapedRelease = type === 'scene'
|
const scrapedRelease = type === 'scene'
|
||||||
? await scraper.fetchScene(url, site, release, preflight)
|
? await scraper.fetchScene(url, site, release, beforeFetchLatest, include)
|
||||||
: await scraper.fetchMovie(url, site, release, preflight);
|
: await scraper.fetchMovie(url, site, release, beforeFetchLatest, include);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...release,
|
...release,
|
||||||
|
@ -85,8 +87,42 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeReleases(sources, release = null, type = 'scene', preflight = null) {
|
async function accumulateMovies(releases) {
|
||||||
const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type, preflight), {
|
if (!argv.withMovies) return [];
|
||||||
|
|
||||||
|
const moviesByUrl = releases.reduce((acc, release) => {
|
||||||
|
if (!release.movie) return acc;
|
||||||
|
const movie = release.movie.url ? release.movie : { url: release.movie };
|
||||||
|
|
||||||
|
if (!acc[movie.url]) {
|
||||||
|
acc[movie.url] = {
|
||||||
|
...movie,
|
||||||
|
type: 'movie',
|
||||||
|
sceneIds: [],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
acc[movie.url].sceneIds = acc[movie.url].sceneIds.concat(release.id);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
const movies = await Promise.map(Object.values(moviesByUrl), async movie => scrapeRelease(movie, null, 'movie'));
|
||||||
|
const { releases: storedMovies } = await storeReleases(movies);
|
||||||
|
|
||||||
|
const movieAssociations = storedMovies.reduce((acc, movie) => acc.concat(movie.sceneIds.map(sceneId => ({
|
||||||
|
movie_id: movie.id,
|
||||||
|
scene_id: sceneId,
|
||||||
|
}))), []);
|
||||||
|
|
||||||
|
await knex('releases_movies').insert(movieAssociations);
|
||||||
|
|
||||||
|
// console.log(moviesByUrl);
|
||||||
|
return movies;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function scrapeReleases(sources, type = 'scene') {
|
||||||
|
const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, null, type), {
|
||||||
concurrency: 5,
|
concurrency: 5,
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
|
@ -97,26 +133,26 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argv.save) {
|
if (argv.save) {
|
||||||
/*
|
|
||||||
const movie = scrapedRelease.movie
|
|
||||||
? await scrapeRelease(scrapedRelease.movie, null, false, 'movie')
|
|
||||||
: null;
|
|
||||||
|
|
||||||
if (movie) {
|
|
||||||
const { releases: [storedMovie] } = await storeReleases([movie]);
|
|
||||||
curatedRelease.parentId = storedMovie.id;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
const { releases: storedReleases } = await storeReleases(curatedReleases);
|
const { releases: storedReleases } = await storeReleases(curatedReleases);
|
||||||
const movieScenes = storedReleases.map(movie => movie.scenes).flat();
|
|
||||||
|
|
||||||
// console.log(movieScenes);
|
await accumulateMovies(storedReleases);
|
||||||
|
|
||||||
if (storedReleases) {
|
if (storedReleases) {
|
||||||
logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
|
logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return storedReleases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return curatedReleases;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function scrapeScenes(sources) {
|
||||||
|
return scrapeReleases(sources, 'scene');
|
||||||
|
}
|
||||||
|
|
||||||
|
async function scrapeMovies(sources) {
|
||||||
|
return scrapeReleases(sources, 'movie');
|
||||||
}
|
}
|
||||||
|
|
||||||
async function deepFetchReleases(baseReleases, beforeFetchLatest) {
|
async function deepFetchReleases(baseReleases, beforeFetchLatest) {
|
||||||
|
@ -151,13 +187,13 @@ async function deepFetchReleases(baseReleases, beforeFetchLatest) {
|
||||||
concurrency: 2,
|
concurrency: 2,
|
||||||
});
|
});
|
||||||
|
|
||||||
// console.log(deepReleases);
|
|
||||||
|
|
||||||
return deepReleases;
|
return deepReleases;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
deepFetchReleases,
|
deepFetchReleases,
|
||||||
|
scrapeMovies,
|
||||||
scrapeRelease,
|
scrapeRelease,
|
||||||
scrapeReleases,
|
scrapeReleases,
|
||||||
|
scrapeScenes,
|
||||||
};
|
};
|
||||||
|
|
|
@ -4,6 +4,7 @@ const Promise = require('bluebird');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const argv = require('./argv');
|
const argv = require('./argv');
|
||||||
|
const include = require('./utils/argv-include')(argv);
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const { fetchIncludedSites } = require('./sites');
|
const { fetchIncludedSites } = require('./sites');
|
||||||
|
@ -42,7 +43,7 @@ async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteRel
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases);
|
const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases, include);
|
||||||
|
|
||||||
if (!Array.isArray(latestReleases)) {
|
if (!Array.isArray(latestReleases)) {
|
||||||
logger.warn(`Scraper returned ${latestReleases || 'null'} when fetching latest from '${site.name}' on '${site.network.name}'`);
|
logger.warn(`Scraper returned ${latestReleases || 'null'} when fetching latest from '${site.name}' on '${site.network.name}'`);
|
||||||
|
@ -89,7 +90,7 @@ async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteRel
|
||||||
|
|
||||||
async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) {
|
async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) {
|
||||||
if (argv.upcoming && scraper.fetchUpcoming) {
|
if (argv.upcoming && scraper.fetchUpcoming) {
|
||||||
const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest);
|
const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest, include);
|
||||||
|
|
||||||
return upcomingReleases
|
return upcomingReleases
|
||||||
? upcomingReleases.map(release => ({ ...release, site, upcoming: true }))
|
? upcomingReleases.map(release => ({ ...release, site, upcoming: true }))
|
||||||
|
|
|
@ -5,6 +5,7 @@ const bhttp = require('bhttp');
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const logger = require('../logger')(__filename);
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const { ex } = require('../utils/q');
|
const { ex } = require('../utils/q');
|
||||||
|
|
||||||
|
@ -105,7 +106,10 @@ function scrapeScene(html, url, _site) {
|
||||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||||
|
|
||||||
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||||
release.channel = channel === 'bangcasting' ? 'bangbroscasting' : channel;
|
|
||||||
|
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
|
||||||
|
if (channel === 'remaster') release.channel = 'bangbrosremastered';
|
||||||
|
else release.channel = channel;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
@ -123,8 +127,8 @@ function scrapeProfile(html) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfileSearch(html, actorName) {
|
function scrapeProfileSearch(html, actorName) {
|
||||||
const { q } = ex(html);
|
const { qu } = ex(html);
|
||||||
const actorLink = q(`a[title="${actorName}"]`, 'href');
|
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
|
||||||
|
|
||||||
return actorLink ? `https://bangbros.com${actorLink}` : null;
|
return actorLink ? `https://bangbros.com${actorLink}` : null;
|
||||||
}
|
}
|
||||||
|
@ -145,7 +149,7 @@ async function fetchUpcoming(site) {
|
||||||
|
|
||||||
async function fetchScene(url, site, release) {
|
async function fetchScene(url, site, release) {
|
||||||
if (!release?.date) {
|
if (!release?.date) {
|
||||||
throw new Error(`Cannot fetch Bang Bros scenes from argument URL, as scene pages do not have release dates: ${url}`);
|
logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const { origin } = new URL(url);
|
const { origin } = new URL(url);
|
||||||
|
|
|
@ -5,11 +5,11 @@ const bhttp = require('bhttp');
|
||||||
const { ex } = require('../utils/q');
|
const { ex } = require('../utils/q');
|
||||||
|
|
||||||
function scrapeProfile(html) {
|
function scrapeProfile(html) {
|
||||||
const { q, qa, qd, qi, qus } = ex(html); /* eslint-disable-line object-curly-newline */
|
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
const bio = qa('.infobox tr[valign="top"]')
|
const bio = qu.all('.infobox tr[valign="top"]')
|
||||||
.map(detail => qa(detail, 'td', true))
|
.map(detail => qu.all(detail, 'td', true))
|
||||||
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,9 +19,9 @@ function scrapeProfile(html) {
|
||||||
profile.gender = isTrans ? 'transsexual' : 'female';
|
profile.gender = isTrans ? 'transsexual' : 'female';
|
||||||
*/
|
*/
|
||||||
|
|
||||||
profile.birthdate = qd('.bday', 'YYYY-MM-DD');
|
profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
|
||||||
|
|
||||||
profile.description = q('#mw-content-text > p', true);
|
profile.description = qu.q('#mw-content-text > p', true);
|
||||||
|
|
||||||
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
||||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||||
|
@ -62,7 +62,7 @@ function scrapeProfile(html) {
|
||||||
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
||||||
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
||||||
|
|
||||||
const avatarThumbPath = qi('.image img');
|
const avatarThumbPath = qu.img('.image img');
|
||||||
|
|
||||||
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
||||||
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
||||||
|
@ -73,7 +73,7 @@ function scrapeProfile(html) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
profile.social = qus('.infobox a.external');
|
profile.social = qu.urls('.infobox a.external');
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,26 +27,26 @@ function scrapeAll(html, site, origin) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(html, url, _site) {
|
async function scrapeScene(html, url, _site) {
|
||||||
const { q, qa, qd, qm, qp, qus } = ex(html);
|
const { qu } = ex(html);
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
[release.entryId] = url.split('/').slice(-1);
|
[release.entryId] = url.split('/').slice(-1);
|
||||||
|
|
||||||
release.title = qm('itemprop=name');
|
release.title = qu.meta('itemprop=name');
|
||||||
release.description = q('.descr-box p', true);
|
release.description = qu.q('.descr-box p', true);
|
||||||
release.date = qd('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
|
release.date = qu.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
|
||||||
|| qd('.title-border:nth-child(2) p', 'MM.DD.YYYY');
|
|| qu.date('.title-border:nth-child(2) p', 'MM.DD.YYYY');
|
||||||
|
|
||||||
release.actors = qa('.pornstar-card > a', 'title');
|
release.actors = qu.all('.pornstar-card > a', 'title');
|
||||||
release.tags = qa('.tags-tab .tags a', true);
|
release.tags = qu.all('.tags-tab .tags a', true);
|
||||||
|
|
||||||
release.duration = parseInt(q('.icon-video-red + span', true), 10) * 60;
|
release.duration = parseInt(qu.q('.icon-video-red + span', true), 10) * 60;
|
||||||
release.likes = Number(q('.icon-like-red + span', true));
|
release.likes = Number(qu.q('.icon-like-red + span', true));
|
||||||
|
|
||||||
release.poster = qp();
|
release.poster = qu.poster();
|
||||||
release.photos = qus('.photo-slider-guest .card a');
|
release.photos = qu.urls('.photo-slider-guest .card a');
|
||||||
|
|
||||||
release.trailer = qa('source[type="video/mp4"]').map(trailer => ({
|
release.trailer = qu.all('source[type="video/mp4"]').map(trailer => ({
|
||||||
src: trailer.src,
|
src: trailer.src,
|
||||||
quality: Number(trailer.attributes.res.value),
|
quality: Number(trailer.attributes.res.value),
|
||||||
}));
|
}));
|
||||||
|
@ -72,10 +72,10 @@ async function fetchActorReleases(urls) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfile(html, _url, actorName) {
|
async function scrapeProfile(html, _url, actorName) {
|
||||||
const { q, qa, qus } = ex(html);
|
const { qu } = ex(html);
|
||||||
|
|
||||||
const keys = qa('.about-title', true).map(key => slugify(key, { delimiter: '_' }));
|
const keys = qu.all('.about-title', true).map(key => slugify(key, { delimiter: '_' }));
|
||||||
const values = qa('.about-info').map((el) => {
|
const values = qu.all('.about-info').map((el) => {
|
||||||
if (el.children.length > 0) {
|
if (el.children.length > 0) {
|
||||||
return Array.from(el.children, child => child.textContent.trim()).join(', ');
|
return Array.from(el.children, child => child.textContent.trim()).join(', ');
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ async function scrapeProfile(html, _url, actorName) {
|
||||||
name: actorName,
|
name: actorName,
|
||||||
};
|
};
|
||||||
|
|
||||||
profile.description = q('.description-box', true);
|
profile.description = qu.q('.description-box', true);
|
||||||
profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');
|
profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');
|
||||||
|
|
||||||
if (bio.nationality) profile.nationality = bio.nationality;
|
if (bio.nationality) profile.nationality = bio.nationality;
|
||||||
|
@ -118,10 +118,10 @@ async function scrapeProfile(html, _url, actorName) {
|
||||||
|
|
||||||
if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]);
|
if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]);
|
||||||
|
|
||||||
const avatarEl = q('.pornstar-details .card-img-top');
|
const avatarEl = qu.q('.pornstar-details .card-img-top');
|
||||||
if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`;
|
if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`;
|
||||||
|
|
||||||
profile.releases = await fetchActorReleases(qus('.find-me-tab li a'));
|
profile.releases = await fetchActorReleases(qu.urls('.find-me-tab li a'));
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,10 +135,10 @@ function getEntryId(html) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAll(scenes, site) {
|
function scrapeAll(scenes, site) {
|
||||||
return scenes.map(({ qu }) => {
|
return scenes.map(({ el, qu }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = qu.el.dataset.setid || qu.q('.rating_box')?.dataset.id;
|
release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;
|
||||||
|
|
||||||
release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
|
release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
|
||||||
release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
|
release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
|
||||||
|
@ -160,7 +160,7 @@ function scrapeAll(scenes, site) {
|
||||||
} : null;
|
} : null;
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
const teaserScript = qu.content('script');
|
const teaserScript = qu.html('script');
|
||||||
if (teaserScript) {
|
if (teaserScript) {
|
||||||
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||||
if (src) release.teaser = { src };
|
if (src) release.teaser = { src };
|
||||||
|
@ -220,17 +220,19 @@ function scrapeUpcoming(html, site) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene({ qu }, url, site) {
|
async function scrapeScene({ html, qu }, url, site, include) {
|
||||||
const release = { url, site };
|
const release = { url, site };
|
||||||
|
|
||||||
release.entryId = getEntryId(qu.html);
|
release.entryId = getEntryId(html);
|
||||||
release.title = qu.q('.title_bar_hilite', true);
|
release.title = qu.q('.title_bar_hilite', true);
|
||||||
release.description = qu.q('.update_description', true);
|
release.description = qu.q('.update_description', true);
|
||||||
|
|
||||||
release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
||||||
release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
|
||||||
|
|
||||||
const posterPath = qu.html.match(/useimage = "(.*)"/)?.[1];
|
release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
||||||
|
release.tags = qu.all('.update_tags a', true);
|
||||||
|
|
||||||
|
const posterPath = html.match(/useimage = "(.*)"/)?.[1];
|
||||||
|
|
||||||
if (posterPath) {
|
if (posterPath) {
|
||||||
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
|
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
|
||||||
|
@ -243,8 +245,8 @@ async function scrapeScene({ qu }, url, site) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (site.slug !== 'manuelferrara') {
|
if (include.trailer && site.slug !== 'manuelferrara') {
|
||||||
const trailerLines = qu.html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));
|
const trailerLines = html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));
|
||||||
|
|
||||||
if (trailerLines.length) {
|
if (trailerLines.length) {
|
||||||
release.trailer = trailerLines.map((trailerLine) => {
|
release.trailer = trailerLines.map((trailerLine) => {
|
||||||
|
@ -259,8 +261,7 @@ async function scrapeScene({ qu }, url, site) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
release.photos = await getPhotos(release.entryId, site);
|
if (include.photos) release.photos = await getPhotos(release.entryId, site);
|
||||||
release.tags = qu.all('.update_tags a', true);
|
|
||||||
|
|
||||||
if (qu.exists('.update_dvds a')) {
|
if (qu.exists('.update_dvds a')) {
|
||||||
release.movie = {
|
release.movie = {
|
||||||
|
@ -275,27 +276,27 @@ async function scrapeScene({ qu }, url, site) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeMovie({ el, q, qus }, url, site) {
|
function scrapeMovie({ el, qu }, url, site) {
|
||||||
const movie = { url, site };
|
const movie = { url, site };
|
||||||
|
|
||||||
movie.entryId = q('.dvd_details_overview .rating_box').dataset.id;
|
movie.entryId = qu.q('.dvd_details_overview .rating_box').dataset.id;
|
||||||
movie.title = q('.title_bar span', true);
|
movie.title = qu.q('.title_bar span', true);
|
||||||
movie.covers = qus('#dvd-cover-flip > a');
|
movie.covers = qu.urls('#dvd-cover-flip > a');
|
||||||
movie.channel = q('.update_date a', true);
|
movie.channel = qu.q('.update_date a', true);
|
||||||
|
|
||||||
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
|
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
|
||||||
const sceneQs = ctxa(el, '.dvd_details');
|
const sceneQus = ctxa(el, '.dvd_details');
|
||||||
const scenes = scrapeAll(sceneQs, site);
|
const scenes = scrapeAll(sceneQus, site);
|
||||||
|
|
||||||
const curatedScenes = scenes
|
const curatedScenes = scenes
|
||||||
.map(scene => ({ ...scene, movie }))
|
?.map(scene => ({ ...scene, movie }))
|
||||||
.sort((sceneA, sceneB) => sceneA.date - sceneB.date);
|
.sort((sceneA, sceneB) => sceneA.date - sceneB.date);
|
||||||
|
|
||||||
movie.date = curatedScenes[0].date;
|
movie.date = curatedScenes?.[0].date;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...movie,
|
...movie,
|
||||||
scenes: curatedScenes,
|
...(curatedScenes && { scenes: curatedScenes }),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -358,10 +359,10 @@ async function fetchUpcoming(site) {
|
||||||
return res.statusCode;
|
return res.statusCode;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site, baseRelease, preflight, include) {
|
||||||
const res = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchMovie(url, site) {
|
async function fetchMovie(url, site) {
|
||||||
|
|
|
@ -101,22 +101,24 @@ function scrapeScene(html, url, site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchActorReleases(url) {
|
async function fetchActorReleases(url) {
|
||||||
const { qus } = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
return qus('.contain-block:not(.live-scenes) .scene-item > a:first-child'); // live scenes repeat on all pages
|
return res.ok
|
||||||
|
? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
|
||||||
|
: [];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfile(html) {
|
async function scrapeProfile(html) {
|
||||||
const { q, qus } = ex(html);
|
const { qu } = ex(html);
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
profile.description = q('.bio_about_text', true);
|
profile.description = qu.q('.bio_about_text', true);
|
||||||
|
|
||||||
const avatar = q('img.performer-pic', 'src');
|
const avatar = qu.q('img.performer-pic', 'src');
|
||||||
if (avatar) profile.avatar = `https:${avatar}`;
|
if (avatar) profile.avatar = `https:${avatar}`;
|
||||||
|
|
||||||
const releases = qus('.scene-item > a:first-child');
|
const releases = qu.urls('.scene-item > a:first-child');
|
||||||
const otherPages = qus('.pagination a:not([rel=next]):not([rel=prev])');
|
const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
|
||||||
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
|
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
|
||||||
|
|
||||||
profile.releases = releases.concat(olderReleases.flat());
|
profile.releases = releases.concat(olderReleases.flat());
|
||||||
|
|
|
@ -58,7 +58,7 @@ function scrapeAll(scenes, site, origin) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(qu, url, site) {
|
async function scrapeScene({ qu }, url, site) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const { origin, pathname } = new URL(url);
|
const { origin, pathname } = new URL(url);
|
||||||
|
|
|
@ -116,23 +116,23 @@ function scrapeLatest(html) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene(html, url) {
|
function scrapeScene(html, url) {
|
||||||
const { q, qa, qd, qus, ql, qm } = ex(html);
|
const { qu } = ex(html);
|
||||||
const release = { url };
|
const release = { url };
|
||||||
|
|
||||||
// release.entryId = slugify(release.title);
|
// release.entryId = slugify(release.title);
|
||||||
[release.entryId] = q('link[rel="canonical"]').href.match(/\d+/);
|
[release.entryId] = qu.q('link[rel="canonical"]').href.match(/\d+/);
|
||||||
|
|
||||||
release.title = qm('meta[property="og:title"]') || q('.video-page-header h1', true);
|
release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
|
||||||
|
release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);
|
||||||
|
|
||||||
release.description = qm('meta[property="og:description"]') || q('.info-video-description', true);
|
release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
|
||||||
release.date = qd('.info-video-details li:first-child span', 'MMM DD, YYYY');
|
release.duration = qu.dur('.info-video-details li:nth-child(2) span');
|
||||||
release.duration = ql('.info-video-details li:nth-child(2) span');
|
|
||||||
|
|
||||||
release.actors = qa('.info-video-models a', true);
|
release.actors = qu.all('.info-video-models a', true);
|
||||||
release.tags = qa('.info-video-category a', true);
|
release.tags = qu.all('.info-video-category a', true);
|
||||||
|
|
||||||
release.photos = qus('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
|
release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
|
||||||
release.poster = qm('meta[property="og:image"');
|
release.poster = qu.meta('meta[property="og:image"');
|
||||||
|
|
||||||
if (!release.poster) {
|
if (!release.poster) {
|
||||||
const previewStart = html.indexOf('preview_url');
|
const previewStart = html.indexOf('preview_url');
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
function include(argv) {
|
||||||
|
return {
|
||||||
|
covers: argv.media && argv.covers,
|
||||||
|
media: argv.media,
|
||||||
|
photos: argv.media && argv.photos,
|
||||||
|
poster: argv.media && argv.posters,
|
||||||
|
posters: argv.media && argv.posters,
|
||||||
|
teaser: argv.media && argv.videos && argv.teasers,
|
||||||
|
teasers: argv.media && argv.videos && argv.teasers,
|
||||||
|
trailer: argv.media && argv.videos && argv.trailers,
|
||||||
|
trailers: argv.media && argv.videos && argv.trailers,
|
||||||
|
videos: argv.videos,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = include;
|
|
@ -70,7 +70,7 @@ function exists(context, selector) {
|
||||||
return !!q(context, selector);
|
return !!q(context, selector);
|
||||||
}
|
}
|
||||||
|
|
||||||
function content(context, selector) {
|
function html(context, selector) {
|
||||||
const el = q(context, selector, null, true);
|
const el = q(context, selector, null, true);
|
||||||
|
|
||||||
return el && el.innerHTML;
|
return el && el.innerHTML;
|
||||||
|
@ -176,8 +176,8 @@ const legacyFuncs = {
|
||||||
qall: all,
|
qall: all,
|
||||||
qd: date,
|
qd: date,
|
||||||
qdate: date,
|
qdate: date,
|
||||||
qh: content,
|
qh: html,
|
||||||
qhtml: content,
|
qhtml: html,
|
||||||
qi: image,
|
qi: image,
|
||||||
qimage: image,
|
qimage: image,
|
||||||
qimages: images,
|
qimages: images,
|
||||||
|
@ -207,8 +207,7 @@ const legacyFuncs = {
|
||||||
|
|
||||||
const quFuncs = {
|
const quFuncs = {
|
||||||
all,
|
all,
|
||||||
body: content,
|
html,
|
||||||
content,
|
|
||||||
date,
|
date,
|
||||||
dur: duration,
|
dur: duration,
|
||||||
duration,
|
duration,
|
||||||
|
@ -217,7 +216,6 @@ const quFuncs = {
|
||||||
images,
|
images,
|
||||||
img: image,
|
img: image,
|
||||||
imgs: images,
|
imgs: images,
|
||||||
inner: content,
|
|
||||||
length: duration,
|
length: duration,
|
||||||
meta,
|
meta,
|
||||||
poster,
|
poster,
|
||||||
|
|
Loading…
Reference in New Issue