forked from DebaucheryLibrarian/traxxx
Added Bang! scraper. Showing studio on tile. Added favicons to tiles.
This commit is contained in:
@@ -180,6 +180,23 @@ async function attachChannelSite(release) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Swap the studio reference on a release for a row from the `studios` table.
 *
 * The value of `release.studio` is compared against the `name`, `slug` and
 * `url` columns, and the first row returned by the query is used.
 *
 * @param {Object} release - release that may carry a `studio` reference
 * @returns {Promise<Object>} the release itself when it has no studio
 *   reference; otherwise a shallow copy whose `studio` is the query result
 *   (presumably undefined when nothing matches - confirm against knex `.first()`)
 */
async function attachStudio(release) {
    const studioReference = release.studio;

    if (studioReference) {
        const studio = await knex('studios')
            .where('name', studioReference)
            .orWhere('slug', studioReference)
            .orWhere('url', studioReference)
            .first();

        return { ...release, studio };
    }

    // nothing to resolve, hand back the very same object
    return release;
}
|
||||
|
||||
async function curateReleaseEntry(release) {
|
||||
const curatedRelease = {
|
||||
site_id: release.site.id,
|
||||
@@ -295,27 +312,31 @@ async function storeReleaseAssets(release, releaseId) {
|
||||
await createMediaDirectory('releases', subpath);
|
||||
|
||||
try {
|
||||
await Promise.all([
|
||||
storePhotos(release.photos, {
|
||||
targetId: releaseId,
|
||||
subpath,
|
||||
primaryRole: release.poster ? null : 'poster',
|
||||
}, identifier),
|
||||
release.poster && storePhotos([release.poster], {
|
||||
// don't use Promise.all to prevent concurrency issues with duplicate detection
|
||||
if (release.poster) {
|
||||
await storePhotos([release.poster], {
|
||||
role: 'poster',
|
||||
targetId: releaseId,
|
||||
subpath,
|
||||
}, identifier),
|
||||
storePhotos(release.covers, {
|
||||
role: 'cover',
|
||||
targetId: releaseId,
|
||||
subpath,
|
||||
}, identifier),
|
||||
storeTrailer(release.trailer, {
|
||||
targetId: releaseId,
|
||||
subpath,
|
||||
}, identifier),
|
||||
]);
|
||||
}, identifier);
|
||||
}
|
||||
|
||||
await storePhotos(release.photos, {
|
||||
targetId: releaseId,
|
||||
subpath,
|
||||
primaryRole: release.poster ? null : 'poster',
|
||||
}, identifier);
|
||||
|
||||
await storePhotos(release.covers, {
|
||||
role: 'cover',
|
||||
targetId: releaseId,
|
||||
subpath,
|
||||
}, identifier);
|
||||
|
||||
await storeTrailer(release.trailer, {
|
||||
targetId: releaseId,
|
||||
subpath,
|
||||
}, identifier);
|
||||
} catch (error) {
|
||||
console.log(release.url, error);
|
||||
}
|
||||
@@ -363,7 +384,8 @@ async function storeReleases(releases) {
|
||||
const storedReleases = await Promise.map(releases, async (release) => {
|
||||
try {
|
||||
const releaseWithChannelSite = await attachChannelSite(release);
|
||||
const releaseId = await storeRelease(releaseWithChannelSite);
|
||||
const releaseWithStudio = await attachStudio(release);
|
||||
const releaseId = await storeRelease(releaseWithStudio);
|
||||
|
||||
return {
|
||||
id: releaseId,
|
||||
|
||||
127
src/scrapers/bang.js
Normal file
127
src/scrapers/bang.js
Normal file
@@ -0,0 +1,127 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
/**
 * Re-encode a hexadecimal ID as base64, substituting `-` for `+`,
 * `_` for `/` and `,` for the `=` padding.
 *
 * @param {string} id - hexadecimal string
 * @returns {string} the substituted base64 form of the decoded bytes
 */
function encodeId(id) {
    const substitutes = { '+': '-', '/': '_', '=': ',' };
    const base64 = Buffer.from(id, 'hex').toString('base64');

    return base64.replace(/[+/=]/g, character => substitutes[character]);
}
|
||||
|
||||
/**
 * Convert raw search hits into release objects.
 *
 * Each hit's scene document is read from its `_source` property.
 *
 * @param {Object[]} scenes - search hits as returned in `hits.hits`
 * @param {Object} site - site record, attached unchanged to every release
 * @returns {Object[]} one release per hit; `poster` is only set when the
 *   scene has at least one usable screenshot
 */
function scrapeLatest(scenes, site) {
    return scenes.map(({ _source: scene }) => {
        const release = {
            site,
            entryId: encodeId(scene.id),
            title: scene.name,
            description: scene.description,
            // tolerate documents that lack these lists instead of throwing
            actors: (scene.actors || []).map(actor => actor.name),
            tags: (scene.genres || []).concat(scene.actions || []).map(genre => genre.name),
            duration: scene.duration,
        };

        const slug = release.title.toLowerCase().trim().replace(/\s+/g, '-');
        release.url = `https://www.bang.com/video/${release.entryId}/${slug}`;

        // keep only the calendar day, at midnight UTC
        const date = new Date(scene.releaseDate);
        release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));

        if (scene.is4k) release.tags.push('4k');
        if (scene.gay) release.tags.push('gay');

        const screenshots = scene.screenshots || [];
        const defaultPoster = screenshots.find(photo => photo.default === true);
        const photoset = screenshots.filter(photo => photo.default === false);

        // without a default screenshot, the first regular one is promoted to poster
        const photos = defaultPoster ? photoset : photoset.slice(1);
        const poster = defaultPoster || photoset[0];

        const toScreenshotUrl = photo => `https://i.bang.com/screenshots/${scene.dvd.id}/movie/1/${photo.screenId}.jpg`;

        // a scene without screenshots has no poster; leave the property unset rather than crash
        if (poster) {
            release.poster = toScreenshotUrl(poster);
        }

        release.photos = photos.map(photo => toScreenshotUrl(photo));

        release.trailer = {
            src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
        };

        // global regex: a string pattern would only replace the first ampersand
        release.studio = scene.series.name
            .replace(/[! .]/g, '')
            .replace(/&/g, 'and');

        return release;
    });
}
|
||||
|
||||
/**
 * Request one page of scenes from the remote `_search` endpoint and convert
 * the hits into releases.
 *
 * The query asks for documents with status `ok`, a release date up to now,
 * a nested studio name matching `bang! originals`, and a type other than
 * `trailer`, sorted by release date, newest first.
 *
 * @param {Object} site - site record passed through to scrapeLatest
 * @param {number} [page=1] - 1-based page number; each page holds 50 results
 * @returns {Promise<Object[]>} releases produced by scrapeLatest
 */
async function fetchLatest(site, page = 1) {
    const pageSize = 50;
    const clusterId = '617fb597b659459bafe6472470d9073a';
    const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
    const searchUrl = `https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`;

    const studioCondition = {
        nested: {
            path: 'studio',
            query: {
                bool: {
                    must: [
                        {
                            match: {
                                'studio.name': {
                                    operator: 'AND',
                                    query: 'bang! originals',
                                },
                            },
                        },
                    ],
                },
            },
        },
    };

    const payload = {
        size: pageSize,
        from: (page - 1) * pageSize,
        query: {
            bool: {
                must: [
                    { match: { status: 'ok' } },
                    { range: { releaseDate: { lte: 'now' } } },
                    studioCondition,
                ],
                must_not: [
                    { match: { type: 'trailer' } },
                ],
            },
        },
        sort: [
            { releaseDate: { order: 'desc' } },
        ],
    };

    const requestOptions = {
        encodeJSON: true,
        headers: {
            // the key is sent as-is in an HTTP Basic Authorization header
            Authorization: `Basic ${authKey}`,
        },
    };

    const res = await bhttp.post(searchUrl, payload, requestOptions);

    return scrapeLatest(res.body.hits.hits, site);
}
|
||||
|
||||
// only the latest-releases listing is exposed; fetchScene is not exported
module.exports = { fetchLatest };
|
||||
@@ -4,9 +4,6 @@ const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
const knex = require('../knex');
|
||||
|
||||
const { matchTags } = require('../tags');
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
@@ -102,16 +99,8 @@ async function scrapeScene(html, url, site, useGallery) {
|
||||
const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd');
|
||||
|
||||
const studioName = $('.watchpage-studioname').first().text().trim();
|
||||
const studioSlug = studioName.replace(/\s+/g, '').toLowerCase();
|
||||
const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
|
||||
const [studio, tags] = await Promise.all([
|
||||
knex('studios')
|
||||
.where({ name: studioName })
|
||||
.orWhere({ slug: studioSlug })
|
||||
.first(),
|
||||
matchTags(rawTags),
|
||||
]);
|
||||
const studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
|
||||
const tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
|
||||
return {
|
||||
url,
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
// releases
|
||||
const twentyonesextury = require('./21sextury');
|
||||
const bang = require('./bang');
|
||||
const bangbros = require('./bangbros');
|
||||
const blowpass = require('./blowpass');
|
||||
const dogfart = require('./dogfart');
|
||||
@@ -31,6 +32,7 @@ const pornhub = require('./pornhub');
|
||||
module.exports = {
|
||||
releases: {
|
||||
'21sextury': twentyonesextury,
|
||||
bang,
|
||||
bangbros,
|
||||
blowpass,
|
||||
brazzers,
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
const path = require('path');
|
||||
const Promise = require('bluebird');
|
||||
const fs = require('fs-extra');
|
||||
const fetchScene = require('../scrape-release');
|
||||
const fetchScene = require('../scrape-releases');
|
||||
|
||||
const argv = require('../argv');
|
||||
|
||||
|
||||
Reference in New Issue
Block a user