Switched to tabs. Added missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -8,99 +8,99 @@ const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
// Pre-encoded credential placed verbatim after `Basic ` in the Authorization
// header of the search-cluster requests made further down in this file.
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';

// Maps the single-letter gender code found on a scene's actors to the gender
// name stored on the scraped release. Codes missing here map to `undefined`.
const genderMap = {
	M: 'male',
	F: 'female',
};
|
||||
|
||||
/**
 * Build the URL of one screenshot belonging to a scene.
 *
 * @param {Object} item - screenshot entry; only `item.screenId` is read
 * @param {Object} scene - scene owning the screenshot; `scene.dvd.id` and `scene.order` are read
 * @returns {string} URL of the screenshot JPEG on i.bang.com
 */
function getScreenUrl(item, scene) {
	const { dvd, order } = scene;

	return `https://i.bang.com/screenshots/${dvd.id}/movie/${order}/${item.screenId}.jpg`;
}
|
||||
|
||||
/**
 * Turn a hexadecimal ID into the token embedded in video page URLs.
 *
 * The hex string is re-encoded as base64, after which the three characters
 * that are awkward inside a URL path are substituted: `+` becomes `-`,
 * `/` becomes `_` and the `=` padding becomes `,`.
 *
 * @param {string} id - ID in hexadecimal notation
 * @returns {string} substituted base64 form of the same bytes
 */
function encodeId(id) {
	return Buffer
		.from(id, 'hex')
		.toString('base64')
		.replace(/\+/g, '-')
		.replace(/\//g, '_')
		.replace(/=/g, ',');
}
|
||||
|
||||
/**
 * Reverse of `encodeId`: recover the hexadecimal ID from a URL token.
 *
 * The substitutions are undone first (`-` to `+`, `_` to `/`, `,` to `=`),
 * then the resulting base64 string is decoded and written out as hex.
 *
 * @param {string} id - token as it appears in a video URL
 * @returns {string} the ID in hexadecimal notation
 */
function decodeId(id) {
	const restoredId = id
		.replace(/-/g, '+')
		.replace(/_/g, '/')
		.replace(/,/g, '=');

	return Buffer
		.from(restoredId, 'base64')
		.toString('hex');
}
|
||||
|
||||
/**
 * Convert one raw scene document into a release object.
 *
 * @param {Object} scene - scene document; reads `id`, `name`, `description`, `genres`,
 *   `actions`, `duration`, `releaseDate`, `actors`, `is4k`, `gay`, `screenshots`,
 *   `dvd.id`, `order`, `identifier` and `series.name`
 * @param {Object} site - site the scene is scraped for; stored on the release untouched
 * @returns {Object} release with `site`, `entryId`, `title`, `description`, `tags`, `duration`,
 *   `url`, `date`, `actors`, `photos`, `trailer`, `channel` and, when a screenshot is
 *   available, `poster`
 */
function scrapeScene(scene, site) {
	const release = {
		site,
		entryId: scene.id,
		title: scene.name,
		description: scene.description,
		tags: scene.genres.concat(scene.actions).map(genre => genre.name),
		duration: scene.duration,
	};

	const slug = slugify(release.title);
	release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;

	// Only the calendar day of the release timestamp is kept, expressed as UTC midnight.
	const date = new Date(scene.releaseDate);
	release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));

	release.actors = scene.actors.map(actor => ({ name: actor.name, gender: genderMap[actor.gender] }));

	if (scene.is4k) release.tags.push('4k');
	if (scene.gay) release.tags.push('gay');

	const defaultPoster = scene.screenshots.find(photo => photo.default === true);
	const photoset = scene.screenshots.filter(photo => photo.default === false);

	// Without an explicit default, the first regular screenshot is promoted to
	// poster and therefore dropped from the photo list.
	const photos = defaultPoster ? photoset : photoset.slice(1);
	const poster = defaultPoster || photoset[0];

	// A scene without any usable screenshot has no poster; leave the field
	// unset rather than passing `undefined` into getScreenUrl, which would throw.
	if (poster) release.poster = getScreenUrl(poster, scene);
	release.photos = photos.map(photo => getScreenUrl(photo, scene));

	release.trailer = {
		src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
	};

	// Strip '!', spaces and '.' from the series name and spell out every
	// ampersand (a string pattern would only replace the first one).
	release.channel = scene.series.name
		.replace(/[! .]/g, '')
		.replace(/&/g, 'and');

	return release;
}
|
||||
|
||||
/**
 * Convert a list of search hits into releases.
 *
 * @param {Object[]} scenes - search hits; the scene document is taken from each hit's `_source`
 * @param {Object} site - site the scenes are scraped for, handed to `scrapeScene` as-is
 * @returns {Object[]} one release per hit, in the same order
 */
function scrapeLatest(scenes, site) {
	return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
}
|
||||
|
||||
/**
 * Fetch one page of the most recent scenes of a site from the search cluster.
 *
 * The query selects documents with status 'ok' whose release date is not in
 * the future and whose nested series ID matches `site.parameters.siteId`,
 * excludes documents of type 'trailer', and sorts newest first.
 *
 * @param {Object} site - site to fetch for; `site.parameters.siteId` is read
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<Object[]>} releases produced by `scrapeLatest` from the returned hits
 */
async function fetchLatest(site, page = 1) {
	// Single source for the page size, so `size` and the `from` offset cannot drift apart.
	const pageSize = 50;

	const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: pageSize,
		from: (page - 1) * pageSize,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						range: {
							releaseDate: {
								lte: 'now',
							},
						},
					},
					/*
					 * global fetch
					 * NOTE(review): the original keeps a commented-out `nested` clause
					 * here; its body is cut off by the diff hunk boundary in the
					 * reviewed view, so it is not reproduced.
					 */
					{
						nested: {
							path: 'series',
							query: {
								bool: {
									must: [
										{
											match: {
												'series.id': {
													operator: 'AND',
													query: site.parameters.siteId,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	return scrapeLatest(res.body.hits.hits, site);
}
|
||||
|
||||
/**
 * Fetch a single scene by its public video URL and convert it into a release.
 *
 * The encoded ID is taken from the path segment that follows the first one
 * (`/video/<encodedId>/<slug>`), decoded back to hex, and used to request the
 * document from the search cluster.
 *
 * @param {string} url - video page URL
 * @param {Object} site - site the scene belongs to, handed to `scrapeScene` as-is
 * @returns {Promise<Object>} release produced by `scrapeScene`
 */
async function fetchScene(url, site) {
	const encodedId = new URL(url).pathname.split('/')[2];
	const entryId = decodeId(encodedId);

	const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user