Added Bang Bros data (no scraper yet). Added 21Sextury scene fetch date fallback.

This commit is contained in:
ThePendulum 2019-04-10 03:42:20 +02:00
parent 194c6d1cca
commit bc01cbe6dc
10 changed files with 566 additions and 11 deletions

1
.gitignore vendored
View File

@ -2,4 +2,5 @@ node_modules/
dist/
config/*
!config/default.js
src/utils/link-results.json
db.sqlite

View File

@ -47,6 +47,10 @@ module.exports = {
format: 'MMM DD, YYYY',
width: 14,
},
{
value: 'network',
width: 20,
},
{
value: 'site',
width: 30,

View File

@ -10,6 +10,12 @@ exports.seed = knex => Promise.resolve()
url: 'https://www.21sextury.com',
description: 'Watch all the latest scenes and porn video updates on 21Sextury.com, the best European porn site with the hottest pornstars from all over the world! Watch porn videos from the large network here.',
},
{
id: 'bangbros',
name: 'Bang Bros',
url: 'https://bangbros.com',
description: 'Here at Bang Bros, we only film the best highest quality porn with the sexiest Amateur girls and the top pornstars. Updated daily on Bangbros.com.',
},
{
id: 'blowpass',
name: 'Blowpass',

View File

@ -85,6 +85,475 @@ exports.seed = knex => Promise.resolve()
network_id: '21sextury',
parameters: JSON.stringify({ filter: true }),
},
// BANGBROS
{
id: 'assparade',
label: null,
network_id: 'bangbros',
name: 'Ass Parade',
url: 'http://assparade.com',
description: null,
parameters: null,
},
{
id: 'avaspice',
label: null,
network_id: 'bangbros',
name: 'AvaSpice',
url: 'https://bangbros.com/websites/avaspice',
description: null,
parameters: null,
},
{
id: 'backroomfacials',
label: null,
network_id: 'bangbros',
name: 'Back Room Facials',
url: 'http://backroomfacials.com',
description: null,
parameters: null,
},
{
id: 'backroommilf',
label: null,
network_id: 'bangbros',
name: 'Backroom MILF',
url: 'http://backroommilf.com',
description: null,
parameters: null,
},
{
id: 'ballhoneys',
label: null,
network_id: 'bangbros',
name: 'Ball Honeys',
url: 'https://bangbros.com/websites/ballhoneys',
description: null,
parameters: null,
},
{
id: 'bangbros18',
label: null,
network_id: 'bangbros',
name: 'BangBros 18',
url: 'http://bangbros18.com',
description: null,
parameters: null,
},
{
id: 'bangbrosangels',
label: null,
network_id: 'bangbros',
name: 'BangBros Angels',
url: 'https://bangbros.com/websites/bangbrosangels',
description: null,
parameters: null,
},
{
id: 'bangbrosclips',
label: null,
network_id: 'bangbros',
name: 'Bangbros Clips',
url: 'http://bangbrosclips.com',
description: null,
parameters: null,
},
{
id: 'bangbrosremastered',
label: null,
network_id: 'bangbros',
name: 'BangBros Remastered',
url: 'https://bangbros.com/websites/remaster',
description: null,
parameters: null,
},
{
id: 'bangbus',
label: null,
network_id: 'bangbros',
name: 'Bang Bus',
url: 'http://bangbus.com',
description: null,
parameters: null,
},
{
id: 'bangcasting',
label: null,
network_id: 'bangbros',
name: 'Bang Casting',
url: 'https://bangbros.com/websites/bangcasting',
description: null,
parameters: null,
},
{
id: 'bangpov',
label: null,
network_id: 'bangbros',
name: 'Bang POV',
url: 'http://bangpov.com',
description: null,
parameters: null,
},
{
id: 'bangtryouts',
label: null,
network_id: 'bangbros',
name: 'Bang Tryouts',
url: 'https://bangbros.com/websites/bangtryouts',
description: null,
parameters: null,
},
{
id: 'bigmouthfuls',
label: null,
network_id: 'bangbros',
name: 'Big Mouthfuls',
url: 'http://bigmouthfuls.com',
description: null,
parameters: null,
},
{
id: 'bigtitcreampie',
label: null,
network_id: 'bangbros',
name: 'Big Tit Cream Pie',
url: 'http://bigtitcreampie.com',
description: null,
parameters: null,
},
{
id: 'bigtitsroundasses',
label: null,
network_id: 'bangbros',
name: 'Big Tits, Round Asses',
url: 'http://bigtitsroundasses.com',
description: null,
parameters: null,
},
{
id: 'blowjobfridays',
label: null,
network_id: 'bangbros',
name: 'BlowJob Fridays',
url: 'http://blowjobfridays.com',
description: null,
parameters: null,
},
{
id: 'blowjobninjas',
label: null,
network_id: 'bangbros',
name: 'Blowjob Ninjas',
url: 'http://blowjobninjas.com',
description: null,
parameters: null,
},
{
id: 'boobsquad',
label: null,
network_id: 'bangbros',
name: 'Boob Squad',
url: 'http://boobsquad.com',
description: null,
parameters: null,
},
{
id: 'brownbunnies',
label: null,
network_id: 'bangbros',
name: 'Brown Bunnies',
url: 'http://brownbunnies.com',
description: null,
parameters: null,
},
{
id: 'canhescore',
label: null,
network_id: 'bangbros',
name: 'Can He Score?',
url: 'http://canhescore.com',
description: null,
parameters: null,
},
{
id: 'casting',
label: null,
network_id: 'bangbros',
name: 'Casting',
url: 'https://bangbros.com/websites/casting',
description: null,
parameters: null,
},
{
id: 'chongas',
label: null,
network_id: 'bangbros',
name: 'Chongas',
url: 'http://chongas.com',
description: null,
parameters: null,
},
{
id: 'colombiafuckfest',
label: null,
network_id: 'bangbros',
name: 'Colombia Fuck Fest',
url: 'http://colombiafuckfest.com',
description: null,
parameters: null,
},
{
id: 'dirtyworldtour',
label: null,
network_id: 'bangbros',
name: 'Dirty World Tour',
url: 'https://bangbros.com/websites/dirtyworldtour',
description: null,
parameters: null,
},
{
id: 'dorminvasion',
label: null,
network_id: 'bangbros',
name: 'Dorm Invasion',
url: 'http://dorminvasion.com',
description: null,
parameters: null,
},
{
id: 'facialfest',
label: null,
network_id: 'bangbros',
name: 'Facial Fest',
url: 'http://facialfest.com',
description: null,
parameters: null,
},
{
id: 'fuckteamfive',
label: null,
network_id: 'bangbros',
name: 'Fuck Team Five',
url: 'http://fuckteamfive.com',
description: null,
parameters: null,
},
{
id: 'gloryholeloads',
label: null,
network_id: 'bangbros',
name: 'Glory Hole Loads',
url: 'https://bangbros.com/websites/gloryholeloads',
description: null,
parameters: null,
},
{
id: 'latinarampage',
label: null,
network_id: 'bangbros',
name: 'Latina Rampage',
url: 'http://latinarampage.com',
description: null,
parameters: null,
},
{
id: 'livingwithanna',
label: null,
network_id: 'bangbros',
name: 'Living With Anna',
url: 'http://livingwithanna.com',
description: null,
parameters: null,
},
{
id: 'magicalfeet',
label: null,
network_id: 'bangbros',
name: 'Magical Feet',
url: 'http://magicalfeet.com',
description: null,
parameters: null,
},
{
id: 'milflessons',
label: null,
network_id: 'bangbros',
name: 'MILF Lessons',
url: 'https://bangbros.com/websites/milflessons',
description: null,
parameters: null,
},
{
id: 'milfsoup',
label: null,
network_id: 'bangbros',
name: 'Milf Soup',
url: 'http://milfsoup.com',
description: null,
parameters: null,
},
{
id: 'momishorny',
label: null,
network_id: 'bangbros',
name: 'MomIsHorny',
url: 'http://momishorny.com',
description: null,
parameters: null,
},
{
id: 'monstersofcock',
label: null,
network_id: 'bangbros',
name: 'Monsters of Cock',
url: 'http://monstersofcock.com',
description: null,
parameters: null,
},
{
id: 'mranal',
label: null,
network_id: 'bangbros',
name: 'Mr. Anal',
url: 'http://mranal.com',
description: null,
parameters: null,
},
{
id: 'mrcameltoe',
label: null,
network_id: 'bangbros',
name: 'Mr CamelToe',
url: 'https://bangbros.com/websites/mrcameltoe',
description: null,
parameters: null,
},
{
id: 'mydirtymaid',
label: null,
network_id: 'bangbros',
name: 'My Dirty Maid',
url: 'http://mydirtymaid.com',
description: null,
parameters: null,
},
{
id: 'mylifeinbrazil',
label: null,
network_id: 'bangbros',
name: 'My Life In Brazil',
url: 'http://mylifeinbrazil.com',
description: null,
parameters: null,
},
{
id: 'newbieblack',
label: null,
network_id: 'bangbros',
name: 'Newbie Black',
url: 'https://bangbros.com/websites/newbieblack',
description: null,
parameters: null,
},
{
id: 'partyof3',
label: null,
network_id: 'bangbros',
name: 'Party of 3',
url: 'http://partyof3.com',
description: null,
parameters: null,
},
{
id: 'pawg',
label: null,
network_id: 'bangbros',
name: 'Pawg',
url: 'http://pawg.com',
description: null,
parameters: null,
},
{
id: 'pennyshow',
label: null,
network_id: 'bangbros',
name: 'Penny Show',
url: 'https://bangbros.com/websites/pennyshow',
description: null,
parameters: null,
},
{
id: 'pornstarspa',
label: null,
network_id: 'bangbros',
name: 'Porn Star Spa',
url: 'http://pornstarspa.com',
description: null,
parameters: null,
},
{
id: 'powermunch',
label: null,
network_id: 'bangbros',
name: 'Power Munch',
url: 'https://bangbros.com/websites/powermunch',
description: null,
parameters: null,
},
{
id: 'publicbang',
label: null,
network_id: 'bangbros',
name: 'Public Bang',
url: 'http://publicbang.com',
description: null,
parameters: null,
},
{
id: 'sluttywhitegirls',
label: null,
network_id: 'bangbros',
name: 'Slutty White Girls',
url: 'https://bangbros.com/websites/sluttywhitegirls',
description: null,
parameters: null,
},
{
id: 'stepmomvideos',
label: null,
network_id: 'bangbros',
name: 'Stepmom Videos',
url: 'http://stepmomvideos.com',
description: null,
parameters: null,
},
{
id: 'streetranger',
label: null,
network_id: 'bangbros',
name: 'Street Ranger',
url: 'https://bangbros.com/websites/thewheeler',
description: null,
parameters: null,
},
{
id: 'tugjobs',
label: null,
network_id: 'bangbros',
name: 'Tugjobs',
url: 'http://tugjobs.com',
description: null,
parameters: null,
},
{
id: 'workinglatinas',
label: null,
network_id: 'bangbros',
name: 'Working Latinas',
url: 'http://workinglatinas.com',
description: null,
parameters: null,
},
// BLOWPASS
{
id: '1000facials',

View File

@ -33,7 +33,10 @@ function curateSites(sites) {
name: site.name,
description: site.description,
url: site.url,
networkId: site.network_id,
network: {
id: site.network_id,
name: site.network_name,
},
parameters: JSON.parse(site.parameters),
}));
}
@ -41,8 +44,10 @@ function curateSites(sites) {
async function accumulateIncludedSites() {
if (argv.networks || argv.sites) {
const rawSites = await knex('sites')
.whereIn('id', argv.sites || [])
.orWhereIn('network_id', argv.networks || []);
.select('sites.*', 'networks.name as network_name')
.whereIn('sites.id', argv.sites || [])
.orWhereIn('network_id', argv.networks || [])
.leftJoin('networks', 'sites.network_id', 'networks.id');
return curateSites(rawSites);
}
@ -50,8 +55,10 @@ async function accumulateIncludedSites() {
const included = destructConfigNetworks(config.include);
const rawSites = await knex('sites')
.whereIn('id', included.sites)
.orWhereIn('network_id', included.networks);
.select('sites.*', 'networks.name as network_name')
.whereIn('sites.id', included.sites)
.orWhereIn('network_id', included.networks)
.leftJoin('networks', 'sites.network_id', 'networks.id');
return curateSites(rawSites);
}
@ -127,7 +134,7 @@ async function fetchReleases() {
const sites = await accumulateIncludedSites();
const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.networkId];
const scraper = scrapers[site.id] || scrapers[site.network.id];
if (scraper) {
try {
@ -144,7 +151,17 @@ async function fetchReleases() {
await storeReleases(newReleases);
}
return [...newReleases, ...upcomingReleases.map(release => ({ ...release, upcoming: true }))];
return [
...newReleases.map(release => ({
...release,
network: site.network,
})),
...upcomingReleases.map(release => ({
...release,
network: site.network,
upcoming: true,
})),
];
} catch (error) {
if (argv.debug) {
console.error(`${site.id}: Failed to fetch releases`, error);

View File

@ -35,7 +35,9 @@ async function findSite(url) {
name: site.name,
description: site.description,
url: site.url,
networkId: site.network_id || site.id,
network: {
id: site.network_id || site.id,
},
parameters: site.parameters && JSON.parse(site.parameters),
isFallback: site.network_id === undefined,
};
@ -72,7 +74,7 @@ function deriveFilename(scene) {
async function fetchScene(url) {
const site = await findSite(url);
const scraper = scrapers[site.id] || scrapers[site.networkId];
const scraper = scrapers[site.id] || scrapers[site.network.id];
if (!scraper) {
throw new Error('Could not find scraper for URL');

View File

@ -63,7 +63,11 @@ async function scrapeScene(html, url, site) {
const entryId = new URL(url).pathname.split('/').slice(-1)[0];
const title = data.name;
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
const dataDate = moment.utc(data.dateCreated, 'YYYY-MM-DD');
const date = dataDate.isValid()
? dataDate.toDate()
: moment.utc(sceneElement.find('.updatedDate').text().trim(), 'MM-DD-YYYY').toDate();
const actors = data.actor
.sort(({ gender: genderA }, { gender: genderB }) => {

View File

@ -101,7 +101,7 @@ async function fetchScene(url, site) {
},
});
return scrapeScene(res.body.result.parent, url, site);
return scrapeScene(res.body.result.parent || res.body.result, url, site);
}
module.exports = {

View File

@ -4,6 +4,7 @@ const moment = require('moment');
const formatters = {
site: site => site.name,
network: network => network.name,
date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'),
actors: actors => actors.join(', '),
rating: (rating) => {

51
src/utils/try-links.js Normal file
View File

@ -0,0 +1,51 @@
'use strict';
const Promise = require('bluebird');
const bhttp = require('bhttp');
const fs = Promise.promisifyAll(require('fs'));
const knex = require('../knex');
const argv = require('../argv');
// Request options handed to every bhttp.get() call made by tryLinks().
// responseTimeout is expressed in milliseconds (30 seconds).
const options = { responseTimeout: 30 * 1000 };
/**
 * Probe the `<site id>.com` home page variants of every site that belongs to
 * the network(s) given in `argv.network`, and store the first variant that
 * answers with HTTP 200 as the site's URL.
 *
 * For each site four variants are requested in parallel; when several of them
 * answer with 200 the preferred one is picked in this order: `http://`,
 * `http://www.`, `https://`, `https://www.`. If none answers with 200 the
 * site's existing `url` is kept.
 *
 * The resulting rows are sorted by `id`, logged, and written as JSON
 * (4-space indent) to `./src/utils/link-results.json`, which is resolved
 * relative to the current working directory.
 *
 * @returns {Promise<void>} resolves once the results file has been written
 * @throws {Error} (as a rejection) when `argv.network` is empty or missing
 */
async function tryLinks() {
    // Accept both a single network id and a list of ids.
    const networks = [].concat(argv.network || []);

    if (networks.length === 0) {
        // Fail early with a readable message instead of handing `undefined` to the query builder.
        throw new Error('No network specified: argv.network is empty');
    }

    const sites = await knex('sites').whereIn('network_id', networks);

    const results = await Promise.all(sites.map(async (site) => {
        console.log(`Trying ${site.name} URLs`);

        // Single source of truth for the URL variants, listed in order of preference.
        const candidates = [
            `http://${site.id}.com`,
            `http://www.${site.id}.com`,
            `https://${site.id}.com`,
            `https://www.${site.id}.com`,
        ];

        // A failed request is turned into a placeholder whose statusCode holds the
        // error message, so it can never compare equal to 200 below.
        const responses = await Promise.all(candidates.map(candidate => bhttp
            .get(`${candidate}/`, options)
            .catch(error => ({ statusCode: error.message }))));

        console.log(`Got results for ${site.name}`);

        const workingUrl = candidates.find((candidate, index) => responses[index].statusCode === 200);

        return {
            ...site,
            url: workingUrl || site.url,
        };
    }));

    const sortedResults = results.sort((siteA, siteB) => {
        if (siteA.id > siteB.id) return 1;
        if (siteA.id < siteB.id) return -1;

        return 0;
    });

    console.log(sortedResults);

    await fs.writeFileAsync('./src/utils/link-results.json', JSON.stringify(sortedResults, null, 4));
}
// Script entry point: run the link check and make sure a failure is reported
// and reflected in the exit code rather than left as an unhandled rejection.
tryLinks().catch((error) => {
    console.error('Failed to try links', error);
    process.exitCode = 1;
});