Added Vixen scraper. Added LegalPorno studio IDs.

This commit is contained in:
ThePendulum 2019-04-01 02:45:15 +02:00
parent d7ef14e427
commit 439d3225ec
8 changed files with 122 additions and 34 deletions

View File

@ -50,6 +50,9 @@ exports.up = knex => Promise.resolve()
.inTable('sites');
table.string('shoot_id');
table.unique(['site_id', 'shoot_id']);
table.string('url');
table.string('title');
table.date('date');
table.text('description');

View File

@ -33,4 +33,10 @@ exports.seed = knex => Promise.resolve()
url: 'https://www.xempire.com',
description: 'XEmpire.com brings you today\'s top pornstars in beautifully shot, HD sex scenes across 4 unique porn sites of gonzo porn, interracial, lesbian & erotica!',
},
{
id: 'vixen',
name: 'Vixen',
url: 'https://www.vixen.com/',
description: 'Vixen.com features the worlds finest cinematic adult films with 4K quality and high-end erotic photography.',
},
]));

View File

@ -358,4 +358,45 @@ exports.seed = knex => Promise.resolve()
url: 'https://www.lesbianx.com',
network_id: 'xempire',
},
// VIXEN
{
id: 'vixen',
name: 'Vixen',
label: 'vixen',
description: 'Vixen.com features the worlds finest cinematic adult films with 4K quality and high-end erotic photography.',
url: 'https://www.vixen.com',
network_id: 'vixen',
},
{
id: 'blacked',
name: 'Blacked',
label: 'blackd',
description: 'Porn videos of beautiful girls in first time interracial porn videos. BLACKED has the hottest pornstars in HD sex videos.',
url: 'https://www.blacked.com',
network_id: 'vixen',
},
{
id: 'tushy',
name: 'Tushy',
label: 'tushy',
description: 'Watch the world\'s best HD Anal videos! Featuring beautiful, never before seen girls in first time anal. Exclusively on Tushy.com',
url: 'https://www.tushy.com',
network_id: 'vixen',
},
{
id: 'blackedraw',
name: 'Blacked Raw',
label: 'blkraw',
description: 'Experience real women in interracial sex videos. Passionate sex with beautiful pornstars. No photoshop just the highest quality porn. Everything you see is real.',
url: 'https://www.blackedraw.com',
network_id: 'vixen',
},
{
id: 'tushyraw',
name: 'Tushy Raw',
label: 'tshraw',
description: 'Anal sex videos with beautiful models and pornstars being fucked in the ass. TUSHY RAW features famous pornstars in high quality anal porn videos.',
url: 'https://www.tushyraw.com',
network_id: 'vixen',
},
]));

View File

@ -80,6 +80,14 @@ exports.seed = knex => Promise.resolve()
tag: 'corporal punishment',
alias_for: null,
},
{
tag: 'cowgirl',
alias_for: null,
},
{
tag: 'reverse cowgirl',
alias_for: null,
},
{
tag: 'creampie',
alias_for: null,
@ -108,6 +116,10 @@ exports.seed = knex => Promise.resolve()
tag: 'double blowjob',
alias_for: null,
},
{
tag: 'doggy style',
alias_for: null,
},
{
tag: 'ebony',
alias_for: null,
@ -168,6 +180,10 @@ exports.seed = knex => Promise.resolve()
tag: 'MILF',
alias_for: null,
},
{
tag: 'missionary',
alias_for: null,
},
{
tag: 'natural',
alias_for: null,
@ -212,6 +228,10 @@ exports.seed = knex => Promise.resolve()
tag: 'squirting',
alias_for: null,
},
{
tag: 'standing doggy style',
alias_for: null,
},
{
tag: 'swallowing',
alias_for: null,
@ -262,18 +282,6 @@ exports.seed = knex => Promise.resolve()
tag: 'ass to mouth',
alias_for: 'ATM',
},
{
tag: 'atm',
alias_for: 'ATM',
},
{
tag: 'bbc',
alias_for: 'BBC',
},
{
tag: 'bdsm',
alias_for: 'BDSM',
},
{
tag: 'big ass',
alias_for: 'big butt',
@ -334,14 +342,18 @@ exports.seed = knex => Promise.resolve()
tag: 'cunnilingus',
alias_for: 'pussy licking',
},
{
tag: 'dap',
alias_for: 'DAP',
},
{
tag: 'deep throat',
alias_for: 'deepthroat',
},
{
tag: 'doggystyle',
alias_for: 'doggy style',
},
{
tag: 'doggie style',
alias_for: 'doggy style',
},
{
tag: 'double anal penetration',
alias_for: 'DAP',
@ -362,10 +374,6 @@ exports.seed = knex => Promise.resolve()
tag: 'double penetration (dp)',
alias_for: 'DP',
},
{
tag: 'dp',
alias_for: 'DP',
},
{
tag: 'DPP',
alias_for: 'DVP',
@ -394,10 +402,6 @@ exports.seed = knex => Promise.resolve()
tag: 'double pussy penetration (dpp)',
alias_for: 'DVP',
},
{
tag: 'dvp',
alias_for: 'DVP',
},
{
tag: 'gape',
alias_for: 'gaping',
@ -418,10 +422,6 @@ exports.seed = knex => Promise.resolve()
tag: 'red head',
alias_for: 'redhead',
},
{
tag: 'milf',
alias_for: 'MILF',
},
{
tag: 'rimming',
alias_for: 'ass licking',
@ -446,6 +446,10 @@ exports.seed = knex => Promise.resolve()
tag: 'small tits',
alias_for: 'small boobs',
},
{
tag: 'standing doggystyle',
alias_for: 'standing doggy style',
},
{
tag: 'swallow',
alias_for: 'swallowing',
@ -462,10 +466,6 @@ exports.seed = knex => Promise.resolve()
tag: 'toys',
alias_for: 'toy',
},
{
tag: 'tp',
alias_for: 'TP',
},
{
tag: 'triple penetration',
alias_for: 'TP',

View File

@ -47,8 +47,40 @@ async function accumulateIncludedSites() {
return curateSites(rawSites);
}
/**
 * Loads every row currently present in the `releases` table.
 *
 * @returns {Promise<*>} Whatever the unfiltered `releases` query resolves to.
 */
async function getExistingReleases() {
  // A date cut-off is left disabled here:
  // .where('date', '>', new Date(2019, 2, 26));
  const existingReleases = await knex('releases');

  return existingReleases;
}
/**
 * Writes scraped releases to the `releases` table, skipping rows that
 * conflict with an existing one instead of aborting the whole insert.
 *
 * Optional values that a scraper did not supply are stored as explicit
 * `null`s, the same way `shoot_id` is, so that no `undefined` reaches the
 * query builder.
 *
 * @param {Object[]} releases - Scraped releases. Each must carry `site.id`;
 *   `shootId`, `url`, `title`, `date`, `description`, `duration` and a
 *   `rating` object (`likes`, `dislikes`, `stars`) are optional.
 * @returns {Promise<Object[]>} The curated rows handed to the insert
 *   (including any the database ignored), or an empty array when there was
 *   nothing to store.
 */
async function storeReleases(releases) {
  // knex compiles `undefined` values as DEFAULT unless `useNullAsDefault`
  // is configured; an explicit null is valid regardless of configuration.
  const orNull = value => (value === undefined ? null : value);

  const curatedReleases = releases.map((release) => {
    const rating = release.rating || {};

    return {
      site_id: release.site.id,
      shoot_id: release.shootId || null,
      url: orNull(release.url),
      title: orNull(release.title),
      date: orNull(release.date),
      description: orNull(release.description),
      duration: orNull(release.duration),
      likes: orNull(rating.likes),
      dislikes: orNull(rating.dislikes),
      rating: orNull(rating.stars),
    };
  });

  if (curatedReleases.length === 0) {
    return [];
  }

  console.log(`Adding ${curatedReleases.length} releases to database (if unique)`);

  // Rewrite the leading keyword of the compiled statement so that rows
  // violating a uniqueness constraint are ignored rather than raising.
  const insertQuery = knex('releases').insert(curatedReleases).toString();
  await knex.raw(insertQuery.replace('insert', 'INSERT OR IGNORE'));

  return curatedReleases;
}
async function fetchReleases() {
const sites = await accumulateIncludedSites();
const releases = await getExistingReleases();
const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.networkId];
@ -59,6 +91,10 @@ async function fetchReleases() {
scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [],
]);
console.log(`${latest.length} published releases and ${upcoming.length} upcoming releases found`);
await storeReleases(latest, releases);
return [...latest, ...upcoming];
}

View File

@ -5,7 +5,7 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
const tagMap = {};
const { matchTags } = require('../tags');
function scrapeLatest(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });

View File

@ -5,6 +5,7 @@ const julesjordan = require('./julesjordan');
const kink = require('./kink');
const legalporno = require('./legalporno');
const pervcity = require('./pervcity');
const vixen = require('./vixen');
module.exports = {
xempire,
@ -12,4 +13,5 @@ module.exports = {
kink,
legalporno,
pervcity,
vixen,
};

View File

@ -8,7 +8,7 @@ const { matchTags } = require('../tags');
function extractTitle(originalTitle) {
const titleComponents = originalTitle.split(' ');
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS)\d+/); // detect studio prefixes
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;