From 439d3225ec8895727558bb755b8cd478758508fb Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Mon, 1 Apr 2019 02:45:15 +0200 Subject: [PATCH] Added Vixen scraper. Added LegalPorno studio IDs. --- migrations/20190325001339_releases.js | 3 ++ seeds/networks.js | 6 +++ seeds/sites.js | 41 +++++++++++++++++ seeds/tags.js | 64 +++++++++++++-------------- src/fetch-releases.js | 36 +++++++++++++++ src/scrapers/boilerplate.js | 2 +- src/scrapers/index.js | 2 + src/scrapers/legalporno.js | 2 +- 8 files changed, 122 insertions(+), 34 deletions(-) diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index 0a7b4b8a..c039d171 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -50,6 +50,9 @@ exports.up = knex => Promise.resolve() .inTable('sites'); table.string('shoot_id'); + table.unique(['site_id', 'shoot_id']); + + table.string('url'); table.string('title'); table.date('date'); table.text('description'); diff --git a/seeds/networks.js b/seeds/networks.js index 98c189f2..8927512c 100644 --- a/seeds/networks.js +++ b/seeds/networks.js @@ -33,4 +33,10 @@ exports.seed = knex => Promise.resolve() url: 'https://www.xempire.com', description: 'XEmpire.com brings you today\'s top pornstars in beautifully shot, HD sex scenes across 4 unique porn sites of gonzo porn, interracial, lesbian & erotica!', }, + { + id: 'vixen', + name: 'Vixen', + url: 'https://www.vixen.com/', + description: 'Vixen.com features the world’s finest cinematic adult films with 4K quality and high-end erotic photography.', + }, ])); diff --git a/seeds/sites.js b/seeds/sites.js index 1163e3c2..1d95ee5a 100644 --- a/seeds/sites.js +++ b/seeds/sites.js @@ -358,4 +358,45 @@ exports.seed = knex => Promise.resolve() url: 'https://www.lesbianx.com', network_id: 'xempire', }, + // VIXEN + { + id: 'vixen', + name: 'Vixen', + label: 'vixen', + description: 'Vixen.com features the world’s finest cinematic adult films with 4K quality and high-end erotic photography.', + url: 'https://www.vixen.com', + network_id: 'vixen', + }, + { + id: 'blacked', + name: 'Blacked', + label: 'blackd', + description: 'Porn videos of beautiful girls in first time interracial porn videos. BLACKED has the hottest pornstars in HD sex videos.', + url: 'https://www.blacked.com', + network_id: 'vixen', + }, + { + id: 'tushy', + name: 'Tushy', + label: 'tushy', + description: 'Watch the world\'s best HD Anal videos! Featuring beautiful, never before seen girls in first time anal. Exclusively on Tushy.com', + url: 'https://www.tushy.com', + network_id: 'vixen', + }, + { + id: 'blackedraw', + name: 'Blacked Raw', + label: 'blkraw', + description: 'Experience real women in interracial sex videos. Passionate sex with beautiful pornstars. No photoshop just the highest quality porn. Everything you see is real.', + url: 'https://www.blackedraw.com', + network_id: 'vixen', + }, + { + id: 'tushyraw', + name: 'Tushy Raw', + label: 'tshraw', + description: 'Anal sex videos with beautiful models and pornstars being fucked in the ass. TUSHY RAW features famous pornstars in high quality anal porn videos.', + url: 'https://www.tushyraw.com', + network_id: 'vixen', + }, ])); diff --git a/seeds/tags.js b/seeds/tags.js index cfd85452..ef3cd171 100644 --- a/seeds/tags.js +++ b/seeds/tags.js @@ -80,6 +80,14 @@ exports.seed = knex => Promise.resolve() tag: 'corporal punishment', alias_for: null, }, + { + tag: 'cowgirl', + alias_for: null, + }, + { + tag: 'reverse cowgirl', + alias_for: null, + }, { tag: 'creampie', alias_for: null, @@ -108,6 +116,10 @@ exports.seed = knex => Promise.resolve() tag: 'double blowjob', alias_for: null, }, + { + tag: 'doggy style', + alias_for: null, + }, { tag: 'ebony', alias_for: null, @@ -168,6 +180,10 @@ exports.seed = knex => Promise.resolve() tag: 'MILF', alias_for: null, }, + { + tag: 'missionary', + alias_for: null, + }, { tag: 'natural', alias_for: null, @@ -212,6 +228,10 @@ exports.seed = knex => Promise.resolve() tag: 'squirting', alias_for: null, }, + { + tag: 'standing doggy style', + alias_for: null, + }, { tag: 'swallowing', alias_for: null, @@ -262,18 +282,6 @@ exports.seed = knex => Promise.resolve() tag: 'ass to mouth', alias_for: 'ATM', }, - { - tag: 'atm', - alias_for: 'ATM', - }, - { - tag: 'bbc', - alias_for: 'BBC', - }, - { - tag: 'bdsm', - alias_for: 'BDSM', - }, { tag: 'big ass', alias_for: 'big butt', @@ -334,14 +342,18 @@ exports.seed = knex => Promise.resolve() tag: 'cunnilingus', alias_for: 'pussy licking', }, - { - tag: 'dap', - alias_for: 'DAP', - }, { tag: 'deep throat', alias_for: 'deepthroat', }, + { + tag: 'doggystyle', + alias_for: 'doggy style', + }, + { + tag: 'doggie style', + alias_for: 'doggy style', + }, { tag: 'double anal penetration', alias_for: 'DAP', @@ -362,10 +374,6 @@ exports.seed = knex => Promise.resolve() tag: 'double penetration (dp)', alias_for: 'DP', }, - { - tag: 'dp', - alias_for: 'DP', - }, { tag: 'DPP', alias_for: 'DVP', @@ -394,10 +402,6 @@ exports.seed = knex => Promise.resolve() tag: 'double pussy penetration (dpp)', alias_for: 'DVP', }, - { - tag: 'dvp', - alias_for: 'DVP', - }, { tag: 'gape', alias_for: 'gaping', @@ -418,10 +422,6 @@ exports.seed = knex => Promise.resolve() tag: 'red head', alias_for: 'redhead', }, - { - tag: 'milf', - alias_for: 'MILF', - }, { tag: 'rimming', alias_for: 'ass licking', @@ -446,6 +446,10 @@ exports.seed = knex => Promise.resolve() tag: 'small tits', alias_for: 'small boobs', }, + { + tag: 'standing doggystyle', + alias_for: 'standing doggy style', + }, { tag: 'swallow', alias_for: 'swallowing', @@ -462,10 +466,6 @@ exports.seed = knex => Promise.resolve() tag: 'toys', alias_for: 'toy', }, - { - tag: 'tp', - alias_for: 'TP', - }, { tag: 'triple penetration', alias_for: 'TP', diff --git a/src/fetch-releases.js b/src/fetch-releases.js index f9a120c8..bed247da 100644 --- a/src/fetch-releases.js +++ b/src/fetch-releases.js @@ -47,8 +47,40 @@ async function accumulateIncludedSites() { return curateSites(rawSites); } +async function getExistingReleases() { + return knex('releases'); + // .where('date', '>', new Date(2019, 2, 26)); +} + +async function storeReleases(releases) { + const curatedReleases = releases.map(release => ({ + site_id: release.site.id, + shoot_id: release.shootId || null, + url: release.url, + title: release.title, + date: release.date, + description: release.description, + duration: release.duration, + likes: release.rating && release.rating.likes, + dislikes: release.rating && release.rating.dislikes, + rating: release.rating && release.rating.stars, + })); + + if (curatedReleases.length) { + console.log(`Adding ${curatedReleases.length} releases to database (if unique)`); + + const insertQuery = knex('releases').insert(curatedReleases).toString(); + await knex.raw(insertQuery.replace('insert', 'INSERT OR IGNORE')); + + return curatedReleases; + } + + return []; +} + async function fetchReleases() { const sites = await accumulateIncludedSites(); + const releases = await getExistingReleases(); const scenesPerSite = await Promise.all(sites.map(async (site) => { const scraper = scrapers[site.id] || scrapers[site.networkId]; @@ -59,6 +91,10 @@ async function fetchReleases() { scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [], ]); + console.log(`${latest.length} published releases and ${upcoming.length} upcoming releases found`); + + await storeReleases(latest, releases); + return [...latest, ...upcoming]; } diff --git a/src/scrapers/boilerplate.js b/src/scrapers/boilerplate.js index bdf0b1c9..89b966d9 100644 --- a/src/scrapers/boilerplate.js +++ b/src/scrapers/boilerplate.js @@ -5,7 +5,7 @@ const bhttp = require('bhttp'); const cheerio = require('cheerio'); const moment = require('moment'); -const tagMap = {}; +const { matchTags } = require('../tags'); function scrapeLatest(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); diff --git a/src/scrapers/index.js b/src/scrapers/index.js index 046ec6fd..0c42a2ff 100644 --- a/src/scrapers/index.js +++ b/src/scrapers/index.js @@ -5,6 +5,7 @@ const julesjordan = require('./julesjordan'); const kink = require('./kink'); const legalporno = require('./legalporno'); const pervcity = require('./pervcity'); +const vixen = require('./vixen'); module.exports = { xempire, @@ -12,4 +13,5 @@ module.exports = { kink, legalporno, pervcity, + vixen, }; diff --git a/src/scrapers/legalporno.js b/src/scrapers/legalporno.js index 7930d8ea..f58804d3 100644 --- a/src/scrapers/legalporno.js +++ b/src/scrapers/legalporno.js @@ -8,7 +8,7 @@ const { matchTags } = require('../tags'); function extractTitle(originalTitle) { const titleComponents = originalTitle.split(' '); - const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes + const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS)\d+/); // detect studio prefixes const shootId = sceneIdMatch ? sceneIdMatch[0] : null; const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;