Added DDFNetwork scraper. Modified tag matching query to be case insensitive.

This commit is contained in:
ThePendulum 2019-04-07 05:01:06 +02:00
parent c56068ab99
commit c8abb302e4
12 changed files with 305 additions and 9 deletions

View File

@ -42,6 +42,16 @@ The latest releases from your favorite porn studios in one place.
* Teens Like It Big
* Teens Like It Black
* ZZ Series
* **DDF Network**
* 1By-Day
* DDF Busty
* DDF Network VR
* Euro Girls on Girls
* Euro Teen Erotica
* Hands on Hardcore
* Hot Legs and Feet
* House of Taboo
* Only Blowjob
* **Jules Jordan**
* **Kink**
* 30 Minutes of Torment
@ -76,6 +86,22 @@ The latest releases from your favorite porn studios in one place.
* Whipped Ass
* Wired Pussy
* **LegalPorno**
* **MOFOS**
* Blogs
* Don't Break Me
* Ebony Sex Tapes
* Girls Gone Pink
* I Know That Girl
* Latina Sex Tapes
* Lets Try Anal
* MOFOS Lab
* Mofos B Sides
* Pervs On Patrol
* Public Pickups
* Real Slut Party
* Share My BF
* She's A Freak
* Stranded Teens
* **Perv City**
* Anal Overdose
* Banging Beauties

View File

@ -2,6 +2,7 @@
module.exports = {
include: [
'blowpass',
['brazzers', [
'assesinpublic',
'babygotboobs',
@ -52,6 +53,7 @@ module.exports = {
'waterbondage',
]],
'legalporno',
'mofos',
'pervcity',
['private', [
'analintroductions',

View File

@ -16,6 +16,12 @@ exports.seed = knex => Promise.resolve()
url: 'https://www.brazzers.com',
description: 'Brazzers homepage is updated daily with official HD porn scenes. Our hottest videos and sex series are filled with big tits, sexy milf, top pornstars and special events.',
},
{
id: 'ddfnetwork',
name: 'DDF Network',
url: 'https://ddfnetwork.com',
description: 'European porn videos hub with exclusive VR, 4K and full HD XXX videos and hot sex photos of Europes finest porn star babes.',
},
{
id: 'julesjordan',
name: 'Jules Jordan',

View File

@ -310,6 +310,79 @@ exports.seed = knex => Promise.resolve()
description: "Giant black dicks paired with round asses and garnished with the tightest pussies of all colors. Butts and Blacks delivers on its name sake, only the biggest dicks rocking the thickest chicks. These round honeys can take it all in and bounce around like it's a pogo stick. Come check out these soft round asses getting the attention they deserve.",
network_id: 'brazzers',
},
// DDF NETWORK
{
id: 'ddfbusty',
name: 'DDF Busty',
label: 'ddfbus',
url: 'https://ddfbusty.com',
description: 'Gorgeous Babes with big tits and Euro pornstars with huge natural boobs filmed in Exclusive Full HD, 4K, & VR porn videos.',
network_id: 'ddfnetwork',
},
{
id: 'handsonhardcore',
name: 'Hands on Hardcore',
label: 'ddfhoh',
url: 'https://handsonhardcore.com',
description: 'Hardcore Sex & Anal Fucking Exclusive XXX Videos in VR, 4K and full HD with Hot European Pornstars',
network_id: 'ddfnetwork',
},
{
id: 'houseoftaboo',
name: 'House of Taboo',
label: 'htaboo',
url: 'https://houseoftaboo.com',
description: 'Exclusive BDSM Porn & Extreme Sex Videos Produced in VR, 4K and full HD with The Hottest European Fetish Pornstars',
network_id: 'ddfnetwork',
},
{
id: 'ddfnetworkvr',
name: 'DDF Network VR',
label: 'ddfnvr',
url: 'https://ddfnetworkvr.com',
description: 'VR Porn Videos shot Exclusively in 180 3D 4K Virtual Reality featuring the Hottest European & American VR Pornstar Babes',
network_id: 'ddfnetwork',
},
{
id: 'eurogirlsongirls',
name: 'Euro Girls on Girls',
label: 'eurgrl',
url: 'https://eurogirlsongirls.com',
description: 'Hot Lesbian Sex & Glamour Lesbian Porn Videos and Photos Starring Gorgeous European Pornstars in 4K and Full HD VR.',
network_id: 'ddfnetwork',
},
{
id: '1byday',
name: '1By-Day',
label: '1byday',
url: 'https://1by-day.com',
description: 'Ultra Sexy Exclusive Solo Masturbation Videos in VR, 4K and full HD showcasing Glamour Babes & Intense Orgasms',
network_id: 'ddfnetwork',
},
{
id: 'euroteenerotica',
name: 'Euro Teen Erotica',
label: 'eurero',
url: 'https://euroteenerotica.com',
description: 'Teen Threesomes & Barely Legal Porn Videos in 4K, VR and FULL HD with Hot Nymphomaniac Teen Babes',
network_id: 'ddfnetwork',
},
{
id: 'hotlegsandfeet',
name: 'Hot Legs and Feet',
label: 'hotleg',
url: 'https://hotlegsandfeet.com',
description: 'Foot Fetish & Sexy Legs Porn Videos with Hot and Sexy Euro Pornstars',
network_id: 'ddfnetwork',
},
{
id: 'onlyblowjob',
name: 'Only Blowjob',
label: 'onlbj',
url: 'https://onlyblowjob.com',
description: 'Fantasy Blowjobs & POV Cock Sucking Videos and Photos Produced in VR, 4K and full HD featuring Sexy European Pornstars',
network_id: 'ddfnetwork',
},
// JULES JORDAN
{
id: 'julesjordan',

View File

@ -3,6 +3,7 @@
/* eslint-disable max-len */
exports.seed = knex => Promise.resolve()
.then(() => knex('tags').del())
.then(() => knex('tags_groups').del())
.then(() => knex('tags_groups').insert([
{
group: 'penetration',
@ -91,6 +92,10 @@ exports.seed = knex => Promise.resolve()
tag: 'ATM',
alias_for: null,
},
{
tag: 'ball licking',
alias_for: null,
},
{
tag: 'bathroom',
alias_for: null,
@ -212,6 +217,11 @@ exports.seed = knex => Promise.resolve()
tag: 'doggy style',
alias_for: null,
},
{
tag: 'dress',
alias_for: null,
group_id: 'clothing',
},
{
tag: 'ebony',
alias_for: null,
@ -228,14 +238,17 @@ exports.seed = knex => Promise.resolve()
{
tag: 'European',
alias_for: null,
group_id: 'ethnicity',
},
{
tag: 'facefucking',
alias_for: null,
group_id: 'position',
},
{
tag: 'facesitting',
alias_for: null,
group_id: 'position',
},
{
tag: 'facial',
@ -293,6 +306,11 @@ exports.seed = knex => Promise.resolve()
alias_for: null,
group_id: 'clothing',
},
{
tag: 'Hungarian',
alias_for: null,
group_id: 'ethnicity',
},
{
tag: 'humiliation',
alias_for: null,
@ -301,6 +319,10 @@ exports.seed = knex => Promise.resolve()
tag: 'interracial',
alias_for: null,
},
{
tag: 'kissing',
alias_for: null,
},
{
tag: 'latex',
alias_for: null,
@ -326,6 +348,10 @@ exports.seed = knex => Promise.resolve()
alias_for: null,
group_id: 'clothing',
},
{
tag: 'masturbation',
alias_for: null,
},
{
tag: 'MILF',
alias_for: null,
@ -336,13 +362,18 @@ exports.seed = knex => Promise.resolve()
alias_for: null,
group_id: 'group',
},
{
tag: 'miniskirt',
alias_for: null,
group_id: 'clothing',
},
{
tag: 'missionary',
alias_for: null,
group_id: 'position',
},
{
tag: 'natural',
tag: 'natural boobs',
alias_for: null,
group_id: 'body',
},
@ -422,10 +453,25 @@ exports.seed = knex => Promise.resolve()
tag: 'shaved',
alias_for: null,
},
{
tag: 'shoes on',
alias_for: null,
group_id: 'clothing',
},
{
tag: 'skirt',
alias_for: null,
group_id: 'clothing',
},
{
tag: 'slapping',
alias_for: null,
},
{
tag: 'socks',
alias_for: null,
group_id: 'clothing',
},
{
tag: 'spanking',
alias_for: null,
@ -438,6 +484,10 @@ exports.seed = knex => Promise.resolve()
tag: 'small boobs',
alias_for: null,
},
{
tag: 'small butt',
alias_for: null,
},
{
tag: 'speculum',
alias_for: null,
@ -482,6 +532,10 @@ exports.seed = knex => Promise.resolve()
alias_for: null,
group_id: 'age',
},
{
tag: 'titty fuck',
alias_for: null,
},
{
tag: 'toys',
alias_for: null,
@ -759,6 +813,10 @@ exports.seed = knex => Promise.resolve()
tag: 'enhanced',
alias_for: 'enhanced boobs',
},
{
tag: 'enhanced tits',
alias_for: 'enhanced boobs',
},
{
tag: 'face sitting',
alias_for: 'facesitting',
@ -783,6 +841,10 @@ exports.seed = knex => Promise.resolve()
tag: 'foot fetish',
alias_for: 'feet',
},
{
tag: 'French kissing',
alias_for: 'kissing',
},
{
tag: 'gape',
alias_for: 'gaping',
@ -795,6 +857,10 @@ exports.seed = knex => Promise.resolve()
tag: 'gapes (gaping asshole)',
alias_for: 'gaping',
},
{
tag: 'group sex',
alias_for: 'orgy',
},
{
tag: 'flagellation',
alias_for: 'corporal punishment',
@ -815,6 +881,10 @@ exports.seed = knex => Promise.resolve()
tag: 'lezdom',
alias_for: 'lesbian',
},
{
tag: 'mini-skirt',
alias_for: 'miniskirt',
},
{
tag: 'MMF',
alias_for: 'MFM',
@ -823,6 +893,14 @@ exports.seed = knex => Promise.resolve()
tag: 'MFF',
alias_for: 'FMF',
},
{
tag: 'natural',
alias_for: 'natural boobs',
},
{
tag: 'natural tits',
alias_for: 'natural boobs',
},
{
tag: 'oral',
alias_for: 'blowjob',
@ -903,10 +981,18 @@ exports.seed = knex => Promise.resolve()
tag: 'shaved pussy',
alias_for: 'shaved',
},
{
tag: 'shoes',
alias_for: 'shoes on',
},
{
tag: 'slave',
alias_for: 'BDSM',
},
{
tag: 'small ass',
alias_for: 'small butt',
},
{
tag: 'small tits',
alias_for: 'small boobs',

View File

@ -121,7 +121,6 @@ async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page
async function fetchReleases() {
const sites = await accumulateIncludedSites();
// const releases = await getExistingReleases();
const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.networkId];

View File

@ -13,19 +13,19 @@ async function findSite(url) {
.where({ url: `${protocol}//www.${hostname}` })
.orWhere({ url: `${protocol}//${hostname}` })
.first()
// scenes might be listed on network site, let network scraper find channel site
// scene might use generic network URL, let network scraper determine channel site
|| await knex('networks')
.where({ url: `${protocol}//www.${hostname}` })
.orWhere({ url: `${protocol}//${hostname}` })
.first();
return {
id: site.id,
name: site.name,
description: site.description,
url: site.url,
networkId: site.network_id || site.id,
isFallback: site.network_id === undefined,
};
}

View File

@ -0,0 +1,91 @@
'use strict';
/* eslint-disable */
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
const knex = require('../knex');
const { matchTags } = require('../tags');
/**
 * Turn a DDF Network listing page into a list of release summaries.
 *
 * Every `.card.m-1` element on the page yields one release. The scene URL is
 * the site's base URL with the card's first link href appended, and the last
 * path segment of that URL doubles as the entry ID.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site the listing was requested for; `site.url` is
 *   used as the URL prefix and the object itself is attached to each release.
 * @returns {Object[]} One release object per scene card.
 */
function scrapeLatest(html, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });

  return $('.card.m-1').toArray().map((card) => {
    const $card = $(card);
    const $link = $card.find('a').first();

    const url = `${site.url}${$link.attr('href')}`;
    const urlSegments = url.split('/');

    const dateText = $card.find('.card-footer .text-muted').text();
    // the last four characters are dropped before parsing;
    // presumably a ' min' suffix, with minutes converted to seconds below — TODO confirm
    const durationText = $card.find('.card-info div:nth-child(2) .card-text').text();

    const actors = $card
      .find('.card-subtitle a')
      .map((anchorIndex, anchor) => $(anchor).text().trim())
      .toArray()
      .filter(Boolean); // drop empty names

    return {
      url,
      entryId: urlSegments[urlSegments.length - 1],
      title: $link.attr('title'),
      actors,
      date: moment.utc(dateText, 'MMMM DD, YYYY').toDate(),
      duration: Number(durationText.slice(0, -4)) * 60,
      rating: null,
      site,
    };
  });
}
/**
 * Scrape a single DDF Network scene page into a release object.
 *
 * When `site.isFallback` is set, the channel site is looked up in the `sites`
 * table by the origin of the first actor card's `itemprop="url"` meta tag.
 * If that tag is absent or unparseable, or the lookup finds nothing, the
 * supplied `site` and `url` are used unchanged.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL the scene was requested by; its last path segment
 *   is used as the entry ID.
 * @param {Object} site - Site (or fallback network entry) the URL matched.
 * @returns {Promise<Object>} Release with url, entryId, title, description,
 *   actors, date, duration, tags, rating and site.
 */
async function scrapeScene(html, url, site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const entryId = url.split('/').slice(-1)[0];

  const title = $('.video-title h1').text();
  const description = $('.description-box .box-container').text();

  const date = moment.utc($('.video-title .remain time').text(), 'MMMM DD, YYYY').toDate();
  const actors = $('.pornstars-box .pornstar-card .card-title a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

  const likes = Number($('.info-panel.likes .likes').text());
  // the last four characters are dropped before parsing;
  // presumably a ' min' suffix, with minutes converted to seconds — TODO confirm
  const duration = Number($('.info-panel.duration .duration').text().slice(0, -4)) * 60;

  const rawTags = $('#tagsBox .tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

  // don't find site if original is already specific
  let channelSiteLookup = site;

  if (site.isFallback) {
    const actorUrl = $('.pornstar-card meta[itemprop="url"]').first().attr('content');
    let origin = null;

    if (actorUrl) {
      try {
        ({ origin } = new URL(actorUrl));
      } catch (error) {
        // not an absolute URL; keep the fallback site rather than failing the whole scene
        origin = null;
      }
    }

    channelSiteLookup = origin ? knex('sites').where({ url: origin }).first() : null;
  }

  const [channelSite, tags] = await Promise.all([
    channelSiteLookup,
    matchTags(rawTags),
  ]);

  return {
    url: channelSite ? `${channelSite.url}${new URL(url).pathname}` : url,
    entryId,
    title,
    description,
    actors,
    date,
    duration,
    tags,
    rating: {
      likes,
    },
    site: channelSite || site,
  };
}
/**
 * Fetch one page of the latest-releases search on ddfnetwork.com, filtered by
 * the hostname of the given site, and scrape it.
 *
 * @param {Object} site - Site whose `url` hostname selects the listing.
 * @param {number} [page=1] - Page number placed in the search URL.
 * @returns {Promise<Object[]>} Releases extracted by `scrapeLatest`.
 */
async function fetchLatest(site, page = 1) {
  const { hostname } = new URL(site.url);
  const response = await bhttp.get(`https://ddfnetwork.com/videos/search/latest/ever/${hostname}/-/${page}`);
  const html = response.body.toString();

  return scrapeLatest(html, site);
}
/**
 * Fetch a scene page and scrape it. The request always goes to
 * ddfnetwork.com, reusing only the path of the supplied URL.
 *
 * @param {string} url - Scene URL as originally supplied.
 * @param {Object} site - Site (or fallback network entry) the URL matched.
 * @returns {Promise<Object>} Release extracted by `scrapeScene`.
 */
async function fetchScene(url, site) {
  const { pathname } = new URL(url);
  const response = await bhttp.get(`https://ddfnetwork.com${pathname}`);
  const html = response.body.toString();

  return scrapeScene(html, url, site);
}
// public entry points of this scraper
module.exports = { fetchLatest, fetchScene };

View File

@ -2,6 +2,7 @@
const blowpass = require('./blowpass');
const brazzers = require('./brazzers');
const ddfnetwork = require('./ddfnetwork');
const julesjordan = require('./julesjordan');
const kink = require('./kink');
const legalporno = require('./legalporno');
@ -14,6 +15,7 @@ const xempire = require('./xempire');
module.exports = {
blowpass,
brazzers,
ddfnetwork,
julesjordan,
kink,
legalporno,

View File

@ -49,8 +49,6 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
const title = $('h1.shoot-title span.favorite-button').attr('data-title');
const actorsRaw = $('.shoot-info p.starring');
const sitename = $('.shoot-logo a').attr('href').split('/')[2];
const date = moment.utc($(actorsRaw)
.prev()
.text()
@ -64,6 +62,7 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
const { average: stars } = ratingRes.body;
const sitename = $('.shoot-logo a').attr('href').split('/')[2];
const rawTags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const [channelSite, tags] = await Promise.all([

View File

@ -12,6 +12,8 @@ function scrapeLatest(html, site) {
const sceneElements = $('.scenes-latest').toArray();
return sceneElements.map((element) => {
const actors = $('.actors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
return {
url,
shootId,
@ -33,6 +35,8 @@ function scrapeUpcoming(html, site) {
const sceneElements = $('.scenes-upcoming').toArray();
return sceneElements.map((element) => {
const actors = $('.actors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
return {
url,
shootId,
@ -55,6 +59,11 @@ async function scrapeScene(html, url, site) {
const rawTags = [];
const tags = await matchTags(rawTags);
const actors = $('.actors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const rawTags = $('.tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const tags = await matchTags(rawTags);
return {
url,
shootId,

View File

@ -3,11 +3,14 @@
const knex = require('./knex');
async function matchTags(rawTags) {
const tagEntries = await knex('tags')
const tagQuery = knex('tags')
.select(knex.raw('ifnull(original.tag, tags.tag) as tag'), knex.raw('ifnull(original.tag, tags.tag) as tag'))
.whereIn('tags.tag', rawTags)
.orWhereIn('tags.tag', rawTags.map(tag => tag.toLowerCase()))
.leftJoin('tags as original', 'tags.alias_for', 'original.tag');
.leftJoin('tags as original', 'tags.alias_for', 'original.tag')
.toString()
.replace('where `tags`.`tag` in', 'where `tags`.`tag` collate NOCASE in');
const tagEntries = await knex.raw(tagQuery);
return Array.from(new Set(tagEntries.map(({ tag }) => tag))).sort(); // reduce to tag name and filter duplicates
}