Added parameters column to sites database, fixed the Perv City scraper, and added shoot ID extraction to all existing scrapers.

This commit is contained in:
2019-03-26 01:26:47 +01:00
parent 6a20cbc721
commit 8421cd8648
11 changed files with 45 additions and 37 deletions

View File

@@ -33,6 +33,7 @@ function curateSites(sites) {
description: site.description,
url: site.url,
networkId: site.network_id,
parameters: JSON.parse(site.parameters),
}));
}
@@ -50,7 +51,7 @@ async function fetchReleases() {
const sites = await accumulateIncludedSites();
const scenesPerSite = await Promise.all(sites.map(async (site) => {
const scraper = scrapers[site.id] || scrapers[site.network];
const scraper = scrapers[site.id] || scrapers[site.networkId];
if (scraper) {
const [latest, upcoming] = await Promise.all([

View File

@@ -23,7 +23,7 @@ async function findSite(url) {
function deriveFilename(scene) {
const props = {
siteName: scene.site.name,
sceneId: scene.id,
sceneId: scene.shootId,
sceneTitle: scene.title,
sceneActors: scene.actors.join(config.filename.actorsJoin),
sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),

View File

@@ -15,6 +15,8 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href');
const title = sceneLinkElement.text();
const shootId = $(element).attr('data-setid');
const date = moment
.utc($(element).find('.update_date').text(), 'MM/DD/YYYY')
.toDate();
@@ -25,6 +27,7 @@ function scrapeLatest(html, site) {
return {
url,
shootId,
title,
actors,
date,
@@ -38,6 +41,8 @@ function scrapeUpcoming(html, site) {
const scenesElements = $('#coming_soon_carousel').find('.table').toArray();
return scenesElements.map((element) => {
const shootId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];
const details = $(element).find('.update_details_comingsoon')
.eq(1)
.children()
@@ -59,6 +64,7 @@ function scrapeUpcoming(html, site) {
return {
url: null,
shootId,
title,
actors,
date,

View File

@@ -15,7 +15,7 @@ function scrapeLatest(html, site) {
const sceneLinkElement = $(element).find('.shoot-thumb-title a');
const href = sceneLinkElement.attr('href');
const url = `https://kink.com${href}`;
const id = href.split('/')[2];
const shootId = href.split('/')[2];
const title = sceneLinkElement.text();
const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
@@ -28,7 +28,7 @@ function scrapeLatest(html, site) {
return {
url,
id,
shootId,
title,
actors,
date,
@@ -41,7 +41,7 @@ function scrapeLatest(html, site) {
});
}
async function scrapeScene(html, url, id, ratingRes, site) {
async function scrapeScene(html, url, shootId, ratingRes, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
@@ -70,7 +70,7 @@ async function scrapeScene(html, url, id, ratingRes, site) {
return {
url,
id,
shootId,
title,
date,
actors,
@@ -90,14 +90,14 @@ async function fetchLatest(site) {
}
async function fetchScene(url, site) {
const id = new URL(url).pathname.split('/')[2];
const shootId = new URL(url).pathname.split('/')[2];
const [res, ratingRes] = await Promise.all([
bhttp.get(url),
bhttp.get(`https://kink.com/api/ratings/${id}`),
bhttp.get(`https://kink.com/api/ratings/${shootId}`),
]);
return scrapeScene(res.body.toString(), url, id, ratingRes, site);
return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
}
module.exports = {

View File

@@ -6,29 +6,13 @@ const moment = require('moment');
const { matchTags } = require('../tags');
const tagMap = {
'3+ on 1': 'gangbang',
anal: 'anal',
bbc: 'big black cock',
'cum swallowing': 'swallowing',
rough: 'rough',
'deep throat': 'deepthroat',
'double penetration (DP)': 'DP',
'double anal (DAP)': 'DAP',
'double vaginal (DPP)': 'DVP',
'gapes (gaping asshole)': 'gaping',
'huge toys': 'toys',
interracial: 'interracial',
'triple penetration': 'TP',
};
function extractTitle(originalTitle) {
const titleComponents = originalTitle.split(' ');
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes
const id = sceneIdMatch ? sceneIdMatch[0] : null;
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
return { id, title };
return { shootId, title };
}
function scrapeLatest(html, site) {
@@ -40,13 +24,13 @@ function scrapeLatest(html, site) {
const url = sceneLinkElement.attr('href');
const originalTitle = sceneLinkElement.attr('title');
const { id, title } = extractTitle(originalTitle);
const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
return {
url,
id,
shootId,
title,
date,
site,
@@ -58,7 +42,7 @@ async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const originalTitle = $('h1.watchpage-title').text().trim();
const { id, title } = extractTitle(originalTitle);
const { shootId, title } = extractTitle(originalTitle);
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
@@ -74,7 +58,7 @@ async function scrapeScene(html, url, site) {
return {
url,
id,
shootId,
title,
date,
actors,

View File

@@ -7,6 +7,7 @@ const moment = require('moment');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const shootId = $('li').attr('id');
const sceneLinkElement = $('#scene_title_border a');
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
@@ -21,6 +22,7 @@ function scrape(html, site) {
return {
url,
shootId,
title,
actors,
date,

View File

@@ -8,13 +8,15 @@ const { matchTags } = require('../tags');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.sceneInfo').toArray();
const scenesElements = $('li[data-itemtype=scene]').toArray();
return scenesElements.map((element) => {
const sceneLinkElement = $(element).find('.sceneTitle a');
const url = `${site.url}${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title');
const shootId = $(element).attr('data-itemid');
const date = moment
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
.toDate();
@@ -29,6 +31,7 @@ function scrape(html, site) {
return {
url,
shootId,
title,
actors,
date,
@@ -42,6 +45,7 @@ function scrape(html, site) {
}
async function scrapeSceneFallback($, url, site) {
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = $('h1.title').text();
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
@@ -54,6 +58,7 @@ async function scrapeSceneFallback($, url, site) {
return {
url,
shootId,
title,
date,
actors,
@@ -75,6 +80,7 @@ async function scrapeScene(html, url, site) {
}
const data = JSON.parse(json)[0];
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
const title = data.isPartOf.name;
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
@@ -98,6 +104,7 @@ async function scrapeScene(html, url, site) {
return {
url,
shootId,
title,
date,
actors,

View File

@@ -47,7 +47,8 @@ function renderReleases(scenes, screen) {
},
top: 1,
height: screen.rows - 3,
width: 161,
// width: 161,
width: config.columns.reduce((acc, column) => acc + column.width, 0),
keys: true,
vi: true,
mouse: true,