Added a parameters column to the sites database, which fixes the Perv City scraper. The shoot ID is now extracted by all existing scrapers.
This commit is contained in:
@@ -33,6 +33,7 @@ function curateSites(sites) {
|
||||
description: site.description,
|
||||
url: site.url,
|
||||
networkId: site.network_id,
|
||||
parameters: JSON.parse(site.parameters),
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -50,7 +51,7 @@ async function fetchReleases() {
|
||||
const sites = await accumulateIncludedSites();
|
||||
|
||||
const scenesPerSite = await Promise.all(sites.map(async (site) => {
|
||||
const scraper = scrapers[site.id] || scrapers[site.network];
|
||||
const scraper = scrapers[site.id] || scrapers[site.networkId];
|
||||
|
||||
if (scraper) {
|
||||
const [latest, upcoming] = await Promise.all([
|
||||
|
||||
@@ -23,7 +23,7 @@ async function findSite(url) {
|
||||
function deriveFilename(scene) {
|
||||
const props = {
|
||||
siteName: scene.site.name,
|
||||
sceneId: scene.id,
|
||||
sceneId: scene.shootId,
|
||||
sceneTitle: scene.title,
|
||||
sceneActors: scene.actors.join(config.filename.actorsJoin),
|
||||
sceneDate: moment.utc(scene.date).format(config.filename.dateFormat),
|
||||
|
||||
@@ -15,6 +15,8 @@ function scrapeLatest(html, site) {
|
||||
const url = sceneLinkElement.attr('href');
|
||||
const title = sceneLinkElement.text();
|
||||
|
||||
const shootId = $(element).attr('data-setid');
|
||||
|
||||
const date = moment
|
||||
.utc($(element).find('.update_date').text(), 'MM/DD/YYYY')
|
||||
.toDate();
|
||||
@@ -25,6 +27,7 @@ function scrapeLatest(html, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
@@ -38,6 +41,8 @@ function scrapeUpcoming(html, site) {
|
||||
const scenesElements = $('#coming_soon_carousel').find('.table').toArray();
|
||||
|
||||
return scenesElements.map((element) => {
|
||||
const shootId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];
|
||||
|
||||
const details = $(element).find('.update_details_comingsoon')
|
||||
.eq(1)
|
||||
.children()
|
||||
@@ -59,6 +64,7 @@ function scrapeUpcoming(html, site) {
|
||||
|
||||
return {
|
||||
url: null,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
|
||||
@@ -15,7 +15,7 @@ function scrapeLatest(html, site) {
|
||||
const sceneLinkElement = $(element).find('.shoot-thumb-title a');
|
||||
const href = sceneLinkElement.attr('href');
|
||||
const url = `https://kink.com${href}`;
|
||||
const id = href.split('/')[2];
|
||||
const shootId = href.split('/')[2];
|
||||
const title = sceneLinkElement.text();
|
||||
|
||||
const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
|
||||
@@ -28,7 +28,7 @@ function scrapeLatest(html, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
id,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
@@ -41,7 +41,7 @@ function scrapeLatest(html, site) {
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, id, ratingRes, site) {
|
||||
async function scrapeScene(html, url, shootId, ratingRes, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
|
||||
@@ -70,7 +70,7 @@ async function scrapeScene(html, url, id, ratingRes, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
id,
|
||||
shootId,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
@@ -90,14 +90,14 @@ async function fetchLatest(site) {
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const id = new URL(url).pathname.split('/')[2];
|
||||
const shootId = new URL(url).pathname.split('/')[2];
|
||||
|
||||
const [res, ratingRes] = await Promise.all([
|
||||
bhttp.get(url),
|
||||
bhttp.get(`https://kink.com/api/ratings/${id}`),
|
||||
bhttp.get(`https://kink.com/api/ratings/${shootId}`),
|
||||
]);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, id, ratingRes, site);
|
||||
return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -6,29 +6,13 @@ const moment = require('moment');
|
||||
|
||||
const { matchTags } = require('../tags');
|
||||
|
||||
const tagMap = {
|
||||
'3+ on 1': 'gangbang',
|
||||
anal: 'anal',
|
||||
bbc: 'big black cock',
|
||||
'cum swallowing': 'swallowing',
|
||||
rough: 'rough',
|
||||
'deep throat': 'deepthroat',
|
||||
'double penetration (DP)': 'DP',
|
||||
'double anal (DAP)': 'DAP',
|
||||
'double vaginal (DPP)': 'DVP',
|
||||
'gapes (gaping asshole)': 'gaping',
|
||||
'huge toys': 'toys',
|
||||
interracial: 'interracial',
|
||||
'triple penetration': 'TP',
|
||||
};
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(GP|SZ|IV|GIO|AA|GL|BZ|FS)\d+/); // detect studio prefixes
|
||||
const id = sceneIdMatch ? sceneIdMatch[0] : null;
|
||||
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
|
||||
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
|
||||
|
||||
return { id, title };
|
||||
return { shootId, title };
|
||||
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
@@ -40,13 +24,13 @@ function scrapeLatest(html, site) {
|
||||
const url = sceneLinkElement.attr('href');
|
||||
|
||||
const originalTitle = sceneLinkElement.attr('title');
|
||||
const { id, title } = extractTitle(originalTitle);
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
|
||||
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
||||
|
||||
return {
|
||||
url,
|
||||
id,
|
||||
shootId,
|
||||
title,
|
||||
date,
|
||||
site,
|
||||
@@ -58,7 +42,7 @@ async function scrapeScene(html, url, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const originalTitle = $('h1.watchpage-title').text().trim();
|
||||
const { id, title } = extractTitle(originalTitle);
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
|
||||
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
|
||||
|
||||
@@ -74,7 +58,7 @@ async function scrapeScene(html, url, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
id,
|
||||
shootId,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
|
||||
@@ -7,6 +7,7 @@ const moment = require('moment');
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const shootId = $('li').attr('id');
|
||||
const sceneLinkElement = $('#scene_title_border a');
|
||||
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
|
||||
@@ -21,6 +22,7 @@ function scrape(html, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
|
||||
@@ -8,13 +8,15 @@ const { matchTags } = require('../tags');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.sceneInfo').toArray();
|
||||
const scenesElements = $('li[data-itemtype=scene]').toArray();
|
||||
|
||||
return scenesElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.sceneTitle a');
|
||||
const url = `${site.url}${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title');
|
||||
|
||||
const shootId = $(element).attr('data-itemid');
|
||||
|
||||
const date = moment
|
||||
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
|
||||
.toDate();
|
||||
@@ -29,6 +31,7 @@ function scrape(html, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
@@ -42,6 +45,7 @@ function scrape(html, site) {
|
||||
}
|
||||
|
||||
async function scrapeSceneFallback($, url, site) {
|
||||
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
|
||||
const title = $('h1.title').text();
|
||||
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
|
||||
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
@@ -54,6 +58,7 @@ async function scrapeSceneFallback($, url, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
@@ -75,6 +80,7 @@ async function scrapeScene(html, url, site) {
|
||||
}
|
||||
|
||||
const data = JSON.parse(json)[0];
|
||||
const shootId = new URL(url).pathname.split('/').slice(-1)[0];
|
||||
|
||||
const title = data.isPartOf.name;
|
||||
const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
|
||||
@@ -98,6 +104,7 @@ async function scrapeScene(html, url, site) {
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
|
||||
@@ -47,7 +47,8 @@ function renderReleases(scenes, screen) {
|
||||
},
|
||||
top: 1,
|
||||
height: screen.rows - 3,
|
||||
width: 161,
|
||||
// width: 161,
|
||||
width: config.columns.reduce((acc, column) => acc + column.width, 0),
|
||||
keys: true,
|
||||
vi: true,
|
||||
mouse: true,
|
||||
|
||||
Reference in New Issue
Block a user