Refactored 21sextury scraper.
This commit is contained in:
@@ -6,7 +6,6 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
const { matchTags } = require('../tags');
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
function scrapeLatest(html, site) {
|
||||
@@ -49,13 +48,16 @@ async function scrapeScene(html, url, site) {
|
||||
const title = $('meta[itemprop="name"]').attr('content');
|
||||
const description = $('.descr-box p').text(); // meta tags don't contain full description
|
||||
|
||||
const date = moment.utc($('meta[itemprop="uploadDate"]').attr('content'), 'YYYY-MM-DD').toDate();
|
||||
const dateProp = $('meta[itemprop="uploadDate"]').attr('content');
|
||||
const date = dateProp
|
||||
? moment.utc($('meta[itemprop="uploadDate"]').attr('content'), 'YYYY-MM-DD').toDate()
|
||||
: moment.utc($('.title-border:nth-child(2) p').text(), 'MM.DD.YYYY').toDate();
|
||||
const actors = $('.pornstar-card > a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();
|
||||
|
||||
const likes = Number($('.info-panel.likes .likes').text());
|
||||
const duration = Number($('.info-panel.duration .duration').text().slice(0, -4)) * 60;
|
||||
|
||||
const rawTags = $('.tags-tab .tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const tags = $('.tags-tab .tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
|
||||
const poster = $('#video').attr('poster');
|
||||
const photos = $('.photo-slider-guest .card a').map((photoIndex, photoElement) => $(photoElement).attr('href')).toArray();
|
||||
@@ -63,21 +65,7 @@ async function scrapeScene(html, url, site) {
|
||||
const trailer540 = $('source[res="540"]').attr('src');
|
||||
const trailer720 = $('source[res="720"]').attr('src');
|
||||
|
||||
/*
|
||||
* broken as of nov 2019
|
||||
const { origin } = new URL($('.pornstar-card meta[itemprop="url"]').first().attr('content'));
|
||||
|
||||
const [channelSite, tags] = await Promise.all([
|
||||
// don't find site if original is already specific
|
||||
site.isFallback ? knex('sites').where({ url: origin }).first() : site,
|
||||
matchTags(rawTags),
|
||||
]);
|
||||
*/
|
||||
|
||||
const tags = await matchTags(rawTags);
|
||||
|
||||
return {
|
||||
// url: channelSite ? `${channelSite.url}${new URL(url).pathname}` : url,
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
@@ -88,20 +76,19 @@ async function scrapeScene(html, url, site) {
|
||||
tags,
|
||||
poster,
|
||||
photos,
|
||||
trailer: trailer540
|
||||
? {
|
||||
src: trailer540,
|
||||
quality: 540,
|
||||
}
|
||||
: {
|
||||
// backup
|
||||
trailer: [
|
||||
{
|
||||
src: trailer720,
|
||||
quality: 720,
|
||||
},
|
||||
{
|
||||
src: trailer540,
|
||||
quality: 540,
|
||||
},
|
||||
],
|
||||
rating: {
|
||||
likes,
|
||||
},
|
||||
// site: channelSite || site,
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user