Added description property to recent scrapers (apparently forgotten). Reconfigured eslint for scrapers.

ThePendulum 2019-04-07 05:19:44 +02:00
parent 4b19b49e6c
commit 6b9d99f495
9 changed files with 21 additions and 13 deletions
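The scraper edits below all repeat one pattern: pull description out of the scraped data and include it in the release object that scrapeScene returns. A minimal sketch of that pattern, with a hypothetical buildRelease helper and made-up data standing in for the real site-specific parsing:

'use strict';

/* eslint-disable newline-per-chained-call */

// Illustrative sketch only: the real scrapers parse site-specific HTML or JSON-LD.
// The change this commit repeats is destructuring description from the scraped
// data and including it in the returned release object.
function buildRelease(data, url, site) {
  const { title, description } = data;

  return {
    url,
    title,
    description, // previously omitted from several scrapers' return values
    site,
  };
}

// Hypothetical usage:
const release = buildRelease(
  { title: 'Example scene', description: 'Example description.' },
  'https://example.com/scene/1',
  { name: 'Example Site' },
);
console.log(release.description); // 'Example description.'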

.eslintignore (new file)
View File

@@ -0,0 +1 @@
+src/scrapers/template.js

View File

@@ -3,7 +3,7 @@
   "parser": "babel-eslint",
   "extends": "airbnb-base",
   "parserOptions": {
-    "sourceType": "script"
+    "sourceType": "script",
   },
   "rules": {
     "strict": 0,

View File

@@ -1,6 +1,6 @@
 'use strict';
-/* eslint-disable */
+/* eslint-disable newline-per-chained-call */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
@@ -47,7 +47,7 @@ async function scrapeScene(html, url, site) {
   const shootId = workName.length > 1 ? workName[0] : null;
   const entryId = url.split('/').slice(-1)[0];
   const title = data.name;
-  const description = data.description;
+  const { description } = data;
   const date = moment.utc(data.isPartOf.datePublished, 'YYYY-MM-DD').toDate();
   // const actors = sceneElement.find('.sceneActors a').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
@@ -73,6 +73,7 @@ async function scrapeScene(html, url, site) {
     shootId,
     entryId,
     title,
+    description,
     actors,
     date,
     duration,

View File

@@ -1,6 +1,6 @@
 'use strict';
-/* eslint-disable */
+/* eslint-disable newline-per-chained-call */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
@@ -83,12 +83,15 @@ async function scrapeScene(html, url, site) {
     url,
     shootId,
     title,
+    description,
     actors,
     date,
+    duration,
     rating: {
       likes,
       dislikes,
     },
+    tags,
     site: channelSite || site,
   };
 }

View File

@@ -1,6 +1,5 @@
 'use strict';
-/* eslint-disable */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
@@ -8,6 +7,7 @@ const moment = require('moment');
 const knex = require('../knex');
 const { matchTags } = require('../tags');
+/* eslint-disable newline-per-chained-call */
 function scrapeLatest(html, site) {
   const $ = cheerio.load(html, { normalizeWhitespace: true });
   const sceneElements = $('.card.m-1').toArray();
@@ -62,6 +62,7 @@ async function scrapeScene(html, url, site) {
     url: channelSite ? `${channelSite.url}${new URL(url).pathname}` : url,
     entryId,
     title,
+    description,
     actors,
     date,
     duration,

View File

@@ -1,6 +1,5 @@
 'use strict';
-/* eslint-disable */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
@@ -8,6 +7,7 @@ const moment = require('moment');
 const knex = require('../knex');
 const { matchTags } = require('../tags');
+/* eslint-disable newline-per-chained-call */
 function scrape(html, site) {
   const $ = cheerio.load(html, { normalizeWhitespace: true });
   const sceneElements = $('.widget-release-card').toArray();
@@ -69,12 +69,13 @@ async function scrapeScene(html, url, site) {
     url,
     entryId,
     title,
+    description,
     actors,
     tags,
     rating: {
       stars,
     },
-    site,
+    site: channelSite || site,
   };
 }

View File

@@ -1,6 +1,6 @@
 'use strict';
-/* eslint-disable */
+/* eslint-disable newline-per-chained-call */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
@@ -44,7 +44,7 @@ async function scrapeScene(html, url, site) {
   const shootId = url.split('/').slice(-1)[0];
   const title = $('.video-wrapper meta[itemprop="name"]').attr('content');
-  const date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate()
+  const date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
   const actors = $('.content-wrapper .scene-models-list a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
   const description = $('.video-wrapper meta[itemprop="description"]').attr('content');

View File

@@ -1,6 +1,6 @@
 'use strict';
-/* eslint-disable */
+/* eslint-disable newline-per-chained-call */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
@@ -68,6 +68,7 @@ async function scrapeScene(html, url, site) {
     url,
     shootId,
     title,
+    description,
     actors,
     director,
     date,

View File

@@ -1,6 +1,6 @@
 'use strict';
-/* eslint-disable */
+/* eslint-disable newline-per-chained-call */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
@@ -15,7 +15,7 @@ function scrapeLatest(html, site) {
   return scenes.map((scene) => {
     const shootId = String(scene.newId);
-    const title = scene.title;
+    const { title } = scene;
     const url = `${site.url}${scene.targetUrl}`;
     const date = moment.utc(scene.releaseDateFormatted, 'MMMM DD, YYYY').toDate();
     const actors = scene.models;
@@ -45,7 +45,7 @@ async function scrapeScene(html, url, site) {
   const scene = data.page.data[`${pathname}${search}`].data.video;
   const shootId = String(scene.newId);
-  const title = scene.title;
+  const { title } = scene;
   const date = new Date(scene.releaseDate);
   const actors = scene.models;
   const stars = scene.totalRateVal;