'use strict'; const bhttp = require('bhttp'); const cheerio = require('cheerio'); const moment = require('moment'); const knex = require('../knex'); const { matchTags } = require('../tags'); /* eslint-disable newline-per-chained-call */ function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const sceneElements = $('.widget-release-card').toArray(); return sceneElements.map((element) => { const sceneLinkElement = $(element).find('.title a'); const title = sceneLinkElement.text().trim(); const url = `https://www.mofos.com${sceneLinkElement.attr('href')}`; const entryId = url.split('/').slice(-2, -1)[0]; const date = moment.utc($(element).find('.date-added').text(), 'MMM DD, YYYY').toDate(); const actors = $(element).find('.girls-name a').map((actorIndex, actorElement) => $(actorElement).attr('title').replace(/\s+/g, ' ')).toArray(); const stars = Number($(element).find('.rating').text().slice(0, -1).trim()) / 20; return { url, entryId, title, actors, date, rating: { stars, }, site, }; }); } async function scrapeScene(html, url, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const sceneElement = $('.video-info'); const entryId = url.split('/').slice(-2, -1)[0]; const title = sceneElement.find('.title').text(); const description = sceneElement.find('.desc').text(); const actors = sceneElement.find('.girls-site-box a.model-name').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray(); const siteElement = sceneElement.find('.site-name'); const sitename = siteElement.text().trim(); const siteId = sitename.replace(/\s+/g, '').toLowerCase(); const siteUrl = siteElement.attr('href').split('/').slice(0, 4).join('/'); const stars = Number(sceneElement.find('.rating-box .rating').text().slice(0, -1).trim()) / 20; const rawTags = sceneElement.find('.categories a').map((tagIndex, tagElement) => $(tagElement).text().trim()).toArray(); const [channelSite, tags] = await Promise.all([ knex('sites') .where({ slug: siteId }) .orWhere({ url: `https://www.mofos.com${siteUrl}` }) .orWhere({ name: sitename }) .first(), matchTags(rawTags), ]); return { url, entryId, title, description, actors, tags, rating: { stars, }, site: channelSite || site, }; } async function fetchLatest(site, page = 1) { const res = page > 1 ? await bhttp.get(`${site.url}/all-models/all-categories/alltime/bydate/${page}/`) : await bhttp.get(`${site.url}/all-models/all-categories/alltime/bydate/`); // explicit page 1 redirects to homepage return scrape(res.body.toString(), site); } async function fetchUpcoming(site) { const res = await bhttp.get(`${site.url}/all-models/all-categories/upcoming/bydate/`); return scrape(res.body.toString(), site); } async function fetchScene(url, site) { const res = await bhttp.get(url); return scrapeScene(res.body.toString(), url, site); } module.exports = { fetchLatest, fetchUpcoming, fetchScene, };