diff --git a/assets/components/networks/network.vue b/assets/components/networks/network.vue index a633b00a..2f28aab6 100644 --- a/assets/components/networks/network.vue +++ b/assets/components/networks/network.vue @@ -38,7 +38,10 @@ :class="{ expanded }" /> -
+
({ ...acc, [baseActor.slug]: baseActor }), {}); const uniqueBaseActors = Object.values(baseActorsBySlug); diff --git a/src/app.js b/src/app.js index 0edcfad4..372d66bc 100644 --- a/src/app.js +++ b/src/app.js @@ -8,6 +8,7 @@ const fetchUpdates = require('./updates'); const { fetchScenes, fetchMovies } = require('./deep'); const { storeReleases } = require('./store-releases'); const { updateReleasesSearch } = require('./releases'); +const { scrapeActors } = require('./actors-legacy'); async function init() { if (argv.server) { @@ -17,12 +18,17 @@ async function init() { if (argv.updateSearch) { await updateReleasesSearch(); - knex.destroy(); - return; + } + + if (argv.actors) { + await scrapeActors(argv.actors); } const updateBaseScenes = (argv.scrape || argv.sites || argv.networks) && await fetchUpdates(); - const deepScenes = argv.deep && await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || [])]); + + const deepScenes = argv.deep + ? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || [])]) + : updateBaseScenes; const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean); const deepMovies = await fetchMovies([...(argv.movies || []), ...(sceneMovies || [])]); diff --git a/src/knex.js b/src/knex.js index 0947d66a..baebbfee 100644 --- a/src/knex.js +++ b/src/knex.js @@ -3,17 +3,9 @@ const config = require('config'); const knex = require('knex'); -/* -module.exports = knex({ - client: 'sqlite3', - connection: { - filename: path.join(__dirname, '../db.sqlite'), - }, - useNullAsDefault: true, -}); -*/ - module.exports = knex({ client: 'pg', connection: config.database, + // performance overhead, don't use asyncStackTraces in production + asyncStackTraces: process.env.NODE_ENV === 'development', }); diff --git a/src/scrapers/aziani.js b/src/scrapers/aziani.js new file mode 100644 index 00000000..efa46f05 --- /dev/null +++ b/src/scrapers/aziani.js @@ -0,0 +1,145 @@ +'use strict'; + +const slugify = require('../utils/slugify'); +const { get, getAll, initAll, extractDate } = require('../utils/qu'); +const { feetInchesToCm } = require('../utils/convert'); + +function getFallbacks(source) { + return [ + source.replace('-1x.jpg', '-4x.jpg'), + source.replace('-1x.jpg', '-3x.jpg'), + source.replace('-1x.jpg', '-2x.jpg'), + source, + ]; +} + +function scrapeAll(scenes, site) { + return scenes.map(({ qu }) => { + const release = {}; + + release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1]; + release.url = qu.url('a'); + + release.title = qu.q('h5 a', true); + release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY'); + + release.actors = qu.q('h3', true).replace(/featuring:\s?/i, '').split(', '); + + const photoCount = qu.q('.stdimage', 'cnt'); + [release.poster, ...release.photos] = Array.from({ length: Number(photoCount) }, (value, index) => { + const source = qu.img('.stdimage', `src${index}_1x`, site.url); + + return getFallbacks(source); + }); + + return release; + }); +} + +function scrapeScene({ html, qu }, url) { + const release = { url }; + + release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1]; + + release.title = qu.q('h2', true); + release.description = qu.q('p', true); + + release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/); + + release.actors = qu.all('h5:not(.video_categories) a').map(actor => ({ + name: qu.q(actor, null, true), + url: qu.url(actor, null), + })); + + release.tags = qu.all('.video_categories a', true); + + release.duration = qu.dur('.video_categories + p'); + + const poster = qu.img('a img'); + + release.poster = getFallbacks(poster); + release.photos = qu.imgs('.featured-video img', 'src0_1x').map(source => getFallbacks(source)); + + return release; +} + +function scrapeProfile({ el, qu }) { + const profile = {}; + + const bio = Array.from(qu.q('.widget-content').childNodes).reduce((acc, node, index, nodes) => { + const nextNode = nodes[index + 1]; + + if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) { + acc[slugify(node.textContent, '_')] = nextNode.textContent.trim(); + } + + return acc; + }, {}); + + if (bio.ethnicity) profile.ethnicity = bio.ethnicity; + if (bio.age) profile.age = Number(bio.age); + + if (bio.height && /\d{3}/.test(bio.height)) profile.height = Number(bio.height.match(/\d+/)[0]); + if (bio.height && /\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height); + + if (bio.measurements) { + const [bust, waist, hip] = bio.measurements.split('-'); + + if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust; + if (waist) profile.waist = Number(waist); + if (hip) profile.hip = Number(hip); + } + + if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase(); + + if (bio.birth_location) profile.birthPlace = bio.birth_location; + if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single; + + if (bio.eye_color) profile.eyes = bio.eye_color; + + const avatar = qu.img('.tac img'); + profile.avatar = getFallbacks(avatar); + + profile.releases = scrapeAll(initAll(el, '.featured-video')); + + return profile; +} + +async function fetchLatest(site, page) { + const url = `${site.url}/tour/categories/movies_${page}_d.html`; + const res = await getAll(url, '.featured-video'); + + if (res.ok) { + return scrapeAll(res.items, site); + } + + return res.status; +} + +async function fetchScene(url, site) { + const res = await get(url, '.page-content .row'); + + if (res.ok) { + return scrapeScene(res.item, url, site); + } + + return res.status; +} + +async function fetchProfile(actorName, scraperSlug, site) { + const actorSlug = slugify(actorName, ''); + const url = `${site.url}/tour/models/${actorSlug}.html`; + const res = await get(url, '.page-content .row'); + + if (res.ok) { + return scrapeProfile(res.item); + } + + return res.status; +} + +module.exports = { + fetchLatest, + fetchProfile, + fetchScene, +}; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 81e6f885..f6f28819 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -2,6 +2,7 @@ const adulttime = require('./adulttime'); const assylum = require('./assylum'); +const aziani = require('./aziani'); const amateurallure = require('./amateurallure'); const babes = require('./babes'); const bamvisions = require('./bamvisions'); @@ -70,6 +71,7 @@ module.exports = { adulttime, amateurallure, assylum, + aziani, babes, bamvisions, bang, @@ -132,6 +134,7 @@ module.exports = { analized: fullpornnetwork, analviolation: fullpornnetwork, anilos: nubiles, + aziani, babes, baddaddypov: fullpornnetwork, bamvisions, @@ -155,7 +158,9 @@ module.exports = { famedigital, freeones, freeonesLegacy, + gangbangcreampie: aziani, girlfaction: fullpornnetwork, + gloryholesecrets: aziani, hergape: fullpornnetwork, homemadeanalwhores: fullpornnetwork, hotcrazymess: nubiles, @@ -187,8 +192,8 @@ module.exports = { private: privateNetwork, realitykings, score, - sexyhub: mindgeek, seehimfuck: hush, + sexyhub: mindgeek, thatsitcomshow: nubiles, transangels, tushy: vixen, diff --git a/src/utils/qu.js b/src/utils/qu.js index a2991c86..31e76bea 100644 --- a/src/utils/qu.js +++ b/src/utils/qu.js @@ -34,11 +34,15 @@ function formatDate(dateValue, format, inputFormat) { return moment(dateValue).format(format); } -function prefixProtocol(urlValue, protocol = 'https') { +function prefixUrl(urlValue, origin, protocol = 'https') { if (protocol && /^\/\//.test(urlValue)) { return `${protocol}:${urlValue}`; } + if (origin && /^\//.test(urlValue)) { + return `${origin}${urlValue}`; + } + return urlValue; } @@ -48,7 +52,7 @@ function q(context, selector, attrArg, applyTrim = true) { if (attr) { const value = selector ? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value - : context[attr] || context[attr]?.attributes[attr]?.value; + : context[attr] || context.attributes[attr]?.value; return applyTrim && value ? trim(value) : value; } @@ -60,7 +64,7 @@ function all(context, selector, attrArg, applyTrim = true) { const attr = attrArg === true ? 'textContent' : attrArg; if (attr) { - return Array.from(context.querySelectorAll(selector), el => (applyTrim && el[attr] ? trim(el[attr]) : el[attr])); + return Array.from(context.querySelectorAll(selector), el => q(el, null, attr, applyTrim)); } return Array.from(context.querySelectorAll(selector)); @@ -112,47 +116,47 @@ function date(context, selector, format, match, attr = 'textContent') { return extractDate(dateString, format, match); } -function image(context, selector = 'img', attr = 'src', protocol = 'https') { +function image(context, selector = 'img', attr = 'src', origin, protocol = 'https') { const imageEl = q(context, selector, attr); // no attribute means q output will be HTML element - return attr ? prefixProtocol(imageEl, protocol) : imageEl; + return attr ? prefixUrl(imageEl, origin, protocol) : imageEl; } -function images(context, selector = 'img', attr = 'src', protocol = 'https') { +function images(context, selector = 'img', attr = 'src', origin, protocol = 'https') { const imageEls = all(context, selector, attr); - return attr ? imageEls.map(imageEl => prefixProtocol(imageEl, protocol)) : imageEls; + return attr ? imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol)) : imageEls; } -function url(context, selector = 'a', attr = 'href', protocol = 'https') { +function url(context, selector = 'a', attr = 'href', origin, protocol = 'https') { const urlEl = q(context, selector, attr); - return attr ? prefixProtocol(urlEl, protocol) : urlEl; + return attr ? prefixUrl(urlEl, origin, protocol) : urlEl; } -function urls(context, selector = 'a', attr = 'href', protocol = 'https') { +function urls(context, selector = 'a', attr = 'href', origin, protocol = 'https') { const urlEls = all(context, selector, attr); - return attr ? urlEls.map(urlEl => prefixProtocol(urlEl, protocol)) : urlEls; + return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls; } -function poster(context, selector = 'video', attr = 'poster', protocol = 'https') { +function poster(context, selector = 'video', attr = 'poster', origin, protocol = 'https') { const posterEl = q(context, selector, attr); - return attr ? prefixProtocol(posterEl, protocol) : posterEl; + return attr ? prefixUrl(posterEl, origin, protocol) : posterEl; } -function video(context, selector = 'source', attr = 'src', protocol = 'https') { +function video(context, selector = 'source', attr = 'src', origin, protocol = 'https') { const trailerEl = q(context, selector, attr); - return attr ? prefixProtocol(trailerEl, protocol) : trailerEl; + return attr ? prefixUrl(trailerEl, origin, protocol) : trailerEl; } -function videos(context, selector = 'source', attr = 'src', protocol = 'https') { +function videos(context, selector = 'source', attr = 'src', origin, protocol = 'https') { const trailerEls = all(context, selector, attr); - return attr ? trailerEls.map(trailerEl => prefixProtocol(trailerEl, protocol)) : trailerEls; + return attr ? trailerEls.map(trailerEl => prefixUrl(trailerEl, origin, protocol)) : trailerEls; } function duration(context, selector, match, attr = 'textContent') {