'use strict'; const unprint = require('unprint'); const http = require('../utils/http'); const slugify = require('../utils/slugify'); function extractTitle(originalTitle) { const titleComponents = originalTitle.split(' '); // const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM|BTG)\d+/); // detect studio prefixes const sceneIdMatch = titleComponents.slice(-1)[0].match(/\w+\d+\s*$/); // detect studio prefixes const shootId = sceneIdMatch ? sceneIdMatch[0] : null; const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle; return { shootId, title }; } function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('.card-scene__view > a'); release.entryId = query.dataset(null, 'content') || new URL(release.url).pathname.match(/watch\/(\d+)/)?.[1]; release.title = query.content('.card-scene__text'); release.shootId = extractTitle(release.title).shootId; release.date = query.date('.label--time:nth-child(2)', 'YYYY-MM-DD'); // only available on front-page, not on studio page release.duration = query.duration('.label--time:first-child'); release.poster = query.img('.card-scene__view img', { attribute: 'data-src' }); const caps = query.json('.card-scene__view > a', { attribute: 'data-casting' })?.map((timestamp) => `${channel.url}/casting/${release.entryId}/${timestamp}`); if (caps && release.poster) { release.caps = caps; } else if (caps) { [release.poster, ...release.caps] = caps; } release.teaser = query.video('.card-scene__view > a', { attribute: 'data-preview' }); return release; }); } function scrapeScene({ query }, { url }) { const release = {}; release.entryId = new URL(url).pathname.match(/watch\/(\d+)/)?.[1]; const featuring = query.content('.watch__title .watch__featuring_models'); release.title = query.content('.watch__title').replace(featuring, ''); release.description = query.content('.text-mob-more'); release.shootId = extractTitle(release.title).shootId; release.date = query.date('.bi-calendar3', 'YYYY-MM-DD'); release.duration = query.duration('.bi-clock'); release.actors = query.all('.watch__title a[href*="/model"]').map((el) => ({ name: unprint.query.content(el), url: unprint.query.url(el, null), })); release.tags = query.contents('.genres-list a[href*="/genre"]'); // release.studio = slugify(query.content('.genres-list a[href*="/studios"]'), '', { removePunctuation: true }); release.poster = query.img('.watch__video video', { attribute: 'data-poster' }); release.trailer = query.all('.watch__video source').map((el) => ({ src: unprint.query.video(el, null, { attribute: 'src' }), quality: unprint.query.number(el, null, { attribute: 'size' }), })); return release; } function scrapeProfile({ query }, url, channel) { const profile = { url }; profile.nationality = query.content('.model__info a[href*="/nationality"]'); profile.age = query.number('//td[contains(text(), "Age")]/following-sibling::td'); profile.avatar = query.img('.model__left img'); profile.scenes = scrapeAll(unprint.initAll(query.all('.card-scene')), channel); return profile; } async function fetchLatest(channel, page) { // const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } /* async function fetchLatest(channel, page) { // const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel // const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels const url = `${channel.url}/latest/${page}`; // studios as channels const { tab } = await http.getBrowserSession('analvids', { bypass: { headless: false, }, }); const res = await tab.goto(url); const status = res.status(); console.log('STATUS', status); if (status === 200) { const html = await tab.content(); const context = unprint.initAll(html, '.card-scene'); // studios as channels const scenes = scrapeAll(context, channel); tab.close(); return scenes; } return res.status; } */ async function getActorUrl(actor, channel) { if (actor.url) { return actor.url; } const searchRes = await http.get(`${channel.url}/api/autocomplete/search?q=${slugify(actor.name, '+')}`); if (!searchRes.ok) { return searchRes.status; } const result = searchRes.body.terms.find((item) => item.type === 'model'); if (result) { return result.url; } return null; } async function fetchProfile(actor, { channel }) { const actorUrl = await getActorUrl(actor, channel); if (typeof actorUrl !== 'string') { return actorUrl; } const bioRes = await unprint.get(actorUrl); if (bioRes.ok) { return scrapeProfile(bioRes.context, actorUrl, channel); } return bioRes.status; } module.exports = { fetchLatest, scrapeScene, fetchProfile, };