diff --git a/seeds/00_tags.js b/seeds/00_tags.js
index b50a564b..1059966e 100755
--- a/seeds/00_tags.js
+++ b/seeds/00_tags.js
@@ -705,6 +705,10 @@ const tags = [
 		name: 'gloryhole',
 		slug: 'gloryhole',
 	},
+	{
+		name: 'female gloryhole',
+		slug: 'female-gloryhole',
+	},
 	{
 		name: 'gonzo',
 		slug: 'gonzo',
@@ -1414,6 +1418,10 @@ const tags = [
 		name: 'interview',
 		slug: 'interview',
 	},
+	{
+		name: 'pregnant',
+		slug: 'pregnant',
+	},
 ];
 
 const aliases = [
@@ -3032,6 +3040,50 @@ const aliases = [
 		name: 't---y f--k',
 		for: 'titty-fucking',
 	},
+	{
+		name: 'thresome',
+		for: 'threesome',
+	},
+	{
+		name: 'fuck',
+		for: 'sex',
+	},
+	{
+		name: 'suck',
+		for: 'blowjob',
+	},
+	{
+		name: 'analfist',
+		for: 'anal-fisting',
+	},
+	{
+		name: 'fivesome',
+		for: 'orgy',
+	},
+	{
+		name: 'fucking machine',
+		for: 'machine-dildo',
+	},
+	{
+		name: 'fuck machine',
+		for: 'machine-dildo',
+	},
+	{
+		name: 'fuckmashine',
+		for: 'machine-dildo',
+	},
+	{
+		name: 'fuck saw',
+		for: 'machine-dildo',
+	},
+	{
+		name: 'dirtytalk',
+		for: 'dirty-talk',
+	},
+	{
+		name: 'stepmom',
+		for: 'family',
+	},
 ];
 
 const priorities = [ // higher index is higher priority
diff --git a/seeds/01_networks.js b/seeds/01_networks.js
index af10f3b6..bac867c1 100755
--- a/seeds/01_networks.js
+++ b/seeds/01_networks.js
@@ -239,6 +239,11 @@ const networks = [
 			// scene: false,
 		},
 	},
+	{
+		slug: 'acam',
+		name: 'A-Cam',
+		hasLogo: false,
+	},
 	{
 		slug: 'amateurallure',
 		name: 'Amateur Allure',
diff --git a/seeds/02_sites.js b/seeds/02_sites.js
index 60708965..f5b0b218 100755
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@@ -9453,7 +9453,6 @@ const sites = [
 	{
 		slug: 'realitysis',
 		name: 'Reality Sis',
-		rename: 'lilsis',
 		url: 'https://realitysis.com',
 		alias: ['lil sis'],
 		tags: ['family'],
@@ -14678,6 +14677,45 @@ const sites = [
 		description: 'BustyOnes.com bringing you the most beautiful big breasts in the world! The hottest women alive showcasing their fantastic tits.',
 		parent: 'twistys',
 	},
+	// A-CAM / VILDE
+	{
+		name: 'Vilde',
+		slug: 'vilde',
+		alias: ['vilde tv', 'johan vilde'],
+		url: 'https://vilde.tv',
+		parent: 'acam',
+		independent: true,
+		parameters: {
+			languageUrl: 'https://www.johanvilde.com/select-language',
+			languageKey: 'select_lang',
+		},
+	},
+	{
+		name: 'Anal Hooked',
+		slug: 'analhooked',
+		url: 'https://analhooked.com',
+		parent: 'acam',
+		tags: ['anal'],
+		independent: true,
+		parameters: {
+			layout: 'hooked',
+			staticUrl: true,
+			languageUrl: 'https://analhooked.com/change-option',
+			languageKey: 'select_language',
+		},
+	},
+	{
+		name: 'Channel Anal',
+		slug: 'channelanal',
+		alias: ['kanal anal'],
+		url: 'https://channelanal.com',
+		parent: 'acam',
+		independent: true,
+		tags: ['anal'],
+		parameters: {
+			layout: 'kanal',
+		},
+	},
 	// VIP SEX VAULT
 	{
 		name: 'Los Consoladores',
@@ -15240,6 +15278,16 @@ sites.reduce((acc, site) => {
 
 /* eslint-disable max-len */
 exports.seed = async (knex) => {
+	sites.reduce((acc, channel) => {
+		if (acc.has(channel.slug)) {
+			console.log('DUPLICATE', channel.slug);
+		} else {
+			acc.add(channel.slug);
+		}
+
+		return acc;
+	}, new Set());
+
 	await Promise.all(sites.map(async (channel) => {
 		if (channel.rename) {
 			await knex('entities')
diff --git a/src/scrapers/acam.js b/src/scrapers/acam.js
new file mode 100755
index 00000000..54d07409
--- /dev/null
+++ b/src/scrapers/acam.js
@@ -0,0 +1,262 @@
+'use strict';
+
+const unprint = require('unprint');
+
+const slugify = require('../utils/slugify');
+
+const tokenRegex = /\/show-video\/([a-z0-9]+)/;
+
+// pulls the token out of a /show-video/<token> URL, returns null if the URL is missing, malformed or has no token
+function extractToken(url) {
+	try {
+		return new URL(url).pathname.match(tokenRegex)?.[1] || null;
+	} catch (error) {
+		return null;
+	}
+}
+
+// entry ID is derived from the image file name minus any 'smak...' suffix, returns null if the poster is not a valid URL
+function extractEntryId(poster) {
+	// getPhotos may return the poster as [derived, original], pick the first candidate explicitly instead of relying on array-to-string coercion
+	const source = Array.isArray(poster) ? poster[0] : poster;
+
+	try {
+		return slugify(new URL(source).pathname.match(/\/images\/(.*?)\.jpg/)?.[1]?.replace(/smak.*/i, ''), '');
+	} catch (error) {
+		return null;
+	}
+}
+
+// collects the upper case sections of a title as lower case tags, starting at the first two consecutive capitals
+function extractTags(title) {
+	if (!title) {
+		return [];
+	}
+
+	const firstTagIndex = title.match(/[A-Z]{2}/)?.index;
+
+	// index 0 is a valid position, only bail out when there is no match at all
+	if (firstTagIndex === undefined) {
+		return [];
+	}
+
+	const tagSection = title
+		.slice(firstTagIndex)
+		.match(/([A-Z0-9\s]{2,})/g);
+
+	if (!tagSection) {
+		return [];
+	}
+
+	return tagSection
+		.map((tag) => tag.trim().toLowerCase())
+		.filter(Boolean);
+}
+
+// derived photo is usually uncensored and preferred as poster, but not guaranteed to exist, so fall back to original image
+function getPhotos(poster) {
+	if (!poster) {
+		// no image found, don't offer [undefined, null] as poster candidates
+		return {
+			poster: null,
+			photos: [],
+		};
+	}
+
+	const photo = poster.replace(/(s[ma]{2}kprov\d*)|([._]preview)/i, ''); // sic
+
+	if (photo === poster) {
+		return {
+			poster,
+			photos: [],
+		};
+	}
+
+	return {
+		poster: [photo, poster],
+		photos: [poster],
+	};
+}
+
+// maps the scene tiles of a listing page to base releases, always flagged for a deep scrape
+function scrapeAll(scenes, channel, parameters) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		// Vilde URLs are temporary tokens for some reason, seem to be handled entirely back-end
+		const url = query.url('a[href*="/show-video"]');
+
+		release.token = extractToken(url); // a tile without scene link must not break the whole page
+		release.forceDeep = true;
+
+		release.title = query.content('a h5, .product-content p, .video_text');
+		release.tags = extractTags(release.title);
+
+		const { poster, photos } = getPhotos(query.img('img[src*="/videos/images"], img[src*="/uploads/images"]'));
+
+		release.poster = poster;
+		release.photos = photos;
+
+		if (parameters.staticUrl) {
+			release.url = url;
+			release.entryId = release.token;
+		} else {
+			release.entryId = extractEntryId(release.poster);
+		}
+
+		return release;
+	});
+}
+
+// posts the English language selection if the channel has a language URL, returns the response cookies or null
+async function setLanguage(parameters) {
+	if (parameters.languageUrl) {
+		const langRes = await unprint.post(parameters.languageUrl, {
+			[parameters.languageKey || 'select_language']: 'english',
+		}, {
+			form: true,
+		});
+
+		return langRes.cookies;
+	}
+
+	return null;
+}
+
+// default layout, returns the scraped releases or the response status on failure
+async function fetchLatest(channel, page = 1, { parameters }) {
+	const cookies = await setLanguage(parameters);
+
+	const res = await unprint.post(`${channel.origin}/pagination`, {
+		i: page,
+		status: true,
+	}, {
+		selectAll: '.movi-area',
+		form: true,
+		cookies,
+	});
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel, parameters);
+	}
+
+	return res.status;
+}
+
+// 'hooked' layout, paginated by offset in steps of 9
+async function fetchLatestHooked(channel, page = 1, { parameters }) {
+	const cookies = await setLanguage(parameters);
+
+	const res = await unprint.get(`${channel.origin}/Welcome/index/${(page - 1) * 9}`, {
+		selectAll: '.product-main',
+		cookies,
+	});
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel, parameters);
+	}
+
+	return res.status;
+}
+
+// 'kanal' layout, posts to /pagination like the default layout but with different form fields and selector
+async function fetchLatestKanal(channel, page = 1, { parameters }) {
+	const cookies = await setLanguage(parameters);
+
+	const res = await unprint.post(`${channel.origin}/pagination`, {
+		k: page,
+		status: 1,
+	}, {
+		selectAll: '.video_bx',
+		form: true,
+		cookies,
+	});
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel, parameters);
+	}
+
+	return res.status;
+}
+
+// scrapes a scene page, returns null when none of the expected scene containers are present
+function scrapeScene({ query }, { url, baseRelease, parameters }) {
+	const release = {};
+
+	// URL is temporary token
+	if (!query.exists('.login-sec.for-browser, .video-description, .video_co_title')) {
+		// URL likely expired, still returns 200
+		return null;
+	}
+
+	if (query.exists('.video-description')) {
+		const descriptions = query.contents('.video-description p').filter(Boolean);
+
+		release.title = descriptions[0];
+		release.description = descriptions.slice(1).join(' ') || null;
+	} else {
+		release.title = query.content('.login-sec.for-browser h3, .video_co_title h3');
+		release.description = query.contents('.login-sec.for-browser h3 ~ *').join(' ') || null;
+	}
+
+	release.tags = extractTags(release.title);
+
+	const { poster, photos } = getPhotos(query.poster('.play_video_cont video'));
+
+	release.poster = poster;
+	release.photos = photos;
+
+	release.trailer = query.all('.play_video_cont source')
+		.map((videoEl) => ({
+			src: unprint.query.url(videoEl, null, { attribute: 'src' }),
+			quality: unprint.query.number(videoEl, null, { attribute: 'size' }),
+			referer: url,
+		}))
+		.toSorted((videoA, videoB) => videoB.quality - videoA.quality);
+
+	if (parameters.staticUrl) {
+		release.url = url;
+		release.entryId = baseRelease?.token || extractToken(url);
+	} else {
+		release.entryId = extractEntryId(release.poster);
+	}
+
+	return release;
+}
+
+// fetches a scene by URL, or by the token of the base release when no URL is given
+async function fetchScene(baseUrl, entity, baseRelease, { parameters }) {
+	const url = baseUrl || (baseRelease?.token && `${entity.origin}/show-video/${baseRelease.token}`) || null;
+
+	if (!url) {
+		return null;
+	}
+
+	const cookies = await setLanguage(parameters);
+
+	const res = await unprint.get(url, {
+		headers: {
+			'accept-language': 'en-US,en',
+		},
+		cookies,
+	});
+
+	if (res.ok || res.status === 500) { // Anal Hooked returns 500 for valid scene pages
+		return scrapeScene(res.context, { url, baseRelease, parameters });
+	}
+
+	return res.status;
+}
+
+module.exports = {
+	fetchLatest,
+	fetchScene,
+	hooked: {
+		fetchLatest: fetchLatestHooked,
+		fetchScene,
+	},
+	kanal: {
+		fetchLatest: fetchLatestKanal,
+		fetchScene,
+	},
+};
diff --git a/src/scrapers/releases.js b/src/scrapers/releases.js
index cc9a06c3..dce85bfd 100644
--- a/src/scrapers/releases.js
+++ b/src/scrapers/releases.js
@@ -1,5 +1,6 @@
 'use strict';
 
+const acam = require('./acam');
 const adultempire = require('./adultempire');
 const angelogodshackoriginal = require('./angelogodshackoriginal');
 // const archangel = require('./archangel');
@@ -96,6 +97,7 @@ module.exports = {
 	freeuse: teamskeet,
 	familystrokes: teamskeet,
 	// etc
+	acam,
 	analvids: pornbox,
 	pornbox,
 	kellymadison,