diff --git a/seeds/00_tags.js b/seeds/00_tags.js index 5e9f2694..45eb3491 100755 --- a/seeds/00_tags.js +++ b/seeds/00_tags.js @@ -179,6 +179,11 @@ const tags = [ slug: 'ball-licking', group: 'oral', }, + { + name: 'ball sucking', + slug: 'ball-sucking', + group: 'oral', + }, { name: 'ballerina', slug: 'ballerina', @@ -1051,6 +1056,10 @@ const tags = [ name: 'spanking', slug: 'spanking', }, + { + name: 'spinner', + slug: 'spinner', + }, { name: 'spooning', slug: 'spooning', @@ -1504,11 +1513,7 @@ const aliases = [ }, { name: 'ball suck', - for: 'ball-licking', - }, - { - name: 'ball sucking', - for: 'ball-licking', + for: 'ball-sucking', }, { name: 'boob fucking', @@ -2762,6 +2767,191 @@ const aliases = [ name: 'interviews', for: 'interview', }, + // censors, amateur allure + { + name: '2 big c---s', + for: 'mfm', + }, + { + name: 'amateur b--w--bs', + for: 'amateur blowjobs', + }, + { + name: 'a--l', + for: 'anal', + }, + { + name: 'a-s', + for: 'ass', + }, + { + name: 'b--l l--k', + for: 'ball-licking', + }, + { + name: 'b--l s--k--g', + for: 'ball-sucking', + }, + { + name: 'big a-s', + for: 'big-butt', + }, + { + name: 'big t--s', + for: 'big-boobs', + }, + { + name: 'b--w--b', + for: 'blowjob', + }, + { + name: 'b--t p--g', + for: 'anal-toy', + }, + { + name: 'c--k--g', + for: 'choking', + }, + { + name: 'c--g--l', + for: 'cowgirl', + }, + { + name: 'c--a--ie', + for: 'creampie', + }, + { + name: 'c-m', + for: 'cum', + }, + { + name: 'c-m in mouth', + for: 'cum-in-mouth', + }, + { + name: 'c-m in mouth swallow', + for: 'swallowing', + }, + { + name: 'c-m swallow', + for: 'swallowing', + }, + { + name: 'c--s--t', + for: 'cumshot', + }, + { + name: 'd--p -h--at', + for: 'deepthroat', + }, + { + name: 'd--g--s--le', + for: 'doggy-style', + }, + { + name: 'd--g--t--e', + for: 'doggy-style', + }, + { + name: 'face f--k', + for: 'facefucking', + }, + { + name: 'f----l', + for: 'facial', + }, + { + name: 'f--g--i-g', + for: 'fingering', + }, + { + name: 'f--r--me', + for: 'foursome', + }, + { + name: 'free t--n sex', + for: 'teen', + }, + { + name: 'girls giving b--w jobs', + for: 'blowjob', + }, + { + name: 'g---y h--e', + for: 'gloryhole', + }, + { + name: 'h--d--b', + for: 'handjob', + }, + { + name: 'l--b--n', + for: 'lesbian', + }, + { + name: 'm--f', + for: 'milf', + }, + { + name: 'm--s--n--y', + for: 'missionary', + }, + { + name: 'o--l', + for: 'oral', + }, + { + name: 'o--l c--a--ie', + for: 'oral-creampie', + }, + { + name: 'o--l sex', + for: 'blowjob', + }, + { + name: 'o--y', + for: 'orgy', + }, + { + name: 'p--n--a-s first ever scene', + for: 'debut', + }, + { + name: 'p---y to mouth', + for: 'pussy-to-mouth', + }, + { + name: 'reverse c--g--l', + for: 'reverse-cowgirl', + }, + { + name: 'r----b', + for: 'rimjob', + }, + { + name: 's--n--r', + for: 'spinner', + }, + { + name: 'swallow c-m', + for: 'swallowing', + }, + { + name: 't--n', + for: 'teen', + }, + { + name: 't--n c-m swallowing videos', + for: 'swallowing', + }, + { + name: 't--e--o-e', + for: 'threesome', + }, + { + name: 't---y f--k', + for: 'titty-fucking', + }, ]; const priorities = [ // higher index is higher priority diff --git a/seeds/02_sites.js b/seeds/02_sites.js index e20d0613..8f06bb61 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -637,6 +637,7 @@ const sites = [ upcoming: false, latest: 'https://www.amateurallure.com/tour/updates/page_%d.html', photos: 'https://www.amateurallure.com/tour/gallery.php', + profile: 'https://www.amateurallure.com/tour/models', }, parent: 'amateurallure', }, @@ -649,6 +650,7 @@ const sites = [ upcoming: false, latest: 'https://www.swallowsalon.com/categories/movies_%d_d.html', photos: 'https://www.swallowsalon.com/gallery.php', + profile: 'https://www.swallowsalon.com/models', }, parent: 'amateurallure', }, diff --git a/src/scrapers/actors.js b/src/scrapers/actors.js index 6220a934..127bd277 100644 --- a/src/scrapers/actors.js +++ b/src/scrapers/actors.js @@ -186,6 +186,10 @@ module.exports = { loveherfeet: loveherfilms, loveherboobs: loveherfilms, shelovesblack: loveherfilms, + // julesjordan + julesjordan, + amateurallure: julesjordan, // different company, same scraper + swallowsalon: julesjordan, // different company, same scraper // etc '18vr': badoink, theflourishxxx: theflourish, @@ -216,7 +220,6 @@ module.exports = { hookuphotshot, inthecrack, jerkaoke: modelmedia, - julesjordan, karups, kellymadison, '8kmembers': kellymadison, diff --git a/src/scrapers/amateurallure.js b/src/scrapers/amateurallure.js deleted file mode 100755 index 9fefc47f..00000000 --- a/src/scrapers/amateurallure.js +++ /dev/null @@ -1,49 +0,0 @@ -'use strict'; - -const { fetchLatest, fetchScene } = require('./julesjordan'); - -function extractActors(scene) { - const release = scene; - - if (!scene.actors || scene.actors.length === 0) { - const introActorMatches = scene.title.match(/(?:presents|introduces|features|welcomes) (\w+ \w+)/i); - const introTwoActorMatches = scene.title.match(/(?:presents|introduces|features|welcomes) (?:(\w+)|(\w+ \w+)) and (\w+ \w+)/i); - const returnActorMatches = scene.title.match(/(?:(^\w+)|(\w+ \w+))(?:,| (?:return|visit|pov|give|suck|lick|milk|love|enjoy|service|is))/i); - const returnTwoActorMatches = scene.title.match(/(\w+ \w+) and (?:(\w+)|(\w+ \w+)) (?:return|visit|give|suck|lick|milk|love|enjoy|service|are)/i); - - const rawActors = (introTwoActorMatches || introActorMatches || returnTwoActorMatches || returnActorMatches)?.slice(1); - const actors = rawActors?.filter((actor) => { - if (!actor) return false; - if (/swallow|\bcum|fuck|suck|give|giving|take|takes|taking|head|teen|babe|cute|beaut|naughty|teacher|nanny|adorable|brunette|blonde|bust|audition|from|\band\b|\bto\b/i.test(actor)) return false; - - return true; - }); - - if (actors) { - release.actors = actors; - } - } - - if (release.actors?.length > 1 || /threesome|threeway/.test(scene.title)) { - release.tags = scene.tags ? [...scene.tags, 'mff'] : ['mff']; - } - - return release; -} - -async function fetchLatestWrap(site, page = 1, include, preData) { - const latest = await fetchLatest(site, page, include, preData); - - return latest.map((scene) => extractActors(scene)); -} - -async function fetchSceneWrap(url, channel, baseRelease, include) { - const scene = await fetchScene(url, channel, baseRelease, include); - - return extractActors(scene); -} - -module.exports = { - fetchLatest: fetchLatestWrap, - fetchScene: fetchSceneWrap, -}; diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 73bf6128..c6090e27 100755 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -5,7 +5,6 @@ const Promise = require('bluebird'); const unprint = require('unprint'); const argv = require('../argv'); -const qu = require('../utils/qu'); const { heightToCm } = require('../utils/convert'); const slugify = require('../utils/slugify'); @@ -34,10 +33,11 @@ function getEntryIdFromTitle(release) { function scrapeAll(scenes, site, entryIdFromTitle) { return scenes.map(({ element, query }) => { const release = {}; - const title = query.content('.content_img div, .dvd_info > a, a.update_title, a[title] + a[title], .overlay-text') || query.content('a[title*=" "]'); + const title = query.content('.content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text') + || query.content('a[title*=" "]'); release.title = title?.slice(0, title.match(/starring:/i)?.index || Infinity).trim(); - release.url = query.url('.content_img a, .dvd_info > a, a.update_title, a[title]'); + release.url = query.url('.content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]'); release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']); release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({ @@ -50,9 +50,9 @@ function scrapeAll(scenes, site, entryIdFromTitle) { [release.poster, ...release.photos] = dvdPhotos.length ? dvdPhotos - : Array.from({ length: photoCount }).map((value, index) => { + : Array.from({ length: photoCount }).map((_value, index) => { const src = query.img('a img.thumbs', { attribute: `src${index}_1x` }) || query.img('a img.thumbs', { attribute: `src${index}` }) || query.img('a img.thumbs'); - const prefixedSrc = qu.prefixUrl(src, site.url); + const prefixedSrc = unprint.prefixUrl(src, site.url); if (src) { return Array.from(new Set([ @@ -81,12 +81,25 @@ function scrapeAll(scenes, site, entryIdFromTitle) { || query.element('.rating_box')?.dataset.id || query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1]; - console.log(release.entryId); - return release; }); } +async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) { + const url = site.parameters?.latest + ? util.format(site.parameters.latest, page) + : `${site.url}/trial/categories/movies_${page}_d.html`; + + // const res = await http.get(url); + const res = await unprint.get(url, { selectAll: '.update_details, .grid-item' }); + + if (res.ok) { + return scrapeAll(res.context, site, typeof site.parameters?.entryIdFromTitle === 'boolean' ? site.parameters.entryIdFromTitle : entryIdFromTitle); + } + + return res.status; +} + function scrapeUpcoming(scenes, channel) { return scenes.map(({ query, html }) => { const release = {}; @@ -110,6 +123,19 @@ function scrapeUpcoming(scenes, channel) { }); } +async function fetchUpcoming(site) { + if (site.parameters?.upcoming === false) return null; + + const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`; + const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/ancestor::div' }); + + if (res.ok) { + return scrapeUpcoming(res.context, site); + } + + return res.status; +} + function extractLegacyTrailer(html, context) { const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line)); @@ -206,6 +232,10 @@ async function scrapeScene({ html, query }, context) { release.trailer = extractLegacyTrailer(html, context); } + if (release.trailer?.includes('_sfw')) { + release.trailer = null; + } + // release.photos = async () => await getPhotos(release.entryId, context.entity); // probably no longer works on any site if (argv.jjFullPhotos) { release.photos = getPhotos(query, release, context); @@ -216,7 +246,8 @@ async function scrapeScene({ html, query }, context) { release.photos = [ ...context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || [], ...query.imgs('#images img'), - ].map((source) => Array.from(new Set([ + ...query.imgs('img.update_thumb', { attribute: 'src0_1x' }), + ].filter(Boolean).map((source) => Array.from(new Set([ source.replace(/.jpg$/, '-full.jpg'), source.replace(/-1x.jpg$/, '-4x.jpg'), source.replace(/-1x.jpg$/, '-2x.jpg'), @@ -278,10 +309,11 @@ function scrapeMovie({ query }, { url }) { return movie; } -function scrapeProfile({ query }, url, name, entity) { +function scrapeProfile({ query }, url, entity) { const profile = { url }; - profile.description = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span'); // the spaces are important to avoid selecting a similar comment + profile.description = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span') // the spaces are important to avoid selecting a similar comment + || query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::text()'); profile.height = heightToCm(query.content('//span[contains(text(), "Height")]/following-sibling::span')); profile.measurements = query.content('//span[contains(text(), "Measurements")]/following-sibling::span'); @@ -300,41 +332,18 @@ function scrapeProfile({ query }, url, name, entity) { query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0_1x' }), query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0' }), query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src' }), - ].filter(Boolean); + // ...query.sourceSet('.model_bio_pic img, .model_bio_thumb', { origin: entity.url }), + ].filter(Boolean).map((src) => ({ + src, + referer: entity.url, + verifyType: 'image', + })); profile.scenes = scrapeAll(unprint.initAll(query.all('.grid-item')), entity, true); return profile; } -async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) { - const url = site.parameters?.latest - ? util.format(site.parameters.latest, page) - : `${site.url}/trial/categories/movies_${page}_d.html`; - - // const res = await http.get(url); - const res = await unprint.get(url, { selectAll: '.update_details, .grid-item' }); - - if (res.ok) { - return scrapeAll(res.context, site, typeof site.parameters?.entryIdFromTitle === 'boolean' ? site.parameters.entryIdFromTitle : entryIdFromTitle); - } - - return res.status; -} - -async function fetchUpcoming(site) { - if (site.parameters?.upcoming === false) return null; - - const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`; - const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/ancestor::div' }); - - if (res.ok) { - return scrapeUpcoming(res.context, site); - } - - return res.status; -} - async function fetchProfile({ name: actorName, url }, entity) { const actorSlugA = slugify(actorName, ''); const actorSlugB = slugify(actorName, '-'); @@ -356,10 +365,12 @@ async function fetchProfile({ name: actorName, url }, entity) { return null; } - const res = await unprint.get(profileUrl); + const res = await unprint.get(profileUrl, { + followRedirects: false, + }); if (res.ok) { - return scrapeProfile(res.context, profileUrl, actorName, entity); + return scrapeProfile(res.context, profileUrl, entity); } return null; diff --git a/src/scrapers/releases.js b/src/scrapers/releases.js index 38f2ca2a..54b6ec37 100644 --- a/src/scrapers/releases.js +++ b/src/scrapers/releases.js @@ -4,7 +4,6 @@ const adultempire = require('./adultempire'); const angelogodshackoriginal = require('./angelogodshackoriginal'); // const archangel = require('./archangel'); const assylum = require('./assylum'); -const amateurallure = require('./amateurallure'); const americanpornstar = require('./americanpornstar'); const amnesiac = require('./amnesiac'); const aziani = require('./aziani'); @@ -93,7 +92,7 @@ module.exports = { // daringsex, // arch angel // etc - amateurallure, + amateurallure: julesjordan, americanpornstar, amateureuro: porndoe, amnesiac, diff --git a/tests/profiles.js b/tests/profiles.js index d25361ae..943a31ab 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -195,11 +195,14 @@ const actors = [ // naughty america { entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] }, { entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] }, + // jules jordan scraper + { entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] }, + { entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] }, + { entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] }, // etc. { entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] }, { entity: 'bangbros', name: 'Kira Perez', fields: ['avatar', 'gender', 'ethnicity', 'hairColor'] }, { entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] }, - { entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] }, { entity: 'pornworld', name: 'Veronica Leal', fields: ['avatar', 'nationality', 'age'] }, { entity: 'private', name: 'Cherry Kiss', fields: ['avatar', 'description', 'nationality', 'measurements', 'height', 'weight', 'hairColor', 'eye', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings'] }, { entity: 'teenmegaworld', name: 'Sheri Vi', fields: ['avatar', 'description', 'hairColor', 'eyes'] }, @@ -235,7 +238,11 @@ async function validateUrl(url, mime = 'image/') { return false; } - const res = await fetch(href); + const res = await fetch(href, { + headers: { + Referer: url.referer || new URL(href).origin, + }, + }); const type = res.headers.get('content-type'); const resolvedType = url.expectType?.[type] || type || 'image/jpeg';