Fixed JJ scraper for Amateur Allure, removed wrapper. Added tag censor aliases.
This commit is contained in:
parent
defe45c396
commit
38784a6e59
200
seeds/00_tags.js
200
seeds/00_tags.js
|
|
@ -179,6 +179,11 @@ const tags = [
|
|||
slug: 'ball-licking',
|
||||
group: 'oral',
|
||||
},
|
||||
{
|
||||
name: 'ball sucking',
|
||||
slug: 'ball-sucking',
|
||||
group: 'oral',
|
||||
},
|
||||
{
|
||||
name: 'ballerina',
|
||||
slug: 'ballerina',
|
||||
|
|
@ -1051,6 +1056,10 @@ const tags = [
|
|||
name: 'spanking',
|
||||
slug: 'spanking',
|
||||
},
|
||||
{
|
||||
name: 'spinner',
|
||||
slug: 'spinner',
|
||||
},
|
||||
{
|
||||
name: 'spooning',
|
||||
slug: 'spooning',
|
||||
|
|
@ -1504,11 +1513,7 @@ const aliases = [
|
|||
},
|
||||
{
|
||||
name: 'ball suck',
|
||||
for: 'ball-licking',
|
||||
},
|
||||
{
|
||||
name: 'ball sucking',
|
||||
for: 'ball-licking',
|
||||
for: 'ball-sucking',
|
||||
},
|
||||
{
|
||||
name: 'boob fucking',
|
||||
|
|
@ -2762,6 +2767,191 @@ const aliases = [
|
|||
name: 'interviews',
|
||||
for: 'interview',
|
||||
},
|
||||
// censors, amateur allure
|
||||
{
|
||||
name: '2 big c---s',
|
||||
for: 'mfm',
|
||||
},
|
||||
{
|
||||
name: 'amateur b--w--bs',
|
||||
for: 'amateur blowjobs',
|
||||
},
|
||||
{
|
||||
name: 'a--l',
|
||||
for: 'anal',
|
||||
},
|
||||
{
|
||||
name: 'a-s',
|
||||
for: 'ass',
|
||||
},
|
||||
{
|
||||
name: 'b--l l--k',
|
||||
for: 'ball-licking',
|
||||
},
|
||||
{
|
||||
name: 'b--l s--k--g',
|
||||
for: 'ball-sucking',
|
||||
},
|
||||
{
|
||||
name: 'big a-s',
|
||||
for: 'big-butt',
|
||||
},
|
||||
{
|
||||
name: 'big t--s',
|
||||
for: 'big-boobs',
|
||||
},
|
||||
{
|
||||
name: 'b--w--b',
|
||||
for: 'blowjob',
|
||||
},
|
||||
{
|
||||
name: 'b--t p--g',
|
||||
for: 'anal-toy',
|
||||
},
|
||||
{
|
||||
name: 'c--k--g',
|
||||
for: 'choking',
|
||||
},
|
||||
{
|
||||
name: 'c--g--l',
|
||||
for: 'cowgirl',
|
||||
},
|
||||
{
|
||||
name: 'c--a--ie',
|
||||
for: 'creampie',
|
||||
},
|
||||
{
|
||||
name: 'c-m',
|
||||
for: 'cum',
|
||||
},
|
||||
{
|
||||
name: 'c-m in mouth',
|
||||
for: 'cum-in-mouth',
|
||||
},
|
||||
{
|
||||
name: 'c-m in mouth swallow',
|
||||
for: 'swallowing',
|
||||
},
|
||||
{
|
||||
name: 'c-m swallow',
|
||||
for: 'swallowing',
|
||||
},
|
||||
{
|
||||
name: 'c--s--t',
|
||||
for: 'cumshot',
|
||||
},
|
||||
{
|
||||
name: 'd--p -h--at',
|
||||
for: 'deepthroat',
|
||||
},
|
||||
{
|
||||
name: 'd--g--s--le',
|
||||
for: 'doggy-style',
|
||||
},
|
||||
{
|
||||
name: 'd--g--t--e',
|
||||
for: 'doggy-style',
|
||||
},
|
||||
{
|
||||
name: 'face f--k',
|
||||
for: 'facefucking',
|
||||
},
|
||||
{
|
||||
name: 'f----l',
|
||||
for: 'facial',
|
||||
},
|
||||
{
|
||||
name: 'f--g--i-g',
|
||||
for: 'fingering',
|
||||
},
|
||||
{
|
||||
name: 'f--r--me',
|
||||
for: 'foursome',
|
||||
},
|
||||
{
|
||||
name: 'free t--n sex',
|
||||
for: 'teen',
|
||||
},
|
||||
{
|
||||
name: 'girls giving b--w jobs',
|
||||
for: 'blowjob',
|
||||
},
|
||||
{
|
||||
name: 'g---y h--e',
|
||||
for: 'gloryhole',
|
||||
},
|
||||
{
|
||||
name: 'h--d--b',
|
||||
for: 'handjob',
|
||||
},
|
||||
{
|
||||
name: 'l--b--n',
|
||||
for: 'lesbian',
|
||||
},
|
||||
{
|
||||
name: 'm--f',
|
||||
for: 'milf',
|
||||
},
|
||||
{
|
||||
name: 'm--s--n--y',
|
||||
for: 'missionary',
|
||||
},
|
||||
{
|
||||
name: 'o--l',
|
||||
for: 'oral',
|
||||
},
|
||||
{
|
||||
name: 'o--l c--a--ie',
|
||||
for: 'oral-creampie',
|
||||
},
|
||||
{
|
||||
name: 'o--l sex',
|
||||
for: 'blowjob',
|
||||
},
|
||||
{
|
||||
name: 'o--y',
|
||||
for: 'orgy',
|
||||
},
|
||||
{
|
||||
name: 'p--n--a-s first ever scene',
|
||||
for: 'debut',
|
||||
},
|
||||
{
|
||||
name: 'p---y to mouth',
|
||||
for: 'pussy-to-mouth',
|
||||
},
|
||||
{
|
||||
name: 'reverse c--g--l',
|
||||
for: 'reverse-cowgirl',
|
||||
},
|
||||
{
|
||||
name: 'r----b',
|
||||
for: 'rimjob',
|
||||
},
|
||||
{
|
||||
name: 's--n--r',
|
||||
for: 'spinner',
|
||||
},
|
||||
{
|
||||
name: 'swallow c-m',
|
||||
for: 'swallowing',
|
||||
},
|
||||
{
|
||||
name: 't--n',
|
||||
for: 'teen',
|
||||
},
|
||||
{
|
||||
name: 't--n c-m swallowing videos',
|
||||
for: 'swallowing',
|
||||
},
|
||||
{
|
||||
name: 't--e--o-e',
|
||||
for: 'threesome',
|
||||
},
|
||||
{
|
||||
name: 't---y f--k',
|
||||
for: 'titty-fucking',
|
||||
},
|
||||
];
|
||||
|
||||
const priorities = [ // higher index is higher priority
|
||||
|
|
|
|||
|
|
@ -637,6 +637,7 @@ const sites = [
|
|||
upcoming: false,
|
||||
latest: 'https://www.amateurallure.com/tour/updates/page_%d.html',
|
||||
photos: 'https://www.amateurallure.com/tour/gallery.php',
|
||||
profile: 'https://www.amateurallure.com/tour/models',
|
||||
},
|
||||
parent: 'amateurallure',
|
||||
},
|
||||
|
|
@ -649,6 +650,7 @@ const sites = [
|
|||
upcoming: false,
|
||||
latest: 'https://www.swallowsalon.com/categories/movies_%d_d.html',
|
||||
photos: 'https://www.swallowsalon.com/gallery.php',
|
||||
profile: 'https://www.swallowsalon.com/models',
|
||||
},
|
||||
parent: 'amateurallure',
|
||||
},
|
||||
|
|
|
|||
|
|
@ -186,6 +186,10 @@ module.exports = {
|
|||
loveherfeet: loveherfilms,
|
||||
loveherboobs: loveherfilms,
|
||||
shelovesblack: loveherfilms,
|
||||
// julesjordan
|
||||
julesjordan,
|
||||
amateurallure: julesjordan, // different company, same scraper
|
||||
swallowsalon: julesjordan, // different company, same scraper
|
||||
// etc
|
||||
'18vr': badoink,
|
||||
theflourishxxx: theflourish,
|
||||
|
|
@ -216,7 +220,6 @@ module.exports = {
|
|||
hookuphotshot,
|
||||
inthecrack,
|
||||
jerkaoke: modelmedia,
|
||||
julesjordan,
|
||||
karups,
|
||||
kellymadison,
|
||||
'8kmembers': kellymadison,
|
||||
|
|
|
|||
|
|
@ -1,49 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchLatest, fetchScene } = require('./julesjordan');
|
||||
|
||||
/**
 * Fills in missing actors from the scene title and tags multi-performer scenes.
 *
 * When the scene carries no actors, the title is matched against a handful of
 * "introduces X", "X and Y return" style patterns. Captures containing one of
 * the blacklisted descriptive words (e.g. "teen", "blonde") are discarded.
 * The scene object is updated in place and the same object is returned.
 *
 * @param {object} scene - Scraped release; reads `title`, `actors` and `tags`.
 * @returns {object} The same object, possibly with `actors` and `tags` replaced.
 */
function extractActors(scene) {
	const release = scene;
	// A scene without a usable title must not crash the matching below.
	const title = typeof scene.title === 'string' ? scene.title : '';

	if (!scene.actors || scene.actors.length === 0) {
		const introActorMatches = title.match(/(?:presents|introduces|features|welcomes) (\w+ \w+)/i);
		const introTwoActorMatches = title.match(/(?:presents|introduces|features|welcomes) (?:(\w+)|(\w+ \w+)) and (\w+ \w+)/i);
		const returnActorMatches = title.match(/(?:(^\w+)|(\w+ \w+))(?:,| (?:return|visit|pov|give|suck|lick|milk|love|enjoy|service|is))/i);
		const returnTwoActorMatches = title.match(/(\w+ \w+) and (?:(\w+)|(\w+ \w+)) (?:return|visit|give|suck|lick|milk|love|enjoy|service|are)/i);

		// Two-actor patterns take precedence over their single-actor counterparts.
		const rawActors = (introTwoActorMatches || introActorMatches || returnTwoActorMatches || returnActorMatches)?.slice(1);

		const actors = rawActors?.filter((actor) => {
			// Unmatched alternation groups come back as undefined.
			if (!actor) return false;
			if (/swallow|\bcum|fuck|suck|give|giving|take|takes|taking|head|teen|babe|cute|beaut|naughty|teacher|nanny|adorable|brunette|blonde|bust|audition|from|\band\b|\bto\b/i.test(actor)) return false;

			return true;
		});

		if (actors) {
			release.actors = actors;
		}
	}

	// Case-insensitive like the other title patterns, so "Threesome" is recognized too.
	if (release.actors?.length > 1 || /threesome|threeway/i.test(title)) {
		const tags = scene.tags || [];

		// Avoid appending a second 'mff' when the scene is already tagged.
		release.tags = tags.includes('mff') ? [...tags] : [...tags, 'mff'];
	}

	return release;
}
|
||||
|
||||
/**
 * Fetches the latest scenes via the wrapped fetchLatest and runs every scene
 * through extractActors.
 *
 * @param {object} site - Passed through to fetchLatest.
 * @param {number} [page=1] - Page number, passed through to fetchLatest.
 * @param {*} include - Passed through to fetchLatest.
 * @param {*} preData - Passed through to fetchLatest.
 * @returns {Promise<*>} The processed scenes, or the unmodified fetchLatest
 *   result when it is not an array.
 */
async function fetchLatestWrap(site, page = 1, include, preData) {
	const latest = await fetchLatest(site, page, include, preData);

	// fetchLatest may resolve to something other than a scene list (e.g. a
	// status code on a failed request); hand that back instead of crashing on .map().
	if (!Array.isArray(latest)) {
		return latest;
	}

	return latest.map((scene) => extractActors(scene));
}
|
||||
|
||||
/**
 * Fetches a single scene via the wrapped fetchScene and runs it through
 * extractActors.
 *
 * @param {string} url - Passed through to fetchScene.
 * @param {object} channel - Passed through to fetchScene.
 * @param {object} baseRelease - Passed through to fetchScene.
 * @param {*} include - Passed through to fetchScene.
 * @returns {Promise<*>} The processed scene, or the unmodified fetchScene
 *   result when it is not an object.
 */
async function fetchSceneWrap(url, channel, baseRelease, include) {
	const scene = await fetchScene(url, channel, baseRelease, include);

	// NOTE(review): fetchScene is not visible here; presumably it can resolve to
	// null or a status code on failure. Pass such results through untouched.
	if (!scene || typeof scene !== 'object') {
		return scene;
	}

	return extractActors(scene);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchScene: fetchSceneWrap,
|
||||
};
|
||||
|
|
@ -5,7 +5,6 @@ const Promise = require('bluebird');
|
|||
const unprint = require('unprint');
|
||||
|
||||
const argv = require('../argv');
|
||||
const qu = require('../utils/qu');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
|
|
@ -34,10 +33,11 @@ function getEntryIdFromTitle(release) {
|
|||
function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||
return scenes.map(({ element, query }) => {
|
||||
const release = {};
|
||||
const title = query.content('.content_img div, .dvd_info > a, a.update_title, a[title] + a[title], .overlay-text') || query.content('a[title*=" "]');
|
||||
const title = query.content('.content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text')
|
||||
|| query.content('a[title*=" "]');
|
||||
|
||||
release.title = title?.slice(0, title.match(/starring:/i)?.index || Infinity).trim();
|
||||
release.url = query.url('.content_img a, .dvd_info > a, a.update_title, a[title]');
|
||||
release.url = query.url('.content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]');
|
||||
release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
|
||||
|
||||
release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({
|
||||
|
|
@ -50,9 +50,9 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
|||
|
||||
[release.poster, ...release.photos] = dvdPhotos.length
|
||||
? dvdPhotos
|
||||
: Array.from({ length: photoCount }).map((value, index) => {
|
||||
: Array.from({ length: photoCount }).map((_value, index) => {
|
||||
const src = query.img('a img.thumbs', { attribute: `src${index}_1x` }) || query.img('a img.thumbs', { attribute: `src${index}` }) || query.img('a img.thumbs');
|
||||
const prefixedSrc = qu.prefixUrl(src, site.url);
|
||||
const prefixedSrc = unprint.prefixUrl(src, site.url);
|
||||
|
||||
if (src) {
|
||||
return Array.from(new Set([
|
||||
|
|
@ -81,12 +81,25 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
|||
|| query.element('.rating_box')?.dataset.id
|
||||
|| query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1];
|
||||
|
||||
console.log(release.entryId);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Loads one page of latest updates and scrapes the scene tiles on it.
 *
 * The page URL comes from `site.parameters.latest` (formatted with the page
 * number) when set, otherwise from the default listing path under `site.url`.
 *
 * @param {object} site - Reads `url` and the optional `parameters`.
 * @param {number} [page=1] - Page number to request.
 * @param {*} include - Unused by this function.
 * @param {*} preData - Unused by this function.
 * @param {boolean} [entryIdFromTitle=false] - Forwarded to scrapeAll unless
 *   `site.parameters.entryIdFromTitle` is a boolean, which takes precedence.
 * @returns {Promise<*>} The scrapeAll result, or `res.status` when the request was not ok.
 */
async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) {
	const latestTemplate = site.parameters?.latest;
	const url = latestTemplate
		? util.format(latestTemplate, page)
		: `${site.url}/trial/categories/movies_${page}_d.html`;

	const res = await unprint.get(url, { selectAll: '.update_details, .grid-item' });

	if (!res.ok) {
		return res.status;
	}

	// A boolean set on the site overrides the caller-supplied default.
	const siteOverride = site.parameters?.entryIdFromTitle;
	const useTitleForEntryId = typeof siteOverride === 'boolean' ? siteOverride : entryIdFromTitle;

	return scrapeAll(res.context, site, useTitleForEntryId);
}
|
||||
|
||||
function scrapeUpcoming(scenes, channel) {
|
||||
return scenes.map(({ query, html }) => {
|
||||
const release = {};
|
||||
|
|
@ -110,6 +123,19 @@ function scrapeUpcoming(scenes, channel) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Loads the page listing upcoming scenes and scrapes the "Coming Soon" entries.
 *
 * @param {object} site - Reads `url` and the optional `parameters.upcoming`;
 *   `false` disables the lookup, a truthy value is used as the page URL.
 * @returns {Promise<*>} `null` when disabled, the scrapeUpcoming result on
 *   success, or `res.status` when the request was not ok.
 */
async function fetchUpcoming(site) {
	const upcoming = site.parameters?.upcoming;

	if (upcoming === false) {
		return null;
	}

	const url = upcoming ? util.format(upcoming) : `${site.url}/trial/index.php`;
	const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/ancestor::div' });

	return res.ok ? scrapeUpcoming(res.context, site) : res.status;
}
|
||||
|
||||
function extractLegacyTrailer(html, context) {
|
||||
const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line));
|
||||
|
||||
|
|
@ -206,6 +232,10 @@ async function scrapeScene({ html, query }, context) {
|
|||
release.trailer = extractLegacyTrailer(html, context);
|
||||
}
|
||||
|
||||
if (release.trailer?.includes('_sfw')) {
|
||||
release.trailer = null;
|
||||
}
|
||||
|
||||
// release.photos = async () => await getPhotos(release.entryId, context.entity); // probably no longer works on any site
|
||||
if (argv.jjFullPhotos) {
|
||||
release.photos = getPhotos(query, release, context);
|
||||
|
|
@ -216,7 +246,8 @@ async function scrapeScene({ html, query }, context) {
|
|||
release.photos = [
|
||||
...context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || [],
|
||||
...query.imgs('#images img'),
|
||||
].map((source) => Array.from(new Set([
|
||||
...query.imgs('img.update_thumb', { attribute: 'src0_1x' }),
|
||||
].filter(Boolean).map((source) => Array.from(new Set([
|
||||
source.replace(/.jpg$/, '-full.jpg'),
|
||||
source.replace(/-1x.jpg$/, '-4x.jpg'),
|
||||
source.replace(/-1x.jpg$/, '-2x.jpg'),
|
||||
|
|
@ -278,10 +309,11 @@ function scrapeMovie({ query }, { url }) {
|
|||
return movie;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, url, name, entity) {
|
||||
function scrapeProfile({ query }, url, entity) {
|
||||
const profile = { url };
|
||||
|
||||
profile.description = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span'); // the spaces are important to avoid selecting a similar comment
|
||||
profile.description = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span') // the spaces are important to avoid selecting a similar comment
|
||||
|| query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::text()');
|
||||
|
||||
profile.height = heightToCm(query.content('//span[contains(text(), "Height")]/following-sibling::span'));
|
||||
profile.measurements = query.content('//span[contains(text(), "Measurements")]/following-sibling::span');
|
||||
|
|
@ -300,41 +332,18 @@ function scrapeProfile({ query }, url, name, entity) {
|
|||
query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0_1x' }),
|
||||
query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0' }),
|
||||
query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src' }),
|
||||
].filter(Boolean);
|
||||
// ...query.sourceSet('.model_bio_pic img, .model_bio_thumb', { origin: entity.url }),
|
||||
].filter(Boolean).map((src) => ({
|
||||
src,
|
||||
referer: entity.url,
|
||||
verifyType: 'image',
|
||||
}));
|
||||
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.grid-item')), entity, true);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Loads one page of latest updates and scrapes the scene tiles on it.
 *
 * The page URL comes from `site.parameters.latest` (formatted with the page
 * number) when set, otherwise from the default listing path under `site.url`.
 *
 * @param {object} site - Reads `url` and the optional `parameters`.
 * @param {number} [page=1] - Page number to request.
 * @param {*} include - Unused by this function.
 * @param {*} preData - Unused by this function.
 * @param {boolean} [entryIdFromTitle=false] - Forwarded to scrapeAll unless
 *   `site.parameters.entryIdFromTitle` is a boolean, which takes precedence.
 * @returns {Promise<*>} The scrapeAll result, or `res.status` when the request was not ok.
 */
async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) {
	const latestTemplate = site.parameters?.latest;
	const url = latestTemplate
		? util.format(latestTemplate, page)
		: `${site.url}/trial/categories/movies_${page}_d.html`;

	const res = await unprint.get(url, { selectAll: '.update_details, .grid-item' });

	if (!res.ok) {
		return res.status;
	}

	// A boolean set on the site overrides the caller-supplied default.
	const siteOverride = site.parameters?.entryIdFromTitle;
	const useTitleForEntryId = typeof siteOverride === 'boolean' ? siteOverride : entryIdFromTitle;

	return scrapeAll(res.context, site, useTitleForEntryId);
}
|
||||
|
||||
/**
 * Loads the page listing upcoming scenes and scrapes the "Coming Soon" entries.
 *
 * @param {object} site - Reads `url` and the optional `parameters.upcoming`;
 *   `false` disables the lookup, a truthy value is used as the page URL.
 * @returns {Promise<*>} `null` when disabled, the scrapeUpcoming result on
 *   success, or `res.status` when the request was not ok.
 */
async function fetchUpcoming(site) {
	const upcoming = site.parameters?.upcoming;

	if (upcoming === false) {
		return null;
	}

	const url = upcoming ? util.format(upcoming) : `${site.url}/trial/index.php`;
	const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/ancestor::div' });

	return res.ok ? scrapeUpcoming(res.context, site) : res.status;
}
|
||||
|
||||
async function fetchProfile({ name: actorName, url }, entity) {
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName, '-');
|
||||
|
|
@ -356,10 +365,12 @@ async function fetchProfile({ name: actorName, url }, entity) {
|
|||
return null;
|
||||
}
|
||||
|
||||
const res = await unprint.get(profileUrl);
|
||||
const res = await unprint.get(profileUrl, {
|
||||
followRedirects: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, profileUrl, actorName, entity);
|
||||
return scrapeProfile(res.context, profileUrl, entity);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ const adultempire = require('./adultempire');
|
|||
const angelogodshackoriginal = require('./angelogodshackoriginal');
|
||||
// const archangel = require('./archangel');
|
||||
const assylum = require('./assylum');
|
||||
const amateurallure = require('./amateurallure');
|
||||
const americanpornstar = require('./americanpornstar');
|
||||
const amnesiac = require('./amnesiac');
|
||||
const aziani = require('./aziani');
|
||||
|
|
@ -93,7 +92,7 @@ module.exports = {
|
|||
// daringsex,
|
||||
// arch angel
|
||||
// etc
|
||||
amateurallure,
|
||||
amateurallure: julesjordan,
|
||||
americanpornstar,
|
||||
amateureuro: porndoe,
|
||||
amnesiac,
|
||||
|
|
|
|||
|
|
@ -195,11 +195,14 @@ const actors = [
|
|||
// naughty america
|
||||
{ entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] },
|
||||
{ entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] },
|
||||
// jules jordan scraper
|
||||
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] },
|
||||
{ entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] },
|
||||
{ entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] },
|
||||
// etc.
|
||||
{ entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] },
|
||||
{ entity: 'bangbros', name: 'Kira Perez', fields: ['avatar', 'gender', 'ethnicity', 'hairColor'] },
|
||||
{ entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
||||
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] },
|
||||
{ entity: 'pornworld', name: 'Veronica Leal', fields: ['avatar', 'nationality', 'age'] },
|
||||
{ entity: 'private', name: 'Cherry Kiss', fields: ['avatar', 'description', 'nationality', 'measurements', 'height', 'weight', 'hairColor', 'eye', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings'] },
|
||||
{ entity: 'teenmegaworld', name: 'Sheri Vi', fields: ['avatar', 'description', 'hairColor', 'eyes'] },
|
||||
|
|
@ -235,7 +238,11 @@ async function validateUrl(url, mime = 'image/') {
|
|||
return false;
|
||||
}
|
||||
|
||||
const res = await fetch(href);
|
||||
const res = await fetch(href, {
|
||||
headers: {
|
||||
Referer: url.referer || new URL(href).origin,
|
||||
},
|
||||
});
|
||||
|
||||
const type = res.headers.get('content-type');
|
||||
const resolvedType = url.expectType?.[type] || type || 'image/jpeg';
|
||||
|
|
|
|||
Loading…
Reference in New Issue