forked from DebaucheryLibrarian/traxxx
Splitting Han titles and actors in Model Media scraper.
This commit is contained in:
@@ -14,10 +14,38 @@ function scrapeAll(scenes) {
|
||||
const { origin, pathname, searchParams } = new URL(url);
|
||||
|
||||
release.url = `${origin}${pathname}`;
|
||||
release.actors = searchParams.get('models_name')?.split(',');
|
||||
release.shootId = pathname.match(/((LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))[\w-]+/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives
|
||||
|
||||
release.actors = searchParams.get('models_name')?.split(',').map((actor) => {
|
||||
const [han, english] = actor.split('/').map((name) => name.trim());
|
||||
|
||||
if (/amateur/i.test(english)) {
|
||||
// not a name
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
name: english || han,
|
||||
alias: english && han,
|
||||
};
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
const rawTitle = query.content('.video-title div')?.replace(release.shootId, '');
|
||||
|
||||
if (rawTitle) {
|
||||
// find / closest to Han in case there are multiple, account for no / at all
|
||||
const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index;
|
||||
const splitIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/') || hanIndex;
|
||||
|
||||
if (hanIndex && splitIndex > -1) {
|
||||
release.title = rawTitle.slice(0, splitIndex).trim();
|
||||
release.altTitles = [rawTitle.slice(splitIndex + 1).trim()];
|
||||
} else {
|
||||
release.title = rawTitle;
|
||||
}
|
||||
}
|
||||
|
||||
release.title = query.content('.video-title div');
|
||||
release.duration = query.duration('.timestamp');
|
||||
|
||||
const poster = query.img('img', { attribute: 'data-src' });
|
||||
@@ -31,8 +59,6 @@ function scrapeAll(scenes) {
|
||||
|
||||
release.teaser = query.video(null, { attribute: 'data-video-src' });
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
@@ -49,17 +75,16 @@ function scrapeProfile({ query }) {
|
||||
}
|
||||
|
||||
profile.description = query.content('h2') || null;
|
||||
profile.height = query.number('//span[text()="Measurements"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });
|
||||
|
||||
profile.measurements = query.number('//span[text()="Birth Place"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
// can't find a single profile with this information available, but add for good measure
|
||||
profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');
|
||||
profile.birthPlace = query.number('//span[text()="Birth Place"]/following-sibling::span');
|
||||
|
||||
profile.banner = query.img('div[class*="banner"] > img');
|
||||
profile.photos = query.imgs('#MusModelSwiper img');
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
||||
@@ -82,6 +82,7 @@ const scrapers = {
|
||||
americanpornstar,
|
||||
amateureuro: porndoe,
|
||||
archangel,
|
||||
asiam: modelmedia,
|
||||
assylum,
|
||||
aziani,
|
||||
badoink,
|
||||
@@ -115,6 +116,7 @@ const scrapers = {
|
||||
interracialpass: hush,
|
||||
inthecrack,
|
||||
jayrock,
|
||||
jerkaoke: modelmedia,
|
||||
jesseloadsmonsterfacials,
|
||||
julesjordan,
|
||||
karups,
|
||||
@@ -178,6 +180,7 @@ const scrapers = {
|
||||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
archangel,
|
||||
asiam: modelmedia,
|
||||
aziani,
|
||||
babes: mindgeek,
|
||||
babevr: badoink,
|
||||
@@ -234,6 +237,7 @@ const scrapers = {
|
||||
interracialpovs: hush,
|
||||
inthecrack,
|
||||
jamesdeen: fullpornnetwork,
|
||||
jerkaoke: modelmedia,
|
||||
julesjordan,
|
||||
karups,
|
||||
kellymadison,
|
||||
|
||||
@@ -32,6 +32,7 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
||||
|
||||
const curatedRelease = {
|
||||
title: decode(release.title),
|
||||
alt_titles: release.altTitles?.map((title) => decode(title)),
|
||||
entry_id: release.entryId || null,
|
||||
entity_id: release.entity.id,
|
||||
studio_id: release.studio?.id || null,
|
||||
@@ -46,6 +47,8 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
||||
updated_batch_id: batchId,
|
||||
};
|
||||
|
||||
console.log(curatedRelease);
|
||||
|
||||
if (release.id) {
|
||||
// release is updated
|
||||
curatedRelease.id = release.id;
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
|
||||
const knex = require('../knex');
|
||||
const logger = require('../logger')(__filename);
|
||||
const http = require('./http');
|
||||
@@ -27,7 +29,7 @@ async function resolvePlace(query) {
|
||||
// https://operations.osmfoundation.org/policies/nominatim/
|
||||
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {
|
||||
headers: {
|
||||
'User-Agent': 'contact at moonloop.adult@protonmail.com',
|
||||
'User-Agent': config.location.userAgent,
|
||||
},
|
||||
interval: 1000,
|
||||
concurrency: 1,
|
||||
|
||||
Reference in New Issue
Block a user