Splitting Han titles and actors in Model Media scraper.
|
@ -35,7 +35,10 @@
|
|||
/>
|
||||
|
||||
<div class="info column">
|
||||
<div class="row row-title">
|
||||
<div
|
||||
class="row row-title"
|
||||
:class="{ 'has-alt': release.altTitles?.length > 0 }"
|
||||
>
|
||||
<h2
|
||||
v-if="release.title"
|
||||
class="title"
|
||||
|
@ -62,6 +65,19 @@
|
|||
/>
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-if="release.altTitles?.length > 0"
|
||||
class="row alttitles"
|
||||
>
|
||||
<h2
|
||||
v-for="(altTitle, index) in release.altTitles"
|
||||
:key="`altitle-${index}`"
|
||||
class="alttitle"
|
||||
>
|
||||
{{ altTitle }}
|
||||
</h2>
|
||||
</div>
|
||||
|
||||
<Releases
|
||||
v-if="release.scenes && release.scenes.length > 0"
|
||||
:releases="release.scenes"
|
||||
|
@ -520,6 +536,11 @@ export default {
|
|||
color: var(--shadow);
|
||||
}
|
||||
|
||||
.alttitle {
|
||||
color: var(--shadow);
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
.album-toggle {
|
||||
height: fit-content;
|
||||
display: inline-flex;
|
||||
|
|
|
@ -545,6 +545,7 @@ const releaseFragment = `
|
|||
release(id: $releaseId) {
|
||||
id
|
||||
title
|
||||
altTitles
|
||||
description
|
||||
date
|
||||
datePrecision
|
||||
|
|
|
@ -30,6 +30,9 @@ module.exports = {
|
|||
},
|
||||
},
|
||||
},
|
||||
location: {
|
||||
userAgent: 'contact via https://traxxx.me/',
|
||||
},
|
||||
analytics: {
|
||||
enabled: false,
|
||||
address: 'http://localhost:3000/script.js',
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
// Forward migration: alters the `releases` table to add an `alt_titles`
// column, declared through specificType with the raw column type `text ARRAY`.
exports.up = async (knex) => {
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.specificType('alt_titles', 'text ARRAY');
|
||||
});
|
||||
};
|
||||
|
||||
// Rollback migration: alters the `releases` table to drop the `alt_titles` column.
exports.down = async (knex) => {
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.dropColumn('alt_titles');
|
||||
});
|
||||
};
|
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 4.0 KiB |
Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 4.0 KiB |
After Width: | Height: | Size: 2.0 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
|
@ -14,10 +14,38 @@ function scrapeAll(scenes) {
|
|||
const { origin, pathname, searchParams } = new URL(url);
|
||||
|
||||
release.url = `${origin}${pathname}`;
|
||||
release.actors = searchParams.get('models_name')?.split(',');
|
||||
release.shootId = pathname.match(/((LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))[\w-]+/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives
|
||||
|
||||
release.actors = searchParams.get('models_name')?.split(',').map((actor) => {
|
||||
const [han, english] = actor.split('/').map((name) => name.trim());
|
||||
|
||||
if (/amateur/i.test(english)) {
|
||||
// not a name
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
name: english || han,
|
||||
alias: english && han,
|
||||
};
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
const rawTitle = query.content('.video-title div')?.replace(release.shootId, '');
|
||||
|
||||
if (rawTitle) {
|
||||
// find / closest to Han in case there are multiple, account for no / at all
|
||||
const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index;
|
||||
const splitIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/') || hanIndex;
|
||||
|
||||
if (hanIndex && splitIndex > -1) {
|
||||
release.title = rawTitle.slice(0, splitIndex).trim();
|
||||
release.altTitles = [rawTitle.slice(splitIndex + 1).trim()];
|
||||
} else {
|
||||
release.title = rawTitle;
|
||||
}
|
||||
}
|
||||
|
||||
release.title = query.content('.video-title div');
|
||||
release.duration = query.duration('.timestamp');
|
||||
|
||||
const poster = query.img('img', { attribute: 'data-src' });
|
||||
|
@ -31,8 +59,6 @@ function scrapeAll(scenes) {
|
|||
|
||||
release.teaser = query.video(null, { attribute: 'data-video-src' });
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
@ -49,17 +75,16 @@ function scrapeProfile({ query }) {
|
|||
}
|
||||
|
||||
profile.description = query.content('h2') || null;
|
||||
profile.height = query.number('//span[text()="Measurements"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });
|
||||
|
||||
profile.measurements = query.number('//span[text()="Birth Place"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
// can't find a single profile with this information available, but add for good measure
|
||||
profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');
|
||||
profile.birthPlace = query.number('//span[text()="Birth Place"]/following-sibling::span');
|
||||
|
||||
profile.banner = query.img('div[class*="banner"] > img');
|
||||
profile.photos = query.imgs('#MusModelSwiper img');
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
|
|
@ -82,6 +82,7 @@ const scrapers = {
|
|||
americanpornstar,
|
||||
amateureuro: porndoe,
|
||||
archangel,
|
||||
asiam: modelmedia,
|
||||
assylum,
|
||||
aziani,
|
||||
badoink,
|
||||
|
@ -115,6 +116,7 @@ const scrapers = {
|
|||
interracialpass: hush,
|
||||
inthecrack,
|
||||
jayrock,
|
||||
jerkaoke: modelmedia,
|
||||
jesseloadsmonsterfacials,
|
||||
julesjordan,
|
||||
karups,
|
||||
|
@ -178,6 +180,7 @@ const scrapers = {
|
|||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
archangel,
|
||||
asiam: modelmedia,
|
||||
aziani,
|
||||
babes: mindgeek,
|
||||
babevr: badoink,
|
||||
|
@ -234,6 +237,7 @@ const scrapers = {
|
|||
interracialpovs: hush,
|
||||
inthecrack,
|
||||
jamesdeen: fullpornnetwork,
|
||||
jerkaoke: modelmedia,
|
||||
julesjordan,
|
||||
karups,
|
||||
kellymadison,
|
||||
|
|
|
@ -32,6 +32,7 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
|||
|
||||
const curatedRelease = {
|
||||
title: decode(release.title),
|
||||
alt_titles: release.altTitles?.map((title) => decode(title)),
|
||||
entry_id: release.entryId || null,
|
||||
entity_id: release.entity.id,
|
||||
studio_id: release.studio?.id || null,
|
||||
|
@ -46,6 +47,8 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
|||
updated_batch_id: batchId,
|
||||
};
|
||||
|
||||
console.log(curatedRelease);
|
||||
|
||||
if (release.id) {
|
||||
// release is updated
|
||||
curatedRelease.id = release.id;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
|
||||
const knex = require('../knex');
|
||||
const logger = require('../logger')(__filename);
|
||||
const http = require('./http');
|
||||
|
@ -27,7 +29,7 @@ async function resolvePlace(query) {
|
|||
// https://operations.osmfoundation.org/policies/nominatim/
|
||||
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {
|
||||
headers: {
|
||||
'User-Agent': 'contact at moonloop.adult@protonmail.com',
|
||||
'User-Agent': config.location.userAgent,
|
||||
},
|
||||
interval: 1000,
|
||||
concurrency: 1,
|
||||
|
|