Splitting Han titles and actors in Model Media scraper.
|
@ -35,7 +35,10 @@
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<div class="info column">
|
<div class="info column">
|
||||||
<div class="row row-title">
|
<div
|
||||||
|
class="row row-title"
|
||||||
|
:class="{ 'has-alt': release.altTitles?.length > 0 }"
|
||||||
|
>
|
||||||
<h2
|
<h2
|
||||||
v-if="release.title"
|
v-if="release.title"
|
||||||
class="title"
|
class="title"
|
||||||
|
@ -62,6 +65,19 @@
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div
|
||||||
|
v-if="release.altTitles?.length > 0"
|
||||||
|
class="row alttitles"
|
||||||
|
>
|
||||||
|
<h2
|
||||||
|
v-for="(altTitle, index) in release.altTitles"
|
||||||
|
:key="`altitle-${index}`"
|
||||||
|
class="alttitle"
|
||||||
|
>
|
||||||
|
{{ altTitle }}
|
||||||
|
</h2>
|
||||||
|
</div>
|
||||||
|
|
||||||
<Releases
|
<Releases
|
||||||
v-if="release.scenes && release.scenes.length > 0"
|
v-if="release.scenes && release.scenes.length > 0"
|
||||||
:releases="release.scenes"
|
:releases="release.scenes"
|
||||||
|
@ -520,6 +536,11 @@ export default {
|
||||||
color: var(--shadow);
|
color: var(--shadow);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.alttitle {
|
||||||
|
color: var(--shadow);
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
.album-toggle {
|
.album-toggle {
|
||||||
height: fit-content;
|
height: fit-content;
|
||||||
display: inline-flex;
|
display: inline-flex;
|
||||||
|
|
|
@ -545,6 +545,7 @@ const releaseFragment = `
|
||||||
release(id: $releaseId) {
|
release(id: $releaseId) {
|
||||||
id
|
id
|
||||||
title
|
title
|
||||||
|
altTitles
|
||||||
description
|
description
|
||||||
date
|
date
|
||||||
datePrecision
|
datePrecision
|
||||||
|
|
|
@ -30,6 +30,9 @@ module.exports = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
location: {
|
||||||
|
userAgent: 'contact via https://traxxx.me/',
|
||||||
|
},
|
||||||
analytics: {
|
analytics: {
|
||||||
enabled: false,
|
enabled: false,
|
||||||
address: 'http://localhost:3000/script.js',
|
address: 'http://localhost:3000/script.js',
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
// Migration (up): alters the 'releases' table, adding an 'alt_titles' column declared with the raw SQL type 'text ARRAY'.
exports.up = async (knex) => {
|
||||||
|
await knex.schema.alterTable('releases', (table) => {
|
||||||
|
table.specificType('alt_titles', 'text ARRAY');
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// Migration (down): alters the 'releases' table, dropping the 'alt_titles' column again.
exports.down = async (knex) => {
|
||||||
|
await knex.schema.alterTable('releases', (table) => {
|
||||||
|
table.dropColumn('alt_titles');
|
||||||
|
});
|
||||||
|
};
|
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 4.0 KiB |
Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 4.0 KiB |
After Width: | Height: | Size: 2.0 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
|
@ -14,10 +14,38 @@ function scrapeAll(scenes) {
|
||||||
const { origin, pathname, searchParams } = new URL(url);
|
const { origin, pathname, searchParams } = new URL(url);
|
||||||
|
|
||||||
release.url = `${origin}${pathname}`;
|
release.url = `${origin}${pathname}`;
|
||||||
release.actors = searchParams.get('models_name')?.split(',');
|
release.shootId = pathname.match(/((LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))[\w-]+/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives
|
||||||
|
|
||||||
|
release.actors = searchParams.get('models_name')?.split(',').map((actor) => {
|
||||||
|
const [han, english] = actor.split('/').map((name) => name.trim());
|
||||||
|
|
||||||
|
if (/amateur/i.test(english)) {
|
||||||
|
// not a name
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
name: english || han,
|
||||||
|
alias: english && han,
|
||||||
|
};
|
||||||
|
}).filter(Boolean);
|
||||||
|
}
|
||||||
|
|
||||||
|
const rawTitle = query.content('.video-title div')?.replace(release.shootId, '');
|
||||||
|
|
||||||
|
if (rawTitle) {
|
||||||
|
// find the / closest to the first Han character in case there are multiple, account for there being no / at all
|
||||||
|
const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index;
|
||||||
|
const splitIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/') || hanIndex;
|
||||||
|
|
||||||
|
if (hanIndex && splitIndex > -1) {
|
||||||
|
release.title = rawTitle.slice(0, splitIndex).trim();
|
||||||
|
release.altTitles = [rawTitle.slice(splitIndex + 1).trim()];
|
||||||
|
} else {
|
||||||
|
release.title = rawTitle;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
release.title = query.content('.video-title div');
|
|
||||||
release.duration = query.duration('.timestamp');
|
release.duration = query.duration('.timestamp');
|
||||||
|
|
||||||
const poster = query.img('img', { attribute: 'data-src' });
|
const poster = query.img('img', { attribute: 'data-src' });
|
||||||
|
@ -31,8 +59,6 @@ function scrapeAll(scenes) {
|
||||||
|
|
||||||
release.teaser = query.video(null, { attribute: 'data-video-src' });
|
release.teaser = query.video(null, { attribute: 'data-video-src' });
|
||||||
|
|
||||||
console.log(release);
|
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -49,17 +75,16 @@ function scrapeProfile({ query }) {
|
||||||
}
|
}
|
||||||
|
|
||||||
profile.description = query.content('h2') || null;
|
profile.description = query.content('h2') || null;
|
||||||
profile.height = query.number('//span[text()="Measurements"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||||
profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });
|
profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });
|
||||||
|
|
||||||
profile.measurements = query.number('//span[text()="Birth Place"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
// can't find a single profile with this information available, but add for good measure
|
||||||
|
profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');
|
||||||
profile.birthPlace = query.number('//span[text()="Birth Place"]/following-sibling::span');
|
profile.birthPlace = query.number('//span[text()="Birth Place"]/following-sibling::span');
|
||||||
|
|
||||||
profile.banner = query.img('div[class*="banner"] > img');
|
profile.banner = query.img('div[class*="banner"] > img');
|
||||||
profile.photos = query.imgs('#MusModelSwiper img');
|
profile.photos = query.imgs('#MusModelSwiper img');
|
||||||
|
|
||||||
console.log(profile);
|
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,7 @@ const scrapers = {
|
||||||
americanpornstar,
|
americanpornstar,
|
||||||
amateureuro: porndoe,
|
amateureuro: porndoe,
|
||||||
archangel,
|
archangel,
|
||||||
|
asiam: modelmedia,
|
||||||
assylum,
|
assylum,
|
||||||
aziani,
|
aziani,
|
||||||
badoink,
|
badoink,
|
||||||
|
@ -115,6 +116,7 @@ const scrapers = {
|
||||||
interracialpass: hush,
|
interracialpass: hush,
|
||||||
inthecrack,
|
inthecrack,
|
||||||
jayrock,
|
jayrock,
|
||||||
|
jerkaoke: modelmedia,
|
||||||
jesseloadsmonsterfacials,
|
jesseloadsmonsterfacials,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
karups,
|
karups,
|
||||||
|
@ -178,6 +180,7 @@ const scrapers = {
|
||||||
analviolation: fullpornnetwork,
|
analviolation: fullpornnetwork,
|
||||||
anilos: nubiles,
|
anilos: nubiles,
|
||||||
archangel,
|
archangel,
|
||||||
|
asiam: modelmedia,
|
||||||
aziani,
|
aziani,
|
||||||
babes: mindgeek,
|
babes: mindgeek,
|
||||||
babevr: badoink,
|
babevr: badoink,
|
||||||
|
@ -234,6 +237,7 @@ const scrapers = {
|
||||||
interracialpovs: hush,
|
interracialpovs: hush,
|
||||||
inthecrack,
|
inthecrack,
|
||||||
jamesdeen: fullpornnetwork,
|
jamesdeen: fullpornnetwork,
|
||||||
|
jerkaoke: modelmedia,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
karups,
|
karups,
|
||||||
kellymadison,
|
kellymadison,
|
||||||
|
|
|
@ -32,6 +32,7 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
||||||
|
|
||||||
const curatedRelease = {
|
const curatedRelease = {
|
||||||
title: decode(release.title),
|
title: decode(release.title),
|
||||||
|
alt_titles: release.altTitles?.map((title) => decode(title)),
|
||||||
entry_id: release.entryId || null,
|
entry_id: release.entryId || null,
|
||||||
entity_id: release.entity.id,
|
entity_id: release.entity.id,
|
||||||
studio_id: release.studio?.id || null,
|
studio_id: release.studio?.id || null,
|
||||||
|
@ -46,6 +47,8 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
||||||
updated_batch_id: batchId,
|
updated_batch_id: batchId,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
console.log(curatedRelease);
|
||||||
|
|
||||||
if (release.id) {
|
if (release.id) {
|
||||||
// release is updated
|
// release is updated
|
||||||
curatedRelease.id = release.id;
|
curatedRelease.id = release.id;
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const config = require('config');
|
||||||
|
|
||||||
const knex = require('../knex');
|
const knex = require('../knex');
|
||||||
const logger = require('../logger')(__filename);
|
const logger = require('../logger')(__filename);
|
||||||
const http = require('./http');
|
const http = require('./http');
|
||||||
|
@ -27,7 +29,7 @@ async function resolvePlace(query) {
|
||||||
// https://operations.osmfoundation.org/policies/nominatim/
|
// https://operations.osmfoundation.org/policies/nominatim/
|
||||||
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {
|
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent': 'contact at moonloop.adult@protonmail.com',
|
'User-Agent': config.location.userAgent,
|
||||||
},
|
},
|
||||||
interval: 1000,
|
interval: 1000,
|
||||||
concurrency: 1,
|
concurrency: 1,
|
||||||
|
|