Improved Woodman scraper, added profiles.
|
@ -179,7 +179,6 @@ module.exports = {
|
||||||
'gangbangcreampie',
|
'gangbangcreampie',
|
||||||
'gloryholesecrets',
|
'gloryholesecrets',
|
||||||
'aziani',
|
'aziani',
|
||||||
'legalporno',
|
|
||||||
[
|
[
|
||||||
'firstanalquest',
|
'firstanalquest',
|
||||||
'doubleviewcasting',
|
'doubleviewcasting',
|
||||||
|
@ -188,6 +187,8 @@ module.exports = {
|
||||||
'silverstonedvd',
|
'silverstonedvd',
|
||||||
'silviasaint',
|
'silviasaint',
|
||||||
],
|
],
|
||||||
|
'legalporno',
|
||||||
|
'pierrewoodman',
|
||||||
'score',
|
'score',
|
||||||
'boobpedia',
|
'boobpedia',
|
||||||
'pornhub',
|
'pornhub',
|
||||||
|
|
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 9.7 KiB |
After Width: | Height: | Size: 3.2 KiB |
After Width: | Height: | Size: 3.6 KiB |
After Width: | Height: | Size: 19 KiB |
After Width: | Height: | Size: 228 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 30 KiB |
|
@ -519,7 +519,7 @@ const networks = [
|
||||||
{
|
{
|
||||||
slug: 'pierrewoodman',
|
slug: 'pierrewoodman',
|
||||||
name: 'Pierre Woodman',
|
name: 'Pierre Woodman',
|
||||||
url: 'http://www.pierrewoodman.com',
|
url: 'http://www.woodmancastingx.com',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'xempire',
|
slug: 'xempire',
|
||||||
|
|
|
@ -0,0 +1,153 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const qu = require('../utils/qu');
|
||||||
|
const slugify = require('../utils/slugify');
|
||||||
|
const capitalize = require('../utils/capitalize');
|
||||||
|
|
||||||
|
/**
 * Build an ordered list of candidate URLs for an image, starting with the
 * most heavily rewritten variant and ending with the URL as it was found.
 *
 * Candidates, in order:
 *   1. the URL without its ID suffix, with `actoravatar_` rewritten to
 *      `actoravatarnoborder_` and `scenedefault` rewritten to `scenenoborder`;
 *   2. the URL without its ID suffix;
 *   3. the untouched source URL.
 *
 * @param {?string} source - Image URL as found on the page.
 * @returns {?string[]} Unique candidate URLs in the order above, or null when
 *   no source was given.
 */
function removeImageBorder(source) {
	if (!source) {
		return null;
	}

	// drop the last `_<id>` (optionally followed by `_v<digit>`) segment in front of the .jpg extension
	const sourceNoId = source.replace(/_[a-z0-9]+(_v\d)?\.jpg/, '.jpg');

	const sourceNoBorder = sourceNoId
		.replace(/actoravatar_/, 'actoravatarnoborder_')
		.replace(/scenedefault/, 'scenenoborder');

	// a rewrite that matches nothing returns its input unchanged, so identical
	// candidates are collapsed; Set preserves first-seen order
	return [...new Set([sourceNoBorder, sourceNoId, source])];
}
|
||||||
|
|
||||||
|
/**
 * Turn an actor element from a scene page into an actor entry.
 *
 * @param {object} actorEl - Element holding the actor's link, name and image.
 * @param {object} query - Query helper used to read from the element.
 * @param {object} entity - Entity whose `url` is passed as origin for the profile link.
 * @returns {{name: *, url: *, avatar: ?string[]}} Actor name, profile URL and
 *   avatar candidates as produced by removeImageBorder.
 */
function mapActor(actorEl, query, entity) {
	const avatarSource = query.img(actorEl);
	const rawName = query.cnt(actorEl, '.name, .informations p');
	const name = capitalize(rawName, { uncapitalize: true });
	const url = query.url(actorEl, null, 'href', { origin: entity.url });

	return {
		name,
		url,
		avatar: removeImageBorder(avatarSource),
	};
}
|
||||||
|
|
||||||
|
/**
 * Scrape a list of scene tiles into release objects.
 *
 * @param {Array<{query: object, el: object}>} scenes - Initialized scene tiles.
 * @param {object} channel - Channel being scraped; `url` is passed as link
 *   origin and `slug` is compared against the release's channel.
 * @param {boolean} [discard=true] - When true, drop releases whose
 *   hostname-derived channel differs from `channel.slug`.
 * @returns {object[]} Releases with url, entryId, channel, title, duration,
 *   actors and poster; skipped tiles are omitted.
 */
function scrapeAll(scenes, channel, discard = true) {
	return scenes.map(({ query, el }) => {
		const release = {};

		release.url = query.url(el, null, 'href', { origin: channel.url });

		let location;

		try {
			location = new URL(release.url);
		} catch {
			// a tile without a parsable link has no entry ID or channel; skip it
			// instead of letting one broken tile abort the whole page
			return null;
		}

		const { hostname, pathname } = location;

		release.entryId = pathname.match(/_(\d+)/)?.[1];
		release.channel = hostname.match(/(\w+)\.com/)?.[1];

		if (discard && release.channel !== channel.slug) {
			return null;
		}

		release.title = query.content('.title, .informations h3');
		release.duration = query.duration('.duration, .timer');

		release.actors = query.cnt('.sub')?.split(/,\s*/);

		release.poster = removeImageBorder(query.img('.thumb, picture img'));

		return release;
	}).filter(Boolean);
}
|
||||||
|
|
||||||
|
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{query: object, html: string}} page - Query helper and raw HTML of the scene page.
 * @param {string} url - Absolute URL of the scene page; the entry ID is taken from its path.
 * @param {object} entity - Entity passed on to mapActor for actor links.
 * @returns {object} Release with entryId, title, description, date, actors,
 *   duration, tags, poster and trailer.
 */
function scrapeScene({ query, html }, url, entity) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/_(\d+)/)?.[1];

	const title = query.cnt('.page_title h1, h2');
	// the title element may be absent, so the lookup must not assume a string
	const wunfTitle = title?.match(/wunf \d+/i)?.[0];

	// titles containing "wunf <number>" are reduced to just that, upper-cased
	release.title = wunfTitle ? wunfTitle.toUpperCase() : title;
	release.description = query.cnt('.info_container .description');

	release.date = query.date('.info_container .info_line:nth-child(1)', 'YYYY-MM-DD') || query.date('.description', 'DD MMMM YYYY', /\d{1,2} \w+ \d{4}/);
	release.actors = query.all('.girl_item, .starring .item').map(actorEl => mapActor(actorEl, query, entity));

	release.duration = query.duration('.infos .description');

	if (!release.duration) {
		const durationText = query.cnt('.info_container .info_line:nth-child(2)');

		// only derive a duration when the info line exists; matches both
		// singular and plural units ("1 hour", "2 hours", "1 minute", "45 minutes")
		if (durationText) {
			const hours = Number(durationText.match(/(\d+) hour/)?.[1] || 0);
			const minutes = Number(durationText.match(/(\d+) minute/)?.[1] || 0);

			release.duration = (hours * 3600) + (minutes * 60);
		}
	}

	release.tags = query.cnts('.tags a:not(.more_tag)');
	release.poster = removeImageBorder(html.match(/image: "(.*)"/)?.[1]);

	// every `url: "...mp4..."` occurrence in the page source becomes a trailer
	// source, with the quality read from a `-<n>p` or `/<n>p` fragment
	release.trailer = html.match(/url: "(.*mp4.*)"/g)?.map(src => ({
		src: src.match(/"(.*)"/)?.[1],
		quality: Number(src.match(/[-/](\d+)p/)?.[1]),
	}));

	return release;
}
|
||||||
|
|
||||||
|
/**
 * Scrape an actor page into a profile.
 *
 * @param {{query: object}} page - Query helper for the actor page.
 * @param {object} entity - Entity passed to scrapeAll as the channel.
 * @returns {{avatar: ?string[], nationality: *, scenes: object[]}} Avatar
 *   candidates, nationality text and the scenes listed on the page; scenes
 *   are scraped with discarding disabled.
 */
function scrapeProfile({ query }, entity) {
	return {
		avatar: removeImageBorder(query.img('.actor img')),
		nationality: query.cnt('.nationality, .nationnality'), // sic
		scenes: scrapeAll(qu.initAll(query.all('.videos .item')), entity, false),
	};
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of a channel's scene overview and scrape its tiles.
 *
 * @param {object} channel - Channel to fetch; `parameters.latest` overrides
 *   the default `<channel.url>/videos` overview URL when set.
 * @param {number} page - Page number appended as the `page` query parameter.
 * @returns {Promise<object[]|*>} Scraped releases, or the response status
 *   when the request did not succeed.
 */
async function fetchLatest(channel, page) {
	const overviewUrl = channel.parameters?.latest || `${channel.url}/videos`;
	const res = await qu.getAll(`${overviewUrl}?page=${page}`, '.items .scene');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, channel);
}
|
||||||
|
|
||||||
|
/**
 * Look up an actor through the site's search completion data and scrape
 * the matching actor page.
 *
 * Steps: load the home page, find the `searchCompletion.js` URL in its HTML,
 * fetch that file as JSON, find the actor whose slugified name equals
 * `baseActor.slug`, then follow the actor redirection URL for that ID.
 *
 * @param {object} baseActor - Actor to look up; matched on `slug`.
 * @param {object} entity - Entity passed on to scrapeProfile.
 * @returns {Promise<object|number|null>} Scraped profile, the status of the
 *   first failed request, or null when the search file or actor is not found.
 */
async function fetchProfile(baseActor, entity) {
	const origin = 'https://www.woodmancastingx.com';
	const res = await qu.get(origin);

	if (!res.ok) {
		return res.status;
	}

	const searchPath = res.html.match(/"(.*searchCompletion\.js)"/)?.[1];
	const searchUrl = qu.prefixUrl(searchPath, origin);

	if (!searchUrl) {
		return null;
	}

	const searchRes = await qu.get(searchUrl, null, null, { decodeJSON: true });

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// each entry is read as an [id, name] pair
	const actorEntry = searchRes.body.actors.find(([, actorName]) => slugify(actorName) === baseActor.slug);
	const actorId = actorEntry?.[0];

	if (!actorId) {
		return null;
	}

	const actorRes = await qu.get(`${origin}/search/redirection/actors/${actorId}`);

	if (!actorRes.ok) {
		return actorRes.status;
	}

	return scrapeProfile(actorRes.item, entity);
}
|
||||||
|
|
||||||
|
// public scraper interface: latest scenes overview, single scene page, actor profile
module.exports = { fetchLatest, scrapeScene, fetchProfile };
|
|
@ -255,6 +255,7 @@ const scrapers = {
|
||||||
pervcity,
|
pervcity,
|
||||||
pervertgallery: fullpornnetwork,
|
pervertgallery: fullpornnetwork,
|
||||||
peternorth: famedigital,
|
peternorth: famedigital,
|
||||||
|
pierrewoodman,
|
||||||
pimpxxx: cherrypimps,
|
pimpxxx: cherrypimps,
|
||||||
letsdoeit: porndoe,
|
letsdoeit: porndoe,
|
||||||
mamacitaz: porndoe,
|
mamacitaz: porndoe,
|
||||||
|
|