Updated Woodman scraper to use unprint.
This commit is contained in:
parent
9ec2ad25a7
commit
bfb48abfdd
|
|
@ -1,154 +1,165 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const unprint = require('unprint');
|
||||||
|
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const capitalize = require('../utils/capitalize');
|
const capitalize = require('../utils/capitalize');
|
||||||
|
|
||||||
function removeImageBorder(source) {
|
|
||||||
if (!source) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const sourceNoId = source.replace(/_[a-z0-9]+(_v\d)?\.jpg/, '.jpg');
|
|
||||||
|
|
||||||
return [
|
|
||||||
sourceNoId
|
|
||||||
.replace(/actoravatar_/, 'actoravatarnoborder_')
|
|
||||||
.replace(/scenedefault/, 'scenenoborder'),
|
|
||||||
sourceNoId,
|
|
||||||
source,
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
function mapActor(actorEl, query, entity) {
|
|
||||||
const avatar = query.img(actorEl);
|
|
||||||
|
|
||||||
return {
|
|
||||||
name: capitalize(query.cnt(actorEl, '.name, .informations p'), { uncapitalize: true }),
|
|
||||||
url: query.url(actorEl, null, 'href', { origin: entity.url }),
|
|
||||||
avatar: removeImageBorder(avatar),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
function scrapeAll(scenes, channel, discard = true) {
|
function scrapeAll(scenes, channel, discard = true) {
|
||||||
return scenes.map(({ query, el }) => {
|
return scenes.reduce((acc, { query, element }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.url = query.url(el, null, 'href', { origin: channel.url });
|
release.url = unprint.query.url(element, null, { attribute: 'href', origin: channel.url });
|
||||||
|
|
||||||
const { hostname, pathname } = new URL(release.url);
|
const { hostname, pathname } = new URL(release.url);
|
||||||
|
|
||||||
release.entryId = pathname.match(/_(\d+)/)?.[1];
|
release.entryId = pathname.match(/_(\d+)/)?.[1];
|
||||||
release.channel = hostname.match(/(\w+)\.com/)?.[1];
|
release.channel = hostname.match(/(\w+)\.com/)?.[1];
|
||||||
|
|
||||||
if (discard && release.channel !== channel.slug) {
|
if (discard && release.channel !== channel.slug) {
|
||||||
return null;
|
acc.unextracted.push(release); // mutate the accumulator in place; concat() only returns a new array and leaves acc unchanged
|
||||||
|
|
||||||
|
return acc;
|
||||||
}
|
}
|
||||||
|
|
||||||
release.title = query.content('.title, .informations h3');
|
release.title = query.content('.title, .informations h3');
|
||||||
release.duration = query.duration('.duration, .timer');
|
release.duration = query.duration('.duration, .timer, .infos');
|
||||||
|
|
||||||
release.actors = query.cnt('.sub')?.split(/,\s*/);
|
release.actors = query.content('.sub')?.split(/,\s*/);
|
||||||
|
|
||||||
release.poster = removeImageBorder(query.img('.thumb, picture img'));
|
release.poster = query.img('.thumb, picture img');
|
||||||
|
|
||||||
return release;
|
acc.scenes.push(release); // mutate the accumulator in place; concat() only returns a new array and leaves acc unchanged
|
||||||
}).filter(Boolean);
|
|
||||||
|
return acc;
|
||||||
|
}, {
|
||||||
|
scenes: [],
|
||||||
|
unextracted: [],
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ query, html }, url, entity) {
|
async function fetchLatest(channel, page) {
|
||||||
|
const res = await unprint.get(channel.parameters?.latest
|
||||||
|
? `${channel.parameters.latest}?page=${page}`
|
||||||
|
: `${channel.url}/videos?page=${page}`, { selectAll: '.items .scene' });
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
return scrapeAll(res.context, channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.status;
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeScene({ query, html }, { url, entity }) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = new URL(url).pathname.match(/_(\d+)/)?.[1];
|
release.entryId = new URL(url).pathname.match(/_(\d+)/)?.[1];
|
||||||
|
|
||||||
const title = query.cnt('.page_title h1, h2');
|
const title = query.content('.page_title h1, h2');
|
||||||
const wunfTitle = title.match(/wunf \d+/i)?.[0];
|
const wunfTitle = title.match(/wunf \d+/i)?.[0];
|
||||||
|
|
||||||
release.title = wunfTitle ? wunfTitle.toUpperCase() : title;
|
release.title = wunfTitle ? wunfTitle.toUpperCase() : title;
|
||||||
release.description = query.cnt('.info_container .description');
|
release.description = query.content('.info_container .description');
|
||||||
|
|
||||||
release.date = query.date('.info_container .info_line:nth-child(1)', 'YYYY-MM-DD') || query.date('.description', 'DD MMMM YYYY', /\d{1,2} \w+ \d{4}/);
|
release.date = query.date('.info_container .info_line:nth-child(1)', 'YYYY-MM-DD')
|
||||||
release.actors = query.all('.girl_item, .starring .item').map((actorEl) => mapActor(actorEl, query, entity));
|
|| query.date('.description', 'D MMMM YYYY', { match: /\d{1,2} \w+ \d{4}/ });
|
||||||
|
|
||||||
|
release.actors = query.all('.girl_item, .starring .item').map((actorEl) => {
|
||||||
|
const avatar = unprint.query.img(actorEl);
|
||||||
|
|
||||||
|
return {
|
||||||
|
name: capitalize(unprint.query.content(actorEl, '.name, .informations p'), { uncapitalize: true }),
|
||||||
|
url: unprint.query.url(actorEl, null, { origin: entity.url }),
|
||||||
|
avatar,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
release.duration = query.duration('.infos .description');
|
release.duration = query.duration('.infos .description');
|
||||||
|
|
||||||
if (!release.duration) {
|
if (!release.duration) {
|
||||||
const duration = query.cnt('.info_container .info_line:nth-child(2)');
|
const duration = query.content('.info_container .info_line:nth-child(2)');
|
||||||
|
|
||||||
release.duration = (duration.match(/(\d+) hour/)?.[1] || 0) * 3600
|
release.duration = (duration.match(/(\d+) hour/)?.[1] || 0) * 3600
|
||||||
+ (duration.match(/(\d+) minutes/)?.[1] || 0) * 60;
|
+ (duration.match(/(\d+) minutes/)?.[1] || 0) * 60;
|
||||||
}
|
}
|
||||||
|
|
||||||
release.tags = query.cnts('.tags a:not(.more_tag)');
|
release.tags = query.contents('.tags a:not(.more_tag)');
|
||||||
release.poster = removeImageBorder(html.match(/image: "(.*?)"/)?.[1]);
|
release.poster = html.match(/image: "(.*?)"/)?.[1];
|
||||||
|
|
||||||
release.trailer = html.match(/url: "(.*mp4.*)"/g)?.map((src) => ({
|
release.trailer = html.match(/url: "(.*mp4.*)"/g)?.map((src) => ({
|
||||||
src: src.match(/"(.*)"/)?.[1],
|
src: src.match(/"(.*)"/)?.[1],
|
||||||
quality: Number(src.match(/[-/](\d+)p/)?.[1]),
|
quality: Number(src.match(/[-/](\d+)p/)?.[1]),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
if (query.exists('.download-icon-4k')) {
|
||||||
|
release.qualities = [2160];
|
||||||
|
}
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile({ query }, entity) {
|
function scrapeProfile({ query }, entity) {
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
profile.avatar = removeImageBorder(query.img('.actor img'));
|
profile.avatar = query.img('.actor img');
|
||||||
profile.nationality = query.cnt('.nationality, .nationnality'); // sic
|
profile.nationality = query.content('.nationality, .nationnality'); // sic
|
||||||
|
|
||||||
profile.scenes = scrapeAll(qu.initAll(query.all('.videos .item')), entity, false);
|
profile.scenes = scrapeAll(unprint.initAll(query.all('.videos .item')), entity, false);
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel, page) {
|
async function getActorUrl(actor) {
|
||||||
const res = await qu.getAll(channel.parameters?.latest
|
if (actor.url) {
|
||||||
? `${channel.parameters.latest}?page=${page}`
|
return actor.url;
|
||||||
: `${channel.url}/videos?page=${page}`, '.items .scene');
|
|
||||||
|
|
||||||
if (res.ok) {
|
|
||||||
return scrapeAll(res.items, channel);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
// Wake Up'n Fuck has higher quality images, but not all performers are available there, whereas its performers are all available on Woodman Casting X
|
||||||
}
|
const res = await unprint.get('https://www.woodmancastingx.com');
|
||||||
|
|
||||||
async function fetchProfile(baseActor, entity) {
|
|
||||||
const res = await qu.get('https://www.woodmancastingx.com');
|
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchUrl = qu.prefixUrl(res.html.match(/"(.*searchCompletion\.js)"/)?.[1], 'https://www.woodmancastingx.com');
|
const searchUrl = unprint.prefixUrl(res.context.html.match(/"(.*searchCompletion\.js)"/)?.[1], 'https://www.woodmancastingx.com');
|
||||||
|
|
||||||
if (!searchUrl) {
|
if (!searchUrl) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchRes = await qu.get(searchUrl, null, null, { decodeJSON: true });
|
const searchRes = await unprint.get(searchUrl);
|
||||||
|
|
||||||
if (!searchRes.ok) {
|
if (!searchRes.ok) {
|
||||||
return searchRes.status;
|
return searchRes.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
const [actorId] = searchRes.body.actors.find(([_actorId, actorName]) => slugify(actorName) === baseActor.slug) || [];
|
const [actorId] = searchRes.data.actors.find(([_actorId, actorName]) => slugify(actorName) === actor.slug) || [];
|
||||||
|
|
||||||
if (!actorId) {
|
if (!actorId) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const actorRes = await qu.get(`https://www.woodmancastingx.com/search/redirection/actors/${actorId}`);
|
return `https://www.woodmancastingx.com/search/redirection/actors/${actorId}`;
|
||||||
|
}
|
||||||
|
|
||||||
if (actorRes.ok) {
|
async function fetchProfile(actor, entity) {
|
||||||
return scrapeProfile(actorRes.item, entity);
|
const actorUrl = await getActorUrl(actor);
|
||||||
|
|
||||||
|
if (typeof actorUrl !== 'string') {
|
||||||
|
return actorUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
return actorRes.status;
|
const res = await unprint.get(actorUrl);
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
return scrapeProfile(res.context, entity);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
scrapeScene,
|
scrapeScene,
|
||||||
fetchProfile,
|
fetchProfile,
|
||||||
deprecated: true,
|
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ function resizeSrc(src) {
|
||||||
function deriveDate(query) {
|
function deriveDate(query) {
|
||||||
const now = new Date();
|
const now = new Date();
|
||||||
|
|
||||||
// Nov. 12th
|
// Nov. 2025
|
||||||
const dateMY = query.date('.i-date', 'MMM. YYYY', { match: /(\w+\.? \d{4})/ });
|
const dateMY = query.date('.i-date', 'MMM. YYYY', { match: /(\w+\.? \d{4})/ });
|
||||||
|
|
||||||
if (dateMY) {
|
if (dateMY) {
|
||||||
|
|
@ -34,6 +34,7 @@ function deriveDate(query) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Nov. 12th
|
||||||
const dateMDo = query.date('.i-date', 'MMM. Do', { match: /(\w+\.? \d{1,2}\w+)/ });
|
const dateMDo = query.date('.i-date', 'MMM. Do', { match: /(\w+\.? \d{1,2}\w+)/ });
|
||||||
|
|
||||||
if (dateMDo) {
|
if (dateMDo) {
|
||||||
|
|
@ -47,6 +48,7 @@ function deriveDate(query) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 8 Weeks Ago
|
||||||
const dateAgo = query.dateAgo('.i-date');
|
const dateAgo = query.dateAgo('.i-date');
|
||||||
|
|
||||||
if (dateAgo) {
|
if (dateAgo) {
|
||||||
|
|
|
||||||
|
|
@ -279,7 +279,7 @@ const scrapers = {
|
||||||
wankzvr,
|
wankzvr,
|
||||||
tranzvr: wankzvr,
|
tranzvr: wankzvr,
|
||||||
milfvr: wankzvr,
|
milfvr: wankzvr,
|
||||||
// nubilus
|
// nubiles
|
||||||
anilos: nubiles,
|
anilos: nubiles,
|
||||||
brattysis: nubiles,
|
brattysis: nubiles,
|
||||||
deeplush: nubiles,
|
deeplush: nubiles,
|
||||||
|
|
@ -298,6 +298,9 @@ const scrapers = {
|
||||||
aziani,
|
aziani,
|
||||||
'2poles1hole': aziani,
|
'2poles1hole': aziani,
|
||||||
creampiled: aziani,
|
creampiled: aziani,
|
||||||
|
// woodman
|
||||||
|
pierrewoodman,
|
||||||
|
wakeupnfuck: pierrewoodman,
|
||||||
// etc
|
// etc
|
||||||
'18vr': badoink,
|
'18vr': badoink,
|
||||||
theflourishxxx: theflourish,
|
theflourishxxx: theflourish,
|
||||||
|
|
@ -362,7 +365,6 @@ const scrapers = {
|
||||||
pervcity,
|
pervcity,
|
||||||
dpdiva: pervcity,
|
dpdiva: pervcity,
|
||||||
pervertgallery: fullpornnetwork,
|
pervertgallery: fullpornnetwork,
|
||||||
pierrewoodman,
|
|
||||||
porncz,
|
porncz,
|
||||||
pornhub,
|
pornhub,
|
||||||
pornworld,
|
pornworld,
|
||||||
|
|
|
||||||
|
|
@ -176,6 +176,7 @@ const actors = [
|
||||||
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
|
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
|
||||||
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
|
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
|
||||||
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
|
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
|
||||||
|
{ entity: 'pierrewoodman', name: 'Makayla Cox', fields: ['avatar', 'nationality'] },
|
||||||
];
|
];
|
||||||
|
|
||||||
const actorScrapers = scrapers.actors;
|
const actorScrapers = scrapers.actors;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue