Added conditions to Tokyo Hot scraper to prevent total failure.
This commit is contained in:
151
src/scrapers/modelmedia.js
Normal file
151
src/scrapers/modelmedia.js
Normal file
@@ -0,0 +1,151 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
/**
 * Convert the video tiles of a listing page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - one query context per video tile;
 *   fetchLatest selects these with '.row a[video-id]'.
 * @returns {object[]} one release per tile, carrying entryId, title, duration and teaser,
 *   plus url/actors when the tile links somewhere and poster when it has an image.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.entryId = query.attribute(null, 'video-id');

		const url = query.url(null);

		if (url) {
			const { origin, pathname, searchParams } = new URL(url);

			// the stored URL drops the query string; the actor names are taken from its models_name parameter
			release.url = `${origin}${pathname}`;

			// tolerate whitespace around the commas and drop empty entries
			release.actors = searchParams.get('models_name')
				?.split(',')
				.map((name) => name.trim())
				.filter(Boolean);
		}

		release.title = query.content('.video-title div');
		release.duration = query.duration('.timestamp');

		const poster = query.img('img', { attribute: 'data-src' });

		if (poster) {
			// try a 1920x1080 rendition first by rewriting the w/h values, keep the listed thumbnail as fallback
			release.poster = [
				poster.replace(/w=\d+/, 'w=1920').replace(/h=\d+/, 'h=1080'),
				poster,
			];
		}

		release.teaser = query.video(null, { attribute: 'data-video-src' });

		return release;
	});
}
|
||||
|
||||
/**
 * Extract the profile fields from an actor page.
 *
 * @param {{ query: object }} context - query context of the fetched actor page
 * @returns {object} profile with description, height, weight, measurements, birthPlace,
 *   banner and photos, plus avatar when a profile picture is present.
 */
function scrapeProfile({ query }) {
	const profile = {};
	const avatar = query.img('div[class*="prof-pic"] > img');

	if (avatar) {
		// try a 720x1080 rendition first by rewriting the w/h values, keep the original as fallback
		profile.avatar = [
			avatar.replace(/w=\d+/, 'w=720').replace(/h=\d+/, 'h=1080'),
			avatar,
		];
	}

	profile.description = query.content('h2') || null;

	// NOTE(review): height is taken from the "Measurements" row, exactly as before; confirm against the live markup
	profile.height = query.number('//span[text()="Measurements"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
	profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });

	// previously read "(\d+) cm" from the "Birth Place" row, a copy-paste slip
	// NOTE(review): now the raw text of the "Measurements" row; confirm the format downstream expects
	profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span') || null;

	// a birth place is text, so read it as content rather than parsing it as a number
	profile.birthPlace = query.content('//span[text()="Birth Place"]/following-sibling::span') || null;

	profile.banner = query.img('div[class*="banner"] > img');
	profile.photos = query.imgs('#MusModelSwiper img');

	return profile;
}
|
||||
|
||||
/**
 * Obtain a session cookie that has passed the site's adult confirmation.
 *
 * Loads the channel front page to collect the CSRF token and the cookies it sets, then posts
 * to the adult confirmation endpoint with both.
 *
 * @param {{ url: string }} channel - entity whose base URL is used for both requests
 * @returns {Promise<string|number|null>} the Cookie header value on success; the HTTP status
 *   when either request fails; null when the page exposes no CSRF token or sets no cookies.
 *   Callers must check the type before using the result as a header.
 */
async function getCookie(channel) {
	const tokenRes = await unprint.get(channel.url);

	if (!tokenRes.ok) {
		return tokenRes.status;
	}

	const csrfToken = tokenRes.context?.query.attribute('meta[name="csrf-token"]', 'content');
	const setCookie = tokenRes.response.headers['set-cookie'];

	// A Cookie request header carries name=value pairs only, so the Set-Cookie attributes
	// (Path, Expires, HttpOnly, ...) are dropped. concat() also accepts a single header string.
	const cookie = [].concat(setCookie ?? [])
		.map((entry) => entry.split(';')[0].trim())
		.filter(Boolean)
		.join('; ');

	if (!csrfToken || !cookie) {
		return null;
	}

	const confirmAdultRes = await unprint.post(`${channel.url}/adult_confirmation_and_accept_cookie`, null, {
		headers: {
			cookie,
			'x-csrf-token': csrfToken,
		},
	});

	if (!confirmAdultRes.ok) {
		return confirmAdultRes.status;
	}

	return cookie;
}
|
||||
|
||||
/**
 * Fetch one page of the channel's video listing, sorted by publication date.
 *
 * @param {{ url: string }} channel - entity whose base URL hosts the listing
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]|number>} the scraped releases, or the HTTP status when the listing request fails.
 */
async function fetchLatest(channel, page = 1) {
	const cookie = await getCookie(channel);

	const res = await unprint.get(`${channel.url}/videos?sort=published_at&page=${page}`, {
		selectAll: '.row a[video-id]',
		// getCookie returns null or an HTTP status when no session could be set up; never forward those as a cookie
		headers: typeof cookie === 'string' ? { cookie } : {},
	});

	if (res.ok) {
		return scrapeAll(res.context);
	}

	return res.status;
}
|
||||
|
||||
// deep pages are paywalled
|
||||
|
||||
/**
 * Look up an actor's profile URL through the site's live search.
 *
 * @param {{ name: string }} actor - actor to search for; the name must match a result's title exactly
 * @param {{ channel: { url: string } }} context - supplies the base URL of the site
 * @param {string} [cookie] - session cookie; omitted from the request when it is not a string
 * @returns {Promise<*>} whatever the URL query yields for the matching result link,
 *   or the HTTP status when the search request fails.
 */
async function searchProfile(actor, context, cookie) {
	// names contain spaces and may contain reserved characters, so encode them for the query string
	const keyword = encodeURIComponent(actor.name);

	const searchRes = await unprint.get(`${context.channel.url}/livesearch?keyword=${keyword}`, {
		headers: typeof cookie === 'string' ? { cookie } : {},
	});

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// escape backslashes and double quotes so the name cannot break out of the quoted attribute selector
	const escapedName = actor.name.replace(/["\\]/g, '\\$&');

	return searchRes.context.query.url(`a[title="${escapedName}"]`);
}
|
||||
|
||||
/**
 * Fetch and scrape an actor profile, searching for the profile URL when the actor has none.
 *
 * @param {{ name: string, url?: string }} actor - actor to look up
 * @param {{ entity: object, channel: object }} context - entity used for the cookie handshake
 *   and channel used by the search
 * @returns {Promise<object|null>} the scraped profile, or null when no profile URL could be
 *   determined or the profile request failed.
 */
async function fetchProfile(actor, context) {
	// NOTE(review): the handshake uses context.entity while searchProfile uses context.channel; confirm both point at the same site
	const cookie = await getCookie(context.entity);

	// getCookie returns null or an HTTP status when no session could be set up; never forward those as a cookie
	const sessionCookie = typeof cookie === 'string' ? cookie : null;
	const actorUrl = actor.url || await searchProfile(actor, context, sessionCookie);

	// searchProfile returns a truthy HTTP status number on failure, which must not be fetched as a URL
	if (typeof actorUrl !== 'string' || !actorUrl) {
		return null;
	}

	const res = await unprint.get(actorUrl, {
		headers: sessionCookie ? { cookie: sessionCookie } : {},
	});

	if (res.ok) {
		return scrapeProfile(res.context);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
};
|
||||
Reference in New Issue
Block a user