traxxx/src/scrapers/tokyohot.js

172 lines
3.9 KiB
JavaScript

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
/**
 * Build release stubs from the tiles of a listing page.
 *
 * @param {Array<{ query: object }>} scenes - one context per listing tile, each exposing a `query` helper
 * @param {{ url: string }} channel - channel whose `url` is used to prefix the tile link
 * @returns {object[]} one release object per tile
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};
		const pathname = query.url();

		release.url = unprint.prefixUrl(pathname, channel.url);
		// entry ID is the path segment following /product/
		release.entryId = pathname?.match(/product\/(\w+)/)?.[1];
		release.shootId = query.attribute('img', 'title');

		// strip the leading site name from the title
		release.title = query.content('.title')?.replace(/^tokyo hot\s*/i, '');
		release.description = query.content('.text');

		const poster = query.img();

		// a tile without a thumbnail must not abort scraping of the whole page
		if (poster) {
			release.poster = [
				poster.replace('220x124', '820x462'), // 820x462 variant listed first
				poster, // thumbnail URL as found on the page
			];
		}

		return release;
	});
}
/**
 * Build a release object from a scene (product) page.
 *
 * @param {{ query: object }} context - page context exposing a `query` helper
 * @param {string} url - absolute URL of the scene page; the entry ID is taken from its path
 * @param {{ url: string }} channel - channel whose `url` is passed as origin for actor links
 * @returns {object} the scraped release
 */
function scrapeScene({ query }, url, channel) {
	const release = {};

	// XPath for the <dd> immediately following the <dt> whose text contains the label
	const infoField = (label) => `//dt[contains(text(), "${label}")]/following-sibling::dd[1]`;

	release.entryId = new URL(url).pathname.match(/product\/(\w+)/)?.[1];
	release.shootId = query.content(infoField('Product ID'));

	release.title = query.content('.contents h2');
	release.description = query.content('.contents .sentence');

	release.date = query.date(infoField('Release Date'), 'YYYY/MM/DD');
	release.duration = query.duration(infoField('Duration'));

	release.actors = query.all('.info a[href*="/cast"]').map((el) => ({
		name: unprint.query.content(el),
		url: unprint.query.url(el, null, { origin: channel.url }),
	}));

	release.tags = query.contents('.info a[href*="type=play"]');

	const poster = query.poster('.movie video');

	// a page without a video poster must not abort the scrape
	if (poster) {
		release.poster = [
			poster,
			poster.replace('820x462', '220x124'), // 220x124 variant listed second
		];
	}

	release.trailer = query.video('.movie source');

	// each image is paired with the same URL with the size token swapped
	release.photos = query.imgs('.scap a', { attribute: 'href' }).map((img) => [
		img,
		img.replace('640x480_wlimited', '150x150_default'),
	]);

	release.caps = query.imgs('.vcap a', { attribute: 'href' }).map((img) => [
		img,
		img.replace('640x480_wlimited', '120x120_default'),
	]);

	return release;
}
/**
 * Read a measurement out of a bio string.
 *
 * Measurements are specified as a range in centimeters, e.g. "85 ~ 89cm";
 * the last number directly followed by "cm" is the one used.
 *
 * @param {string} string - raw bio value
 * @param {boolean} [inches=false] - convert to inches, rounded to the nearest integer
 * @returns {number|null} the measurement, or null when the input is empty or has no "cm" figure
 */
function getMeasurement(string, inches = false) {
	const matches = string
		? [...string.matchAll(/(\d+(?:\.\d+)?)\s*cm/g)]
		: [];

	const centimeters = matches[matches.length - 1]?.[1];

	if (!centimeters) {
		return null;
	}

	return inches
		? Math.round(Number(centimeters) * 0.393701)
		: Number(centimeters);
}
/**
 * Build an actor profile from a cast page.
 *
 * The `.info` definition list is read as label/value pairs: the nth `<dt>` is
 * paired with the nth `<dd>`, and labels are slugified with `_` so they can be
 * looked up as e.g. `bio.home_town`.
 *
 * @param {{ query: object }} context - page context exposing a `query` helper
 * @returns {object} the scraped profile; fields missing from the page are left undefined or null
 */
function scrapeProfile({ query }) {
	const profile = {};

	const keys = query.contents('.info dt');
	const values = query.contents('.info dd');

	const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]]));

	profile.birthPlace = bio.home_town;
	profile.height = getMeasurement(bio.height);

	profile.cup = bio.cup_size?.replace('cup', '').trim();
	// bust, waist and hip are converted to inches; height and shoe size are not
	profile.bust = getMeasurement(bio.bust_size, true);
	profile.waist = getMeasurement(bio.waist_size, true);
	profile.hip = getMeasurement(bio.hip_size || bio.hip, true);

	profile.hairStyle = bio.hair_style;
	profile.shoeSize = getMeasurement(bio.shoes_size);

	// not every profile lists a blood type; a missing entry must not throw
	profile.bloodType = bio.blood_type?.replace('type', '').trim();

	profile.avatar = query.img('#profile img');

	return profile;
}
/**
 * Fetch one page of the latest-releases listing and scrape its tiles.
 *
 * @param {{ url: string }} channel - channel whose `url` is the site base
 * @param {number} page - listing page number, inserted into the query string
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request was not ok
 */
async function fetchLatest(channel, page) {
	const listingUrl = `${channel.url}/product/?vendor=Tokyo-Hot&page=${page}&order=published_at`;

	const requestOptions = {
		selectAll: '#main .list .detail',
		// NOTE(review): presumably forwarded to the HTTPS agent, turning off certificate verification — confirm it is still needed
		agent: {
			rejectUnauthorized: false,
		},
	};

	const response = await unprint.get(listingUrl, requestOptions);

	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.context, channel);
}
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - absolute URL of the scene page
 * @param {{ url: string }} channel - channel passed through to the scene scraper
 * @returns {Promise<object|number>} the scraped release, or the response status when the request was not ok
 */
async function fetchScene(url, channel) {
	// NOTE(review): presumably forwarded to the HTTPS agent, turning off certificate verification — confirm it is still needed
	const agent = { rejectUnauthorized: false };
	const response = await unprint.get(url, { agent });

	if (!response.ok) {
		return response.status;
	}

	return scrapeScene(response.context, url, channel);
}
/**
 * Fetch an actor's profile page and scrape it.
 *
 * Only actors that already carry a profile URL are supported.
 *
 * @param {{ url?: string }} actor - actor whose `url` points at the profile page
 * @param {object} context - passed through to the profile scraper
 * @returns {Promise<object|number|null>} the profile, the response status when the request was not ok, or null without a URL
 */
async function fetchProfile(actor, context) {
	const profileUrl = actor.url;

	// search is cumbersome
	if (!profileUrl) {
		return null;
	}

	// NOTE(review): presumably forwarded to the HTTPS agent, turning off certificate verification — confirm it is still needed
	const agent = { rejectUnauthorized: false };
	const response = await unprint.get(profileUrl, { agent });

	return response.ok
		? scrapeProfile(response.context, context)
		: response.status;
}
// Public entry points of this scraper; the scrape* helpers and getMeasurement stay module-private.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};