Added screen caps separate from photos. Added Tokyo Hot. Added hair type, shoe size and blood type actor fields.
This commit is contained in:
171
src/scrapers/tokyohot.js
Normal file
171
src/scrapers/tokyohot.js
Normal file
@@ -0,0 +1,171 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Build one release object per listing tile of an overview page.
 *
 * @param {Array<{ query: object }>} scenes - per-tile contexts; only `query` is used
 *   (presumably unprint query contexts produced by `selectAll` — confirm against caller)
 * @param {{ url: string }} channel - channel whose `url` is used to prefix tile links
 * @returns {object[]} releases with url, entryId, shootId, title, description and,
 *   when the tile has an image, a poster fallback list
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};
		const pathname = query.url();

		release.url = unprint.prefixUrl(pathname, channel.url);
		// a tile without a link has no product ID to extract; don't crash the whole page
		release.entryId = pathname?.match(/product\/(\w+)/)?.[1];
		release.shootId = query.attribute('img', 'title');

		// strip the leading studio name from the title
		release.title = query.content('.title')?.replace(/^tokyo hot\s*/i, '');
		release.description = query.content('.text');

		const poster = query.img();

		// only derive poster sources when the tile actually has an image
		if (poster) {
			release.poster = [
				poster.replace('220x124', '820x462'), // larger variant first
				poster,
			];
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene (product) page.
 *
 * @param {{ query: object }} context - page context; only `query` is used
 * @param {string} url - absolute URL of the scene page, used to derive the entry ID
 * @param {{ url: string }} channel - channel whose `url` is the origin for actor links
 * @returns {object} release with IDs, title, description, date, duration, actors, tags,
 *   trailer, photos, caps and, when the video has one, a poster fallback list
 */
function scrapeScene({ query }, url, channel) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/product\/(\w+)/)?.[1];
	release.shootId = query.content('//dt[contains(text(), "Product ID")]/following-sibling::dd[1]');

	release.title = query.content('.contents h2');
	release.description = query.content('.contents .sentence');
	release.date = query.date('//dt[contains(text(), "Release Date")]/following-sibling::dd[1]', 'YYYY/MM/DD');
	release.duration = query.duration('//dt[contains(text(), "Duration")]/following-sibling::dd[1]');

	release.actors = query.all('.info a[href*="/cast"]').map((el) => ({
		name: unprint.query.content(el),
		url: unprint.query.url(el, null, { origin: channel.url }),
	}));

	release.tags = query.contents('.info a[href*="type=play"]');

	const poster = query.poster('.movie video');

	// a scene without a video poster must not abort the whole scrape
	if (poster) {
		release.poster = [
			poster,
			poster.replace('820x462', '220x124'), // smaller variant as fallback
		];
	}

	release.trailer = query.video('.movie source');

	// photos and screen caps are kept separate; each entry is [full size, thumbnail]
	release.photos = query.imgs('.scap a', { attribute: 'href' }).map((img) => [
		img,
		img.replace('640x480_wlimited', '150x150_default'),
	]);

	release.caps = query.imgs('.vcap a', { attribute: 'href' }).map((img) => [
		img,
		img.replace('640x480_wlimited', '120x120_default'),
	]);

	return release;
}
|
||||
|
||||
/**
 * Extract a single numeric measurement from a profile field.
 *
 * Measurements are specified as a range in centimeters, e.g. `85 ~ 89cm`;
 * the last number followed by a `cm` unit (any letter case) is used.
 *
 * @param {string|null|undefined} string - raw field text
 * @param {boolean} [inches=false] - convert the value to whole inches
 * @returns {number|null} centimeters (or rounded inches), or null when no value is found
 */
function getMeasurement(string, inches = false) {
	const INCHES_PER_CM = 0.393701;

	if (!string) {
		return null;
	}

	// coerce so a stray non-string value yields null instead of throwing on matchAll
	const matches = Array.from(String(string).matchAll(/(\d+(?:\.\d+)?)\s*cm/gi));
	const value = matches.at(-1)?.[1];

	if (!value) {
		return null;
	}

	if (inches) {
		return Math.round(Number(value) * INCHES_PER_CM);
	}

	return Number(value);
}
|
||||
|
||||
/**
 * Build an actor profile from a cast page.
 *
 * The `.info` definition list is read as parallel `dt` (label) and `dd` (value)
 * lists, paired by index and keyed by the underscore-slugified label.
 *
 * @param {{ query: object }} context - page context; only `query` is used
 * @returns {object} profile with birthPlace, height, cup, bust, waist, hip,
 *   hairStyle, shoeSize, bloodType and avatar; fields absent from the page are
 *   null or undefined
 */
function scrapeProfile({ query }) {
	const profile = {};

	const keys = query.contents('.info dt');
	const values = query.contents('.info dd');

	const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]]));

	profile.birthPlace = bio.home_town;

	profile.height = getMeasurement(bio.height);

	profile.cup = bio.cup_size?.replace('cup', '').trim();
	// bust, waist and hip are converted to inches
	profile.bust = getMeasurement(bio.bust_size, true);
	profile.waist = getMeasurement(bio.waist_size, true);
	profile.hip = getMeasurement(bio.hip_size || bio.hip, true);

	profile.hairStyle = bio.hair_style;
	profile.shoeSize = getMeasurement(bio.shoes_size);

	// not every profile lists a blood type; guard like cup size instead of throwing
	profile.bloodType = bio.blood_type?.replace('type', '').trim();

	profile.avatar = query.img('#profile img');

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the latest-releases listing and scrape its tiles.
 *
 * @param {{ url: string }} channel - channel whose `url` is the site origin
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]|number>} scraped releases, or the response status
 *   when the request did not succeed
 */
async function fetchLatest(channel, page = 1) {
	const url = `${channel.url}/product/?vendor=Tokyo-Hot&page=${page}&order=published_at`;

	const res = await unprint.get(url, {
		selectAll: '#main .list .detail',
		agent: {
			// NOTE(review): TLS certificate verification is disabled for this request;
			// presumably required by the site's certificate setup — confirm it is still needed
			rejectUnauthorized: false,
		},
	});

	if (res.ok) {
		return scrapeAll(res.context, channel);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - absolute URL of the scene page
 * @param {{ url: string }} channel - channel passed through to the scene scraper
 * @returns {Promise<object|number>} scraped release, or the response status
 *   when the request did not succeed
 */
async function fetchScene(url, channel) {
	const res = await unprint.get(url, {
		agent: {
			// NOTE(review): TLS certificate verification is disabled for this request;
			// presumably required by the site's certificate setup — confirm it is still needed
			rejectUnauthorized: false,
		},
	});

	if (res.ok) {
		return scrapeScene(res.context, url, channel);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape an actor profile page.
 *
 * Only actors with a known profile URL are supported; no search is attempted.
 *
 * @param {{ url?: string }} actor - actor whose `url` points at the profile page
 * @param {object} context - passed through to the profile scraper as second argument
 * @returns {Promise<object|number|null>} scraped profile, the response status when
 *   the request did not succeed, or null when the actor has no URL
 */
async function fetchProfile(actor, context) {
	if (!actor.url) {
		// search is cumbersome
		return null;
	}

	const res = await unprint.get(actor.url, {
		agent: {
			// NOTE(review): TLS certificate verification is disabled for this request;
			// presumably required by the site's certificate setup — confirm it is still needed
			rejectUnauthorized: false,
		},
	});

	if (res.ok) {
		return scrapeProfile(res.context, context);
	}

	return res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
Reference in New Issue
Block a user