// traxxx/src/scrapers/hookuphotshot.js
// Repository web-view metadata: 106 lines, 2.9 KiB, JavaScript; history timestamp 2020-09-03 20:22:12 +00:00.
'use strict';
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Wraps a media URL in the source descriptor shape used for every image in this scraper.
 * NOTE(review): interval/concurrency look like download throttling hints — confirm against the media fetcher.
 *
 * @param {?string} src - Image URL.
 * @returns {{src: ?string, interval: number, concurrency: number}}
 */
function toMediaSource(src) {
  return { src, interval: 1000, concurrency: 1 };
}

/**
 * Builds the actor list for one scene tile: every linked actress, followed by
 * the hard-coded male performer that is appended to every scene.
 *
 * The tile carries a single thumbnail; it is attached as avatar only to the
 * actress whose name appears in the thumbnail's alt text.
 *
 * @param {Object} query - Query helper bound to the scene tile.
 * @returns {Object[]} Actor descriptors.
 */
function scrapeSceneActors(query) {
  const avatarEl = query.el('.girl-thumb-container img');
  // The thumbnail may be absent; treat that as "no avatar" instead of crashing.
  const avatarSrc = avatarEl?.src;
  const avatarAlt = avatarEl?.alt || '';

  const actresses = query.all('.date-starring a').map((actorEl) => {
    const name = query.cnt(actorEl);
    const actor = {
      name,
      gender: 'female',
      url: query.url(actorEl, null),
    };

    // Plain substring check: building a RegExp from the raw name would throw
    // or mis-match on names containing regex metacharacters.
    if (name && avatarSrc && avatarAlt.includes(name)) {
      actor.avatar = [
        avatarSrc.replace(/-\d+x\d+/, ''), // variant without the "-WxH" size suffix first
        avatarSrc,
      ].map((src) => toMediaSource(src));
    }

    return actor;
  });

  return [
    ...actresses,
    {
      name: 'Bryan Gozzling',
      gender: 'male',
    },
  ];
}

/**
 * Converts the scene tiles of a listing page into release objects.
 *
 * Missing page elements (thumbnail, poster, photo inputs, date prefix in the
 * title) degrade to empty/null fields on that release rather than throwing,
 * so one malformed tile does not abort the whole page.
 *
 * @param {Array<{query: Object}>} scenes - Items exposing a `query` helper per scene tile.
 * @returns {Object[]} One release per scene.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.date-title a');
    release.actors = scrapeSceneActors(query);

    release.duration = query.dur('.date-facts');
    release.stars = query.number('[data-rating]', null, 'data-rating');

    const photoCount = query.number('input[id*=count]', null, 'value');
    const photoPath = query.url('input[id*=baseurl]', 'value');

    release.poster = toMediaSource(query.img('.date-img-swap'));

    // Only generate photo URLs when both a usable count and a base path exist;
    // Array(null) would otherwise yield one bogus entry and Array(NaN) would throw.
    const hasPhotos = Boolean(photoPath) && Number.isInteger(photoCount) && photoCount > 0;

    release.photos = Array.from({ length: hasPhotos ? photoCount : 0 }, (_, index) => toMediaSource(
      `${photoPath}/${String(index + 1).padStart(2, '0')}.jpg`,
    ));

    // dates appear to be manually curated
    const fullTitle = query.cnt('.date-title a') || '';

    // Title is expected as "<month>[.] <day> - <title>"; fall back to the full text when it is not.
    const [monthName, day, title = fullTitle] = fullTitle.match(/(\w+)\.? (\d+)\s*-?\s*(.*)/)?.slice(1) || [];
    // Year (and a fallback month) are taken from the poster's upload path.
    const [year, month] = release.poster.src?.match(/uploads\/(\d+)\/(\d+)/)?.slice(1) || [];

    release.title = title.replace(/behind the\.\.\./i, 'Behind the Scenes');

    const monthPart = monthName || month;

    release.date = year && monthPart && day
      ? qu.extractDate(`${year}-${monthPart}-${day}`, ['YYYY-MM-DD', 'YYYY-MMM-DD', 'YYYY-MMMM-DD'])
      : null;

    const hasDate = release.date instanceof Date && !Number.isNaN(release.date.getTime());

    // Entry ID is derived from the release date and the first actor's name.
    // NOTE(review): without a usable date no ID can be built; null is returned — confirm downstream handling.
    release.entryId = hasDate
      ? `${release.date.getFullYear()}-${release.date.getMonth() + 1}-${release.date.getDate()}-${slugify(release.actors[0].name)}`
      : null;

    release.tags = ['rough', ...(release.title.match(/behind the scenes|anal/gi) || [])];

    return release;
  });
}
/**
 * Extracts actor profile details from a profile page.
 *
 * Gender is fixed to 'female'. The description joins the text of every
 * paragraph under `.girl-about` that is not a `.bio-facts` paragraph, and the
 * avatar is read from the `.girl-pic` image.
 *
 * @param {{query: Object}} page - Item exposing a `query` helper for the profile page.
 * @returns {{gender: string, description: string, avatar: *}} The scraped profile.
 */
function scrapeProfile({ query }) {
  const aboutParagraphs = query.cnts('.girl-about p:not(.bio-facts)');
  const description = aboutParagraphs.join(' ');
  const avatar = query.img('.girl-pic');

  // no deep scraping available, and not all scene details available here
  return {
    gender: 'female',
    description,
    avatar,
  };
}
/**
 * Fetches one page of the channel's scene listing and scrapes it.
 *
 * @param {{url: string}} channel - Channel whose `url` is the site base URL.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]|*>} Scraped releases on success, otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
  const response = await qu.getAll(`${channel.url}/the-dates/page/${page}`, '#et-projects li');

  // Failed requests surface their status to the caller instead of releases.
  if (!response.ok) {
    return response.status;
  }

  return scrapeAll(response.items, channel);
}
/**
 * Fetches an actor's profile page, addressed by the slug of her name, and scrapes it.
 *
 * @param {{name: string}} actor - Actor whose `name` is slugified into the profile URL.
 * @param {{url: string}} entity - Entity whose `url` is the site base URL.
 * @param {*} include - Passed through to the profile scraper.
 * @returns {Promise<Object|*>} Scraped profile on success, otherwise the response status.
 */
async function fetchProfile({ name: actorName }, entity, include) {
  const response = await qu.get(`${entity.url}/girls/${slugify(actorName)}`);

  // Failed requests surface their status to the caller instead of a profile.
  if (!response.ok) {
    return response.status;
  }

  return scrapeProfile(response.item, actorName, entity, include);
}
// Module interface: the latest-scenes listing fetcher and the actor profile fetcher.
module.exports = {
fetchLatest,
fetchProfile,
};