traxxx/src/scrapers/inthecrack.js

125 lines
3.1 KiB
JavaScript
Raw Normal View History

'use strict';
const moment = require('moment');
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Scrape every collection tile of a listing page into a release object.
 *
 * @param {Array<{ query: object }>} scenes - one query context per tile.
 * @param {{ url: string }} channel - its `url` is passed as the origin for the tile link and thumbnail.
 * @returns {object[]} releases carrying url, entryId, shootId, date, actors and poster.
 * @throws {TypeError} when a tile link is not a valid URL or has no /Collection/<digits> path.
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a', 'href', { origin: channel.url });
    release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1];

    // The first span is read for both the leading numeric shoot ID and, as a
    // fallback, the actor names. It may be missing, so every use is null-safe.
    const title = query.cnt('a span:nth-of-type(1)');

    release.shootId = title?.match(/^\d+/)?.[0];
    release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD');

    // Prefer the thumbnail's alt text; fall back to the title span.
    release.actors = (query.q('a img', 'alt') || title)?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);

    // With a shoot ID the poster URL is built directly; otherwise use the tile thumbnail.
    release.poster = release.shootId
      ? `https://inthecrack.com/assets/images/posters/collections/${release.shootId}.jpg`
      : query.img('a img', 'src', { origin: channel.url });

    return release;
  });
}
/**
 * Scrape a collection page into a release with one chapter per clip.
 *
 * @param {{ query: object, html: string }} context - query helpers and raw page HTML.
 * @param {string} url - page URL; the entry ID is taken from its /Collection/<digits> path.
 * @param {{ url: string }} channel - its `url` is used to resolve relative image URLs.
 * @returns {object} release with entryId, shootId, actors, description,
 *   productionLocation, poster and chapters.
 * @throws {TypeError} when `url` is invalid or has no /Collection/<digits> path.
 */
function scrapeScene({ query, html }, url, channel) {
  const release = {};

  release.entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1];

  // The heading span is read for both the leading numeric shoot ID and the
  // actor names. It may be missing, so both reads are null-safe.
  const title = query.cnt('h2 span');

  release.shootId = title?.match(/^\d+/)?.[0];
  release.actors = title?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);

  release.description = query.cnt('p#CollectionDescription');
  release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1];

  // The poster is taken from a background-image declaration in the raw HTML.
  release.poster = qu.prefixUrl(html.match(/background-image: url\('(.*)'\)/)?.[1], channel.url);

  release.chapters = query.all('.ClipOuter').map((el) => {
    const chapter = {};

    chapter.title = query.text(el, 'h4');
    chapter.description = query.cnt(el, 'p');
    chapter.duration = query.dur(el, '.InlineDuration');

    // A clip without a .clipImage background yields no style; skip its poster
    // instead of throwing on the match.
    const posterStyle = query.style(el, '.clipImage', 'background-image');
    const poster = qu.prefixUrl(posterStyle?.match(/url\((.*)\)/)?.[1], channel.url);

    if (poster) {
      const { origin, pathname } = new URL(poster);

      chapter.poster = [
        `${origin}${pathname}`, // full size
        poster,
      ];
    }

    if (query.exists(el, '.ThreeDInfo')) {
      chapter.tags = ['3d'];
    }

    return chapter;
  });

  return release;
}
/**
 * Scrape an actor page into a profile, or into that page's releases when requested.
 *
 * @param {{ query: object, el: object }} context - query helpers and the page root element.
 * @param {string} actorName - not used by the scraping itself.
 * @param {{ url: string }} entity - forwarded to scrapeAll as the channel for resolving links.
 * @param {{ releases?: boolean }} [include] - when `releases` is truthy the scraped
 *   `.scene` releases are returned instead of the profile.
 * @returns {object|object[]} the profile (description, birthPlace, avatar) or the releases.
 */
function scrapeProfile({ query, el }, actorName, entity, include) {
  const profile = {};

  profile.description = query.cnt('.bio-text');
  profile.birthPlace = query.cnt('.birth-place span');
  profile.avatar = query.img('.actor-photo img');

  if (include?.releases) {
    // scrapeAll reads channel.url for every tile, so the entity must be passed along.
    return scrapeAll(qu.initAll(el, '.scene'), entity);
  }

  return profile;
}
/**
 * Fetch one page of the latest collections. Pages map to calendar years:
 * page 1 is the current year, page 2 the year before, and so on.
 *
 * @param {{ url: string }} channel - its `url` is the base of the listing URL.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<object[]|number>} scraped releases, or the response status on failure.
 */
async function fetchLatest(channel, page = 1) {
  // The unit key used to be ' year' (leading space), which is not a unit key
  // Moment recognises, so no years were subtracted and every page requested
  // the current year.
  const year = moment().subtract(page - 1, 'years').year();
  const url = `${channel.url}/Collections/Date/${year}`;

  const res = await qu.getAll(url, '.collectionGridLayout li');

  if (res.ok) {
    return scrapeAll(res.items, channel);
  }

  return res.status;
}
/**
 * Fetch a collection page and scrape it.
 *
 * @param {string} url - page to request.
 * @param {object} channel - forwarded to scrapeScene.
 * @returns {Promise<object|number>} the scraped release, or the response status on failure.
 */
async function fetchScene(url, channel) {
  const response = await qu.get(url);

  return response.ok
    ? scrapeScene(response.item, url, channel)
    : response.status;
}
/**
 * Fetch an actor page by slugified name and scrape it.
 *
 * @param {{ name: string }} actor - only `name` is read.
 * @param {{ url: string }} entity - its `url` is the base of the actor page URL.
 * @param {object} include - forwarded to scrapeProfile.
 * @returns {Promise<object|number>} the scraped result, or the response status on failure.
 */
async function fetchProfile({ name: actorName }, entity, include) {
  const actorSlug = slugify(actorName, '_');
  const response = await qu.get(`${entity.url}/actors/${actorSlug}`);

  return response.ok
    ? scrapeProfile(response.item, actorName, entity, include)
    : response.status;
}
// Exported scraper entry points. fetchProfile is defined in this file but is
// currently left out of the exports (commented out below).
module.exports = {
fetchLatest,
fetchScene,
// fetchProfile,
};