'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
/**
 * Build release objects from the items of a blog-style updates page.
 *
 * Missing elements yield undefined/null properties instead of throwing, so
 * one malformed item does not abort the whole page.
 *
 * @param {Array<{ query: object }>} scenes - page items, each exposing a `query` helper scoped to that item
 * @param {{ url: string }} channel - channel whose `url` is passed as origin for links and images
 * @returns {object[]} one release per item
 */
function scrapeLatestBlog(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a.more:not([href*="/join.php"])', 'href', { origin: channel.url });

    if (release.url) {
      // both numeric path segments form the ID: /scene/12/345 -> 12-345
      release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)\/(\d+)/)?.slice(1, 3).join('-');
    } else {
      // no scene link, fall back to the scene directory in the thumbnail path
      release.entryId = query.img('.bigthumb')?.match(/\/scenes\/(\w+)/)?.[1];
    }

    release.title = query.q('h5 strong', true)?.match(/. - (.+)$/)?.[1] || query.text('.videos h3');
    release.description = query.text('p');
    release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);

    // remove common patterns so only the name is left
    const curatedTitle = (release.title || '').replace(/\b(part \d|\banal|bts)\b/gi, '').trim();

    // plain substring check: the title is page content and must not be interpreted as a regex,
    // and an empty remainder must not count as a match
    if (curatedTitle && !/\band\b/.test(curatedTitle) && release.description?.includes(curatedTitle)) {
      // scene title is probably the actor name
      release.actors = [release.title];
    }

    release.poster = query.img('.bigthumb', null, { origin: channel.url });
    release.photos = query.imgs('.smallthumb', null, { origin: channel.url });

    release.tags = query.all('a[href*="/keywords"]', true);

    return release;
  });
}
/**
 * Build release objects from the `.updateItem` tiles of a non-blog listing.
 *
 * @param {Array<{ query: object }>} scenes - listing items, each exposing a `query` helper scoped to that item
 * @param {{ url: string }} channel - channel whose `url` is prefixed to the trailer path
 * @returns {object[]} one release per item
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])');
    // the thumbnail's alt attribute is used as the entry ID
    release.entryId = query.url('.updateThumb img', 'alt');

    release.title = query.q('.updateInfo h5 a', true);

    release.actors = query.all('.tour_update_models a', true);
    release.date = query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY');

    release.poster = query.img('.updateThumb img');

    // the trailer path is the first quoted argument of the onclick handler;
    // [^']+ keeps the capture from running into any further quoted arguments
    const trailer = query.q('.updateInfo h5 a', 'onclick')?.match(/'([^']+)'/)?.[1];

    if (trailer) {
      release.trailer = {
        src: `${channel.url}${trailer}`,
      };
    }

    return release;
  });
}
/**
 * Build a release object from a blog-style scene page.
 *
 * @param {{ query: object }} item - page wrapper exposing the `query` helper
 * @param {string} url - absolute scene URL; the entry ID is read from its `/scene/<n>/<n>` path
 * @param {{ url: string }} channel - channel whose `url` is passed as origin for images
 * @returns {object} the scraped release; `entryId` is undefined when the URL has no `/scene/<n>/<n>` path
 */
function scrapeSceneBlog({ query }, url, channel) {
  const release = {};

  // both numeric path segments form the ID: /scene/12/345 -> 12-345
  release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/)?.slice(1, 3).join('-');

  release.title = query.text('h4 strong, .videos h3');
  release.description = query.q('#about p, .videos p', true);

  const actors = query.urls('a[href*="/girl/"]')
    .map((actorUrl) => actorUrl.match(/video-([\w\s]+)/)?.[1])
    .filter(Boolean);

  // without actor links the title is used as the actor name; drop it when there is no title either
  release.actors = actors.length > 0 ? actors : [release.title].filter(Boolean);

  release.tags = query.all('.info a[href*="/keywords"], .buttons a[href*="/keywords"]', true);

  release.poster = query.img('#info .main-preview, .bigthumb', null, { origin: channel.url });
  release.photos = [query.img('.previewmed img', null, { origin: channel.url })]
    .concat(query.imgs('.hd-clip img, .smallthumb', null, { origin: channel.url }))
    .filter(Boolean);

  return release;
}
/**
 * Build a release object from a non-blog scene page.
 *
 * @param {{ query: object, html: string }} item - page wrapper with the `query` helper and the raw HTML
 * @param {string} url - scene URL (not used by this scraper, kept for a uniform signature)
 * @param {{ url: string }} channel - channel whose `url` is prefixed to the trailer path
 * @returns {object} the scraped release
 */
function scrapeScene({ query, html }, url, channel) {
  const release = {};

  release.title = query.q('.updatesBlock h2', true);
  release.poster = query.meta('property="og:image"');
  // the entry ID is the path between /content/ and the poster file name; absent without a poster
  release.entryId = release.poster?.match(/\/content\/(.*)\//)?.[1];

  // [^"]+ keeps the capture inside a single src attribute when several share a line
  const trailer = html.match(/src="([^"]+\.mp4)"/)?.[1];

  if (trailer) {
    release.trailer = {
      src: `${channel.url}${trailer}`,
    };
  }

  return release;
}
/**
 * Build an actor profile from a model page.
 *
 * The `.info p` paragraphs are read as `Key: value` pairs; paragraphs that do
 * not follow that pattern are ignored. Properties whose source value is missing
 * or unparseable are left unset rather than being set to NaN.
 *
 * @param {{ query: object }} item - page wrapper exposing the `query` helper
 * @param {{ url: string }} entity - entity whose `url` is passed as origin for the avatar
 * @returns {object} the scraped profile
 */
function scrapeProfile({ query }, entity) {
  const profile = {};

  const bio = Object.fromEntries(
    query.cnts('.info p')
      .map((info) => info.match(/(\w+):\s*(.*)/))
      .filter(Boolean)
      .map(([, key, value]) => [slugify(key, '_'), value]),
  );

  const age = Number(bio.age);

  if (bio.age && Number.isFinite(age)) {
    profile.age = age;
  }

  if (bio.height) {
    profile.height = feetInchesToCm(bio.height);
  }

  profile.eyes = bio.eyes || bio.eyecolor;

  // the cup is letters only and optional, so '34-24-34' is not split into bust 3 and cup '4'
  const measurements = (bio.figure || bio.measurements)?.match(/(\d+)([a-z]*)-(\d+)-(\d+)/i);

  if (measurements) {
    const [, bust, cup, waist, hip] = measurements;

    profile.bust = Number(bust);

    if (cup) {
      profile.cup = cup;
    }

    profile.waist = Number(waist);
    profile.hip = Number(hip);
  }

  profile.avatar = query.img('img.main-preview', 'src', { origin: entity.url });

  return profile;
}
/**
 * Fetch one page of the blog-style updates listing and scrape its `.videos` items.
 *
 * @param {{ url: string }} channel - channel to fetch from
 * @param {number} [page=1] - 1-based page number; the URL offset advances by 10 per page
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchLatestBlog(channel, page = 1) {
  const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
  const res = await qu.getAll(url, '.videos');

  return res.ok ? scrapeLatestBlog(res.items, channel) : res.status;
}
/**
 * Fetch one page of the latest releases for a channel.
 *
 * Channels flagged with `parameters.blog` are delegated to `fetchLatestBlog`;
 * all others are read from the `Movies_<page>_d.html` category listing.
 *
 * @param {{ url: string, parameters?: { blog?: boolean } }} channel - channel to fetch from
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(channel, page = 1) {
  if (channel.parameters?.blog) {
    return fetchLatestBlog(channel, page);
  }

  const url = `${channel.url}/categories/Movies_${page}_d.html`;
  const res = await qu.getAll(url, '.bodyArea .updateItem');

  return res.ok ? scrapeAll(res.items, channel) : res.status;
}
/**
 * Fetch the upcoming releases shown on the channel's front page.
 *
 * Channels flagged with `parameters.blog` always yield an empty list without
 * any request being made.
 *
 * @param {{ url: string, parameters?: { blog?: boolean } }} channel - channel to fetch from
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchUpcoming(channel) {
  if (channel.parameters?.blog) {
    return [];
  }

  const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');

  return res.ok ? scrapeAll(res.items, channel) : res.status;
}
/**
 * Fetch a scene page and scrape it with the scraper matching the channel type.
 *
 * @param {string} url - scene URL to fetch
 * @param {{ url: string, parameters?: { blog?: boolean } }} channel - channel the scene belongs to
 * @returns {Promise<object|number>} scraped release, or the response status when the request failed
 */
async function fetchScene(url, channel) {
  const res = await qu.get(url);

  if (!res.ok) {
    return res.status;
  }

  // blog-style channels use a different page layout
  if (channel.parameters?.blog) {
    return scrapeSceneBlog(res.item, url, channel);
  }

  return scrapeScene(res.item, url, channel);
}
/**
 * Look up an actor in the alphabetical model index and scrape every exact name match.
 *
 * The index page for the first character of the actor's name is fetched, its
 * `.model` entries are filtered on an exact name match, and each matching
 * model page is fetched and scraped in parallel.
 *
 * @param {{ name: string }} baseActor - actor to look up
 * @param {{ url: string }} entity - entity whose site is searched
 * @returns {Promise<Array<object|number>|number>} one profile (or response status) per matching model,
 *   or the index response status when the index request failed
 */
async function fetchProfile(baseActor, entity) {
  const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${baseActor.name.slice(0, 1)}`, '.model');

  if (!modelsRes.ok) {
    return modelsRes.status;
  }

  const models = modelsRes.items.filter(({ query }) => query.cnt('strong') === baseActor.name);

  return Promise.all(models.map(async (model) => {
    const modelUrl = model.query.url('a', 'href', { origin: entity.url });
    const modelRes = await qu.get(modelUrl);

    return modelRes.ok ? scrapeProfile(modelRes.item, entity) : modelRes.status;
  }));
}
module.exports = {
|
|
fetchLatest,
|
|
fetchScene,
|
|
fetchUpcoming,
|
|
fetchProfile,
|
|
};