Generalized Adult Empire subsite scraper, added West Coast Productions.
This commit is contained in:
329
src/scrapers/adultempire.js
Normal file
329
src/scrapers/adultempire.js
Normal file
@@ -0,0 +1,329 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
/**
 * Fetch the screenshot list the channel's membership endpoint exposes for a scene.
 *
 * @param {string} entryId - scene ID, interpolated as `scene_<entryId>` into the query string
 * @param {Object} channel - channel entity; only `channel.url` is read
 * @returns {Promise<string[]>} the response body split on whitespace/commas with empty
 *   entries removed, or an empty array when the response is not OK
 */
async function getPhotos(entryId, channel) {
	const endpoint = `${channel.url}/Membership/GetScreenshots?sceneID=scene_${entryId}`;
	const { ok, body } = await http.get(endpoint);

	if (!ok) {
		return [];
	}

	// presumably the body is a plain-text list of image URLs — confirm against a live response
	const photos = body.split(/[\s,]+/);

	return photos.filter((photo) => Boolean(photo));
}
|
||||
|
||||
/**
 * Scrape the scene tiles of a tour listing page.
 *
 * A tile without a thumbnail or `alt` text no longer aborts the whole page: the title
 * is left undefined and the poster is simply not set for that tile.
 *
 * @param {Array<{ query: Object }>} scenes - one query context per `.scene-update` tile
 * @param {Object} channel - channel entity; `channel.url` is the origin for relative links
 * @returns {Object[]} one release per tile
 * @throws {TypeError} when a tile has no link, or the link has no numeric path segment
 */
function scrapeAllTour(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.scene-update-details, .feature-update-details', 'href', { origin: channel.url });
		// the entry ID is mandatory, so a tile without one is still allowed to throw
		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];

		// the thumbnail alt text is the title followed by the word "image"
		release.title = query.q('.scene-img-wrapper img', 'alt')?.replace(/\s*image$/i, '');

		release.date = query.date('.scene-update-stats span, .feature-update-details span', 'MMM DD, YYYY');
		release.actors = query.cnt('.scene-update-details h3, .feature-update-details h2')
			?.split(/\s*\|\s*/)
			.map((actor) => actor.trim());

		const poster = query.img('.scene-img-wrapper img');

		if (poster) {
			// larger renditions first, the size found on the page last
			release.poster = [
				poster.replace(/\/res\/\d+/, '/res/1920'),
				poster.replace(/\/res\/\d+/, '/res/1600'),
				poster,
			];
		}

		release.trailer = { src: query.video('.scene-img-wrapper source') };

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape the scene tiles of a grid listing page.
 *
 * @param {Array<{ query: Object, el: Object }>} scenes - query context and element per grid item
 * @param {Object} channel - channel entity; `channel.url` prefixes relative scene links
 * @param {Object} [options] - scrape options; photos are fetched only when `options.includePhotos` is truthy
 * @returns {Promise<Object[]>} one release per grid item
 */
async function scrapeAllGrid(scenes, channel, options) {
	return Promise.all(scenes.map(async ({ query, el }) => {
		const release = {};
		const uri = query.url('.grid-item-title') || query.url('a.animated-screen');

		// prefer the digits in the element ID, fall back to the leading path segment of the link;
		// the link may be absent, so it is optional-chained instead of crashing the whole page
		release.entryId = el.id.match(/\d+/)?.[0] || uri?.match(/^(\d+)\//)?.[1];

		release.title = query.cnt('.grid-item-title');
		release.url = qu.prefixUrl(uri, channel.url);

		release.poster = query.img('.screenshot');

		// options is optional; without an entry ID the screenshot request would be meaningless
		if (options?.includePhotos && release.entryId) {
			release.photos = await getPhotos(release.entryId, channel);
		}

		return release;
	}));
}
|
||||
|
||||
/**
 * Scrape the scenes listed on a movie page.
 *
 * Scene links are resolved against the channel's own URL, so the scraper works for every
 * subsite; the Elegant Angel origin is kept only as a fallback for callers that pass no channel.
 *
 * @param {Array<{ query: Object }>} scenes - one query context per scene grid item
 * @param {Object} [channel] - channel entity; `channel.url` is the origin for relative links
 * @returns {Object[]} one release per scene
 * @throws {TypeError} when a scene has no link, or the link has no numeric path segment
 */
function scrapeMovieScenes(scenes, channel) {
	const origin = channel?.url || 'https://www.elegantangel.com';

	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.cnt('.scene-title a');
		release.url = query.url('.scene-title a', 'href', { origin });
		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];

		// multiplied by 60: the page value is presumably in minutes — confirm against a live page
		release.duration = query.number('.scene-length') * 60;
		release.actors = query.cnts('.scene-cast-list a');

		release.poster = query.img('a img');

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a scene or movie page into a release object.
 *
 * The page is treated as a movie when it contains `.scene-list-header`, otherwise as a scene.
 *
 * @param {{ query: Object, html: string }} page - query helpers bound to the page, plus its raw HTML
 * @param {string} url - absolute page URL; its first numeric path segment becomes the entry ID
 * @param {Object} channel - channel entity; `channel.url` is the origin for relative links
 * @param {Object} [baseRelease] - previously scraped release; its poster takes precedence when present
 * @param {Object} [options] - scrape options; `options.parameters.studio === false` disables studio extraction
 * @returns {Promise<Object>} the scraped release
 * @throws {TypeError} when `url` is not a valid URL or has no numeric path segment
 */
async function scrapeRelease({ query, html }, url, channel, baseRelease, options) {
	const release = {};
	const type = query.exists('.scene-list-header') ? 'movie' : 'scene';

	release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];

	release.title = query.cnt('.scene-page .description, .video-page .description');
	release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);

	release.actors = query.all('.video-performer').map((el) => {
		const avatar = qu.query.img(el, 'img', 'data-bgsrc');

		return {
			name: qu.query.cnt(el, 'span'),
			url: qu.query.url(el, 'a', 'href', { origin: channel.url }),
			// a performer without a picture must not abort the whole release
			avatar: avatar
				? [avatar.replace(/\/actor\/\d+/, '/actor/1600'), avatar]
				: null,
		};
	});

	release.tags = query.cnts('.tags a, .categories a');

	// both options and options.parameters may be absent
	release.studio = options?.parameters?.studio === false
		? null
		: slugify(query.cnt('.studio span:last-child'), '');

	if (type === 'scene') {
		release.director = query.text('.director');
		release.duration = query.number('.release-date:last-child') * 60;
		release.poster = baseRelease?.poster || query.url('link[rel="image_src"]') || query.meta('property="og:image"');
	}

	if (type === 'movie') {
		release.director = query.cnt('.director a');
		release.covers = query.imgs('.carousel-item > img');

		release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel);
	}

	if (query.exists('.video-title .movie-title')) {
		release.movie = {
			title: query.cnt('#viewLargeBoxcover .modal-title a'),
			url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: channel.url }),
			// the link may be missing or carry no numeric segment; leave the ID undefined then
			entryId: query.url('#viewLargeBoxcover .modal-title a')?.match(/(\d+)\//)?.[1],
			covers: query.imgs('#viewLargeBoxcover #viewLargeBoxcoverCarousel .carousel-item > img'),
		};
	}

	// larger renditions first, the size found on the page last
	release.photos = query.imgs('#dv_frames a > img').map((photo) => [
		photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
		photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1600`),
		photo,
	]);

	// the trailer ID is read from an inline `item: <digits>,` fragment in the page source
	const trailerId = html.match(/item: (\d+),/)?.[1];

	if (trailerId) {
		const trailerUrl = `https://www.adultempire.com/videoEmbed/${trailerId}?type=preview`;
		const trailerRes = await qu.get(trailerUrl);

		if (trailerRes.ok) {
			const stream = trailerRes.item.query.video();

			release.trailer = { stream };
		}
	}

	return release;
}
|
||||
|
||||
/**
 * Scrape the movie tiles of a movie listing page.
 *
 * A tile without a cover image yields an empty `covers` array instead of throwing.
 *
 * @param {Array<{ query: Object }>} movies - one query context per grid item
 * @param {Object} channel - channel entity; `channel.url` is the origin for relative links
 * @returns {Object[]} one release per movie tile
 * @throws {TypeError} when a tile has no link, or the link has no numeric path segment
 */
function scrapeMovies(movies, channel) {
	return movies.map(({ query }) => {
		const release = {};

		release.url = query.url('.boxcover', 'href', { origin: channel.url });
		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];

		release.title = query.cnt('span');

		const cover = query.img('picture img');

		release.covers = cover
			? [
				// filename is ignored, back-cover has suffix after media ID
				cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
				cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
			]
			: [];

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape the scene tiles listed for an actor.
 *
 * A tile without a thumbnail no longer aborts the whole list; its poster is left unset.
 *
 * @param {Array<{ query: Object }>} scenes - one query context per grid item
 * @param {Object} channel - channel entity; `channel.url` is the origin for relative links
 * @returns {Object[]} one release per tile
 * @throws {TypeError} when a tile has no link, or the link has no numeric path segment
 */
function scrapeActorScenes(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('a', 'href', { origin: channel.url });
		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];

		release.title = query.cnt('.grid-item-title');

		const poster = query.img('a img');

		if (poster) {
			// larger rendition first, the size found on the page last
			release.poster = [
				poster.replace(/\/\d+\//, '/1600/'),
				poster,
			];
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a performer page into a profile, optionally including the performer's scenes.
 *
 * @param {{ query: Object }} page - query helpers bound to the performer page
 * @param {string} url - absolute performer page URL; its first numeric path segment is the actor ID
 * @param {Object} channel - channel entity; `channel.url` is the site root
 * @param {*} include - when truthy, the performer's scenes are fetched and added as `profile.releases`
 * @returns {Promise<Object>} the scraped profile
 */
async function scrapeProfile({ query }, url, channel, include) {
	const profile = {};

	// header items look like "Key: value"; split on the first colon only so values may
	// contain colons, and skip items without a colon instead of crashing on them
	const bio = Object.fromEntries(query.cnts('.performer-page-header li')
		.map((info) => {
			const separatorIndex = info.indexOf(':');

			if (separatorIndex === -1) {
				return null;
			}

			return [
				slugify(info.slice(0, separatorIndex), '_'),
				info.slice(separatorIndex + 1).trim(),
			];
		})
		.filter(Boolean));

	// e.g. "34C-24-34": bust digits, cup letters, waist, hip
	const measurements = bio.meas?.match(/(\d+)(\w+)-(\d+)-(\d+)/);

	if (measurements) {
		[profile.bust, profile.cup, profile.waist, profile.hip] = measurements.slice(1);
	}

	profile.hair = bio.hair;
	profile.eyes = bio.eyes;
	profile.ethnicity = bio.ethnicity;

	profile.height = feetInchesToCm(bio.height);
	profile.weight = lbsToKg(bio.weight);

	profile.avatar = query.img('picture img');

	if (include) {
		const actorId = new URL(url).pathname.match(/\/(\d+)/)[1];
		// the listing lives directly under the channel root; the previous URL carried a
		// leftover "/www.elegantangel.com" path segment after the channel URL
		const res = await qu.getAll(`${channel.url}/streaming-video-by-scene.html?cast=${actorId}`, '.grid-item', null, {
			rejectUnauthorized: false,
		});

		if (res.ok) {
			profile.releases = scrapeActorScenes(res.items, channel);
		}
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the channel's tour listing and scrape its scene tiles.
 *
 * @param {Object} channel - channel entity; `channel.url` is the site root
 * @param {number} [page=1] - value of the `page` query parameter
 * @returns {Promise<Object[]|*>} the scraped releases, or `res.status` when the response is not OK
 */
async function fetchLatestTour(channel, page = 1) {
	const tourUrl = `${channel.url}/tour?page=${page}`;
	// invalid certificate
	const requestOptions = { rejectUnauthorized: false };

	const res = await qu.getAll(tourUrl, '.scene-update', null, requestOptions);

	return res.ok
		? scrapeAllTour(res.items, channel)
		: res.status;
}
|
||||
|
||||
/**
 * Fetch one page of the channel's newest-scenes grid and scrape its items.
 *
 * @param {Object} channel - channel entity; `channel.url` is the site root
 * @param {number} [page=1] - value of the `page` query parameter; defaults to the first page,
 *   matching the other listing fetchers, so an omitted page no longer requests `page=undefined`
 * @param {Object} [options] - scrape options forwarded to the grid scraper
 * @returns {Promise<Object[]|*>} the scraped releases, or `res.status` when the response is not OK
 */
async function fetchLatestGrid(channel, page = 1, options) {
	// NOTE(review): unlike the other fetchers, no `rejectUnauthorized: false` is passed here — confirm this is intended
	const res = await qu.getAll(`${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`, '.item-grid-scene .grid-item');

	if (res.ok) {
		return scrapeAllGrid(res.items, channel, options);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch a release page and scrape it.
 *
 * @param {string} url - absolute URL of the page to fetch
 * @param {Object} channel - channel entity, forwarded to the release scraper
 * @param {Object} [baseRelease] - previously scraped release, forwarded to the release scraper
 * @param {Object} [options] - scrape options, forwarded to the release scraper
 * @returns {Promise<Object|*>} the scraped release, or `res.status` when the response is not OK
 */
async function fetchMovie(url, channel, baseRelease, options) {
	// invalid certificate
	const requestOptions = { rejectUnauthorized: false };
	const res = await qu.get(url, null, null, requestOptions);

	if (!res.ok) {
		return res.status;
	}

	return scrapeRelease(res.item, url, channel, baseRelease, options);
}
|
||||
|
||||
/**
 * Fetch one page of the movie listing and scrape its tiles.
 *
 * @param {Object} channel - channel entity, forwarded to the movie scraper
 * @param {number} [page=1] - value of the `page` query parameter
 * @returns {Promise<Object[]|*>} the scraped movies, or `res.status` when the response is not OK
 */
async function fetchMovies(channel, page = 1) {
	// NOTE(review): the listing URL is fixed to Elegant Angel and does not use channel.url —
	// confirm whether other subsites need their own listing path
	const listingUrl = `https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`;
	// invalid certificate
	const requestOptions = { rejectUnauthorized: false };

	const res = await qu.getAll(listingUrl, '.grid-item', null, requestOptions);

	return res.ok
		? scrapeMovies(res.items, channel)
		: res.status;
}
|
||||
|
||||
/**
 * Fetch a performer page and scrape it into a profile.
 *
 * @param {string} actorUrl - absolute URL of the performer page
 * @param {Object} channel - channel entity, forwarded to the profile scraper
 * @param {*} include - forwarded to the profile scraper
 * @returns {Promise<Object|*>} the scraped profile, or `res.status` when the response is not OK
 */
async function fetchProfilePage(actorUrl, channel, include) {
	const requestOptions = { rejectUnauthorized: false };
	const res = await qu.get(actorUrl, '.performer-page', null, requestOptions);

	if (!res.ok) {
		return res.status;
	}

	return scrapeProfile(res.item, actorUrl, channel, include);
}
|
||||
|
||||
/**
 * Resolve a performer profile, first through the actor's known URL, then through
 * the channel's autocomplete search by name.
 *
 * @param {{ name: string, url?: string }} baseActor - actor to look up
 * @param {Object} channel - channel entity; `channel.url` is the site root
 * @param {*} include - forwarded to the profile page fetcher
 * @returns {Promise<Object|null|*>} the profile; `null` when the search has no matching performer;
 *   otherwise the status returned by the failing profile page or search request
 */
async function fetchProfile(baseActor, channel, include) {
	if (baseActor.url) {
		// pass the URL string, not the actor object: the page fetcher requests and parses it as a URL
		const profile = await fetchProfilePage(baseActor.url, channel, include);

		if (profile && typeof profile === 'object') {
			return profile;
		}
	}

	const searchRes = await http.get(`${channel.url}/search/SearchAutoComplete_Agg_ByMedia?rows=9&name_startsWith=${slugify(baseActor.name, '+')}`);

	if (searchRes.ok) {
		// escape the name so characters such as "(" or "." are matched literally instead of
		// producing an invalid or overly broad pattern
		const namePattern = new RegExp(baseActor.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');

		const actorResult = searchRes.body?.Results?.find((result) => /performer/i.test(result.BasicResponseGroup?.displaytype)
			&& namePattern.test(result.BasicResponseGroup?.description));

		if (actorResult) {
			return fetchProfilePage(`${channel.url}${actorResult.BasicResponseGroup.id}`, channel, include);
		}

		return null;
	}

	return searchRes.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestTour,
|
||||
fetchMovies,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
scrapeScene: scrapeRelease,
|
||||
scrapeMovie: scrapeRelease,
|
||||
grid: {
|
||||
fetchLatest: fetchLatestGrid,
|
||||
scrapeScene: scrapeRelease,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
},
|
||||
};
|
||||
Reference in New Issue
Block a user