Generalized Adult Empire subsite scraper, added West Coast Productions.
This commit is contained in:
@@ -58,7 +58,12 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
|
||||
|
||||
if (scraper.scrapeScene) {
|
||||
const session = qu.session();
|
||||
const res = await qu.get(url, null, null, { session });
|
||||
|
||||
const res = await qu.get(url, null, null, {
|
||||
session,
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
const cookie = await session._sessionOptions.cookieJar.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
|
||||
10
src/media.js
10
src/media.js
@@ -242,7 +242,7 @@ async function findSourceDuplicates(baseMedias) {
|
||||
const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
|
||||
const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');
|
||||
|
||||
return [existingSourceMediaByUrl, existingExtractMediaByUrl];
|
||||
return { existingSourceMediaByUrl, existingExtractMediaByUrl };
|
||||
}
|
||||
|
||||
async function findHashDuplicates(medias) {
|
||||
@@ -285,7 +285,7 @@ async function findHashDuplicates(medias) {
|
||||
}))
|
||||
.concat(selfDuplicateMedias);
|
||||
|
||||
return [selfUniqueHashMedias, existingHashMedias];
|
||||
return { uniqueHashMedias: selfUniqueHashMedias, existingHashMedias };
|
||||
}
|
||||
|
||||
async function extractSource(baseSource, { existingExtractMediaByUrl }) {
|
||||
@@ -741,7 +741,7 @@ function curateMediaEntry(media, index) {
|
||||
async function storeMedias(baseMedias, options) {
|
||||
await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });
|
||||
|
||||
const [existingSourceMediaByUrl, existingExtractMediaByUrl] = await findSourceDuplicates(baseMedias);
|
||||
const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias);
|
||||
|
||||
const fetchedMedias = await Promise.map(
|
||||
baseMedias,
|
||||
@@ -749,7 +749,7 @@ async function storeMedias(baseMedias, options) {
|
||||
{ concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
|
||||
);
|
||||
|
||||
const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(fetchedMedias);
|
||||
const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias);
|
||||
|
||||
const savedMedias = await Promise.map(
|
||||
uniqueHashMedias,
|
||||
@@ -770,7 +770,7 @@ async function storeMedias(baseMedias, options) {
|
||||
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
|
||||
|
||||
try {
|
||||
await bulkInsert('media', newMediaEntries);
|
||||
await bulkInsert('media', newMediaEntries, false);
|
||||
|
||||
return [...newMediaWithEntries, ...existingHashMedias];
|
||||
} catch (error) {
|
||||
|
||||
@@ -1,10 +1,21 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
/**
 * Fetch the screenshot list for a scene.
 *
 * Requests `${channel.url}/Membership/GetScreenshots?sceneID=scene_${entryId}` and splits the
 * response body on runs of whitespace and commas, dropping empty entries.
 *
 * @param {string|number} entryId - scene identifier interpolated into the request URL
 * @param {{ url: string }} channel - channel whose `url` is used as the request base
 * @returns {Promise<string[]>} the listed entries; an empty array when no entry ID is given,
 *   the request is not ok, or the body is not a string
 */
async function getPhotos(entryId, channel) {
	if (entryId === undefined || entryId === null || entryId === '') {
		// without an ID the request would target the literal "scene_undefined"
		return [];
	}

	const res = await http.get(`${channel.url}/Membership/GetScreenshots?sceneID=scene_${entryId}`);

	// only a string body can be split; anything else is treated as "no photos"
	if (res.ok && typeof res.body === 'string') {
		return res.body.split(/[\s,]+/).filter(Boolean);
	}

	return [];
}
|
||||
|
||||
function scrapeAllTour(scenes, channel) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -29,6 +40,26 @@ function scrapeAll(scenes, channel) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Scrape the scene tiles of a grid-style listing.
 *
 * For every tile this reads the link from `.grid-item-title` (falling back to `a.animated-screen`),
 * the title from `.grid-item-title` and the poster from `.screenshot`. When `options.includePhotos`
 * is set and an entry ID was found, the photos are fetched through `getPhotos`.
 *
 * @param {Array<{ query: object, el: object }>} scenes - initialized tile contexts
 * @param {{ url: string }} channel - channel whose `url` prefixes relative links
 * @param {{ includePhotos?: boolean }} [options] - scrape options
 * @returns {Promise<object[]>} one release object per tile, in input order
 */
async function scrapeAllGrid(scenes, channel, options) {
	return Promise.all(scenes.map(async ({ query, el }) => {
		const release = {};
		const uri = query.url('.grid-item-title') || query.url('a.animated-screen');

		// prefer the digits in the element ID, then the numeric leading path segment of the link;
		// either source may be missing, so neither lookup is allowed to throw
		release.entryId = el.id?.match(/\d+/)?.[0] || uri?.match(/^(\d+)\//)?.[1];

		release.title = query.cnt('.grid-item-title');
		release.url = qu.prefixUrl(uri, channel.url);

		release.poster = query.img('.screenshot');

		// the photo request is keyed on the entry ID, so it is pointless without one
		if (options?.includePhotos && release.entryId) {
			release.photos = await getPhotos(release.entryId, channel);
		}

		return release;
	}));
}
|
||||
|
||||
function scrapeMovieScenes(scenes) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
@@ -46,8 +77,9 @@ function scrapeMovieScenes(scenes) {
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeRelease({ query, html }, url, channel, baseRelease, type = 'scene') {
|
||||
async function scrapeRelease({ query, html }, url, channel, baseRelease, options) {
|
||||
const release = {};
|
||||
const type = query.exists('.scene-list-header') ? 'movie' : 'scene';
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];
|
||||
|
||||
@@ -68,7 +100,7 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, type =
|
||||
});
|
||||
|
||||
release.tags = query.cnts('.tags a, .categories a');
|
||||
release.studio = slugify(query.cnt('.studio span:last-child'), '');
|
||||
release.studio = options?.parameters.studio === false ? null : slugify(query.cnt('.studio span:last-child'), '');
|
||||
|
||||
if (type === 'scene') {
|
||||
release.director = query.text('.director');
|
||||
@@ -83,6 +115,15 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, type =
|
||||
release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel);
|
||||
}
|
||||
|
||||
if (query.exists('.video-title .movie-title')) {
|
||||
release.movie = {
|
||||
title: query.cnt('#viewLargeBoxcover .modal-title a'),
|
||||
url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: channel.url }),
|
||||
entryId: query.url('#viewLargeBoxcover .modal-title a')?.match(/(\d+)\//)[1],
|
||||
covers: query.imgs('#viewLargeBoxcover #viewLargeBoxcoverCarousel .carousel-item > img'),
|
||||
};
|
||||
}
|
||||
|
||||
release.photos = query.imgs('#dv_frames a > img').map(photo => [
|
||||
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
|
||||
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1600`),
|
||||
@@ -174,7 +215,7 @@ async function scrapeProfile({ query }, url, channel, include) {
|
||||
|
||||
if (include) {
|
||||
const actorId = new URL(url).pathname.match(/\/(\d+)/)[1];
|
||||
const res = await qu.getAll(`https://www.elegantangel.com/streaming-video-by-scene.html?cast=${actorId}`, '.grid-item', null, {
|
||||
const res = await qu.getAll(`${channel.url}/www.elegantangel.com/streaming-video-by-scene.html?cast=${actorId}`, '.grid-item', null, {
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
@@ -186,7 +227,7 @@ async function scrapeProfile({ query }, url, channel, include) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
async function fetchLatestTour(channel, page = 1) {
|
||||
const url = `${channel.url}/tour?page=${page}`;
|
||||
const res = await qu.getAll(url, '.scene-update', null, {
|
||||
// invalid certificate
|
||||
@@ -194,33 +235,30 @@ async function fetchLatest(channel, page = 1) {
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
return scrapeAllTour(res.items, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, channel, baseRelease) {
|
||||
/**
 * Fetch one page of the newest scenes from the grid-style listing.
 *
 * @param {{ url: string }} channel - channel whose `url` is the request base
 * @param {number} [page=1] - listing page; defaults to the first page, matching fetchLatestTour
 * @param {object} [options] - passed through to scrapeAllGrid
 * @returns {Promise<object[]|number>} scraped releases, or the response status when the request is not ok
 */
async function fetchLatestGrid(channel, page = 1, options) {
	const url = `${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`;
	const res = await qu.getAll(url, '.item-grid-scene .grid-item');

	if (res.ok) {
		return scrapeAllGrid(res.items, channel, options);
	}

	return res.status;
}
|
||||
|
||||
async function fetchMovie(url, channel, baseRelease, options) {
|
||||
const res = await qu.get(url, null, null, {
|
||||
// invalid certificate
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeRelease(res.item, url, channel, baseRelease);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchMovie(url, channel, baseRelease) {
|
||||
const res = await qu.get(url, null, null, {
|
||||
// invalid certificate
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeRelease(res.item, url, channel, baseRelease, 'movie');
|
||||
return scrapeRelease(res.item, url, channel, baseRelease, options);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
@@ -239,26 +277,53 @@ async function fetchMovies(channel, page = 1) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(baseActor, channel, include) {
|
||||
if (!baseActor.url) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const res = await qu.get(baseActor.url, '.performer-page', null, {
|
||||
/**
 * Fetch a performer page and scrape it into a profile.
 *
 * @param {string} actorUrl - URL of the performer page
 * @param {{ url: string }} channel - channel passed through to scrapeProfile
 * @param {*} include - passed through to scrapeProfile
 * @returns {Promise<object|number>} the scraped profile, or the response status when the request is not ok
 */
async function fetchProfilePage(actorUrl, channel, include) {
	const res = await qu.get(actorUrl, '.performer-page', null, {
		rejectUnauthorized: false,
	});

	if (res.ok) {
		// scrape against the URL that was actually fetched; no actor object is in scope here
		return scrapeProfile(res.item, actorUrl, channel, include);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Resolve an actor profile, first from the actor's known URL and otherwise through the site search.
 *
 * When `baseActor.url` is set, that page is fetched first and its profile returned if one was
 * scraped. Otherwise, or when that fetch yields no profile, the search endpoint is queried with the
 * slugified name and the first performer result whose description contains the name is fetched.
 *
 * @param {{ name: string, url?: string }} baseActor - actor to look up
 * @param {{ url: string }} channel - channel whose `url` is the base for search and result links
 * @param {*} include - passed through to fetchProfilePage
 * @returns {Promise<object|number|null>} the profile (or whatever fetchProfilePage returns for the
 *   search hit), `null` when the search finds no matching performer, or the search response status
 *   when the search request is not ok
 */
async function fetchProfile(baseActor, channel, include) {
	if (baseActor.url) {
		// fetchProfilePage takes the page URL, not the actor object
		const profile = await fetchProfilePage(baseActor.url, channel, include);

		// a failed fetch comes back as a status; null is also typeof 'object', so rule it out explicitly
		if (profile && typeof profile === 'object') {
			return profile;
		}
	}

	const searchRes = await http.get(`${channel.url}/search/SearchAutoComplete_Agg_ByMedia?rows=9&name_startsWith=${slugify(baseActor.name, '+')}`);

	if (searchRes.ok) {
		// escape regex metacharacters so the name is matched literally
		const namePattern = new RegExp(baseActor.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i');

		const actorResult = searchRes.body?.Results?.find(result => /performer/i.test(result.BasicResponseGroup?.displaytype)
			&& namePattern.test(result.BasicResponseGroup?.description));

		if (actorResult) {
			return fetchProfilePage(`${channel.url}${actorResult.BasicResponseGroup.id}`, channel, include);
		}

		return null;
	}

	return searchRes.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest: fetchLatestTour,
|
||||
fetchMovies,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
scrapeScene: scrapeRelease,
|
||||
scrapeMovie: scrapeRelease,
|
||||
grid: {
|
||||
fetchLatest: fetchLatestGrid,
|
||||
scrapeScene: scrapeRelease,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
},
|
||||
};
|
||||
@@ -1,5 +1,6 @@
|
||||
'use strict';
|
||||
|
||||
const adultempire = require('./adultempire');
|
||||
const adulttime = require('./adulttime');
|
||||
const assylum = require('./assylum');
|
||||
const aziani = require('./aziani');
|
||||
@@ -16,7 +17,6 @@ const czechav = require('./czechav');
|
||||
const ddfnetwork = require('./ddfnetwork');
|
||||
const dogfart = require('./dogfart');
|
||||
const dorcel = require('./dorcel');
|
||||
const elegantangel = require('./elegantangel');
|
||||
const famedigital = require('./famedigital');
|
||||
const firstanalquest = require('./firstanalquest');
|
||||
const elevatedx = require('./elevatedx');
|
||||
@@ -88,7 +88,7 @@ const scrapers = {
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
dorcel,
|
||||
elegantangel,
|
||||
elegantangel: adultempire,
|
||||
famedigital,
|
||||
exploitedx: elevatedx,
|
||||
firstanalquest,
|
||||
@@ -145,6 +145,7 @@ const scrapers = {
|
||||
vixen,
|
||||
vogov,
|
||||
wankzvr,
|
||||
westcoastproductions: adultempire,
|
||||
whalemember,
|
||||
xempire,
|
||||
},
|
||||
@@ -186,7 +187,7 @@ const scrapers = {
|
||||
dorcelclub: dorcel,
|
||||
doubleviewcasting: firstanalquest,
|
||||
dtfsluts: fullpornnetwork,
|
||||
elegantangel,
|
||||
elegantangel: adultempire,
|
||||
evilangel: gamma,
|
||||
exploitedcollegegirls: elevatedx,
|
||||
eyeontheguy: hush,
|
||||
@@ -271,6 +272,7 @@ const scrapers = {
|
||||
vixen,
|
||||
vrcosplayx: badoink,
|
||||
wankzvr,
|
||||
westcoastproductions: adultempire,
|
||||
wicked: gamma,
|
||||
wildoncam: cherrypimps,
|
||||
xempire,
|
||||
|
||||
@@ -570,6 +570,7 @@ module.exports = {
|
||||
formatDate,
|
||||
get,
|
||||
getAll,
|
||||
http,
|
||||
fetch: get,
|
||||
fetchAll: getAll,
|
||||
context: init,
|
||||
|
||||
Reference in New Issue
Block a user