Updated AnalVids.

This commit is contained in:
DebaucheryLibrarian 2023-08-16 03:08:47 +02:00
parent 20ba833147
commit b29a34c76a
26 changed files with 295 additions and 149 deletions

View File

@ -375,7 +375,7 @@ module.exports = {
thumbnailQuality: 100,
lazySize: 90,
lazyQuality: 90,
trailerQuality: [480, 540, 360, 720, 960, 1080, 320, 1440, 1600, 1920, 2160, 270, 240, 180],
trailerQuality: [540, 720, 480, 360, 960, 1080, 320, 1440, 1600, 1920, 2160, 270, 240, 180],
limit: 25, // max number of photos per release
attempts: 2,
fetchStreams: true,

14
package-lock.json generated
View File

@ -81,7 +81,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0",
"unprint": "^0.10.3",
"unprint": "^0.10.7",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3",
"video.js": "^7.11.4",
@ -17626,9 +17626,9 @@
}
},
"node_modules/unprint": {
"version": "0.10.3",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.3.tgz",
"integrity": "sha512-ui8BbBo4JmKR++w50rSUFyg8X6l9EAbLRpATxdjxyS7yYevjcGMEt3HT0nrBG2JXDMkLwWZ+WoOaz3qC5stSxQ==",
"version": "0.10.7",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.7.tgz",
"integrity": "sha512-AuWdWvxkNrFSa8jGE3HIUJ7Pwvp4GR5kJ6eYu5zO8Devizc2D7qifvQv1ksLa9ZenKRjmsgoFVVHOAroGFNE3g==",
"dependencies": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",
@ -32536,9 +32536,9 @@
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
},
"unprint": {
"version": "0.10.3",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.3.tgz",
"integrity": "sha512-ui8BbBo4JmKR++w50rSUFyg8X6l9EAbLRpATxdjxyS7yYevjcGMEt3HT0nrBG2JXDMkLwWZ+WoOaz3qC5stSxQ==",
"version": "0.10.7",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.7.tgz",
"integrity": "sha512-AuWdWvxkNrFSa8jGE3HIUJ7Pwvp4GR5kJ6eYu5zO8Devizc2D7qifvQv1ksLa9ZenKRjmsgoFVVHOAroGFNE3g==",
"requires": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",

View File

@ -140,7 +140,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0",
"unprint": "^0.10.3",
"unprint": "^0.10.7",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3",
"video.js": "^7.11.4",

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.4 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

BIN
public/img/logos/analvids/lazy/analvids.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

BIN
public/img/logos/analvids/lazy/favicon.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.4 KiB

BIN
public/img/logos/analvids/lazy/favicon_dark.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

After

Width:  |  Height:  |  Size: 7.2 KiB

BIN
public/img/logos/analvids/lazy/favicon_light.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.4 KiB

BIN
public/img/logos/analvids/lazy/legalporno.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.6 KiB

BIN
public/img/logos/analvids/lazy/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.8 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 29 KiB

BIN
public/img/logos/analvids/thumbs/analvids.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 8.8 KiB

BIN
public/img/logos/analvids/thumbs/favicon.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.4 KiB

BIN
public/img/logos/analvids/thumbs/favicon_dark.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

After

Width:  |  Height:  |  Size: 7.2 KiB

BIN
public/img/logos/analvids/thumbs/favicon_light.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.4 KiB

BIN
public/img/logos/analvids/thumbs/legalporno.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

BIN
public/img/logos/analvids/thumbs/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 8.8 KiB

223
src/scrapers/analvids.js Executable file → Normal file
View File

@ -1,8 +1,6 @@
'use strict';
const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
const unprint = require('unprint');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
@ -17,187 +15,132 @@ function extractTitle(originalTitle) {
return { shootId, title };
}
function getPoster(posterElement, sceneId) {
const posterStyle = posterElement.attr('style');
function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
if (posterStyle) {
return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
}
release.url = query.url('.card-scene__view > a');
release.entryId = query.dataset(null, 'content') || new URL(release.url).pathname.match(/watch\/(\d+)/)?.[1];
const posterRange = posterElement.attr('data-casting');
const posterRangeData = posterRange ? JSON.parse(posterRange) : null;
const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];
release.title = query.content('.card-scene__text');
release.shootId = extractTitle(release.title).shootId;
if (!posterTimeRange) {
return null;
}
release.date = query.date('.label--time:nth-child(2)', 'YYYY-MM-DD');
release.duration = query.duration('.label--time:first-child');
if (typeof posterTimeRange === 'number') {
// poster time is already a single time value
return `https://analvids.com/casting/${sceneId}/${posterTimeRange}`;
}
release.poster = query.img('.card-scene__view img', { attribute: 'data-src' });
const caps = query.json('.card-scene__view > a', { attribute: 'data-casting' })?.map((timestamp) => `${channel.url}/casting/${release.entryId}/${timestamp}`);
const [max, min] = posterTimeRange.split('-');
const posterTime = Math.floor(Math.random() * (Number(max) - Number(min) + 1) + Number(min));
if (caps && release.poster) {
release.caps = caps;
} else if (caps) {
[release.poster, ...release.caps] = caps;
}
return `https://analvids.com/casting/${sceneId}/${posterTime}`;
}
release.teaser = query.video('.card-scene__view > a', { attribute: 'data-preview' });
function scrapeAll(html) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.thumbnails > div').toArray();
return scenesElements.map((element) => {
const sceneLinkElement = $(element).find('.thumbnail-title a');
const url = sceneLinkElement.attr('href');
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
const { shootId, title } = extractTitle(originalTitle);
const entryId = new URL(url).pathname.split('/')[2];
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
const sceneId = $(element).attr('data-content');
const posterElement = $(element).find('.thumbnail-avatar');
const poster = getPoster(posterElement, sceneId);
return {
url,
shootId,
entryId,
title,
date,
poster,
};
return release;
});
}
async function scrapeScene(html, url, site, useGallery) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const playerObject = $('script:contains("new WatchPage")').html();
const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1);
const data = playerData && JSON.parse(playerData);
function scrapeScene({ query }, { url }) {
const release = {};
const release = { url };
release.entryId = new URL(url).pathname.match(/watch\/(\d+)/)?.[1];
const originalTitle = $('h1.watchpage-title').text().trim();
const { shootId, title } = extractTitle(originalTitle);
const featuring = query.content('.watch__title .watch__featuring_models');
release.shootId = shootId;
release.entryId = new URL(url).pathname.split('/')[2];
release.title = query.content('.watch__title').replace(featuring, '');
release.description = query.content('.text-mob-more');
release.shootId = extractTitle(release.title).shootId;
release.title = title;
release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
release.date = query.date('.bi-calendar3', 'YYYY-MM-DD');
release.duration = query.duration('.bi-clock');
const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();
release.actors = query.all('.watch__title a[href*="/model"]').map((el) => ({
name: unprint.query.content(el),
url: unprint.query.url(el, null),
}));
release.description = $('meta[name="description"]')?.attr('content')?.trim()
|| (descriptionElement && $(descriptionElement).find('dd').text().trim());
release.tags = query.contents('.genres-list a[href*="/genre"]');
release.actors = $(actorsElement)
.find('a[href*="com/model"]')
.map((actorIndex, actorElement) => $(actorElement).text()).toArray();
release.studio = slugify(query.content('.genres-list a[href*="/studios"]'), '', { removePunctuation: true });
release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
release.poster = query.img('.watch__video video', { attribute: 'data-poster' });
release.photoCount = query.number('.bi-eye');
const photos = useGallery
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
release.photos = photos.map((source) => {
// source without parameters sometimes serves larger preview photo
const { origin, pathname } = new URL(source);
return `${origin}${pathname}`;
});
const posterStyle = $('#player').attr('style');
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
if (data) {
const qualityMap = {
web: 240,
vga: 480,
hd: 720,
'1080p': 1080,
};
release.trailer = data.clip.qualities.map((trailer) => ({
src: trailer.src,
type: trailer.type,
quality: qualityMap[trailer.quality] || trailer.quality,
}));
}
const studioName = $('.watchpage-studioname').first().text().trim();
release.studio = slugify(studioName, '', { removePunctuation: true });
release.trailer = query.all('.watch__video source').map((el) => ({
src: unprint.query.video(el, null, { attribute: 'src' }),
quality: unprint.query.number(el, null, { attribute: 'size' }),
}));
return release;
}
async function scrapeProfile(html, _url, actorName) {
const { document } = new JSDOM(html).window;
function scrapeProfile({ query }, url, channel) {
const profile = { url };
const profile = {
name: actorName,
};
profile.nationality = query.content('.model__info a[href*="/nationality"]');
profile.age = query.number('//td[contains(text(), "Age")]/following-sibling::td');
const avatarEl = document.querySelector('.model--avatar img[src^="http"]');
const entries = Array.from(document.querySelectorAll('.model--description tr'), (el) => el.textContent.replace(/\n/g, '').split(':'));
profile.avatar = query.img('.model__left img');
const bio = entries
.filter((entry) => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {});
profile.birthPlace = bio.Nationality;
if (bio.Age) profile.age = bio.Age;
if (avatarEl) profile.avatar = avatarEl.src;
profile.releases = scrapeAll(html);
profile.scenes = scrapeAll(unprint.initAll(query.all('.card-scene')), channel);
return profile;
}
async function fetchLatest(site, page = 1) {
const res = await http.get(`${site.url}/new-videos/${page}`);
async function fetchLatest(channel, page) {
const res = await unprint.get(`${channel.url}/new-videos/${page}`, { selectAll: '.card-scene' });
return scrapeAll(res.body.toString(), site);
if (res.ok) {
return scrapeAll(res.context, channel);
}
return res.status;
}
async function fetchScene(url, site) {
const useGallery = true;
async function getActorUrl(actor, channel) {
if (actor.url) {
return actor.url;
}
// TODO: fall back on screenshots when gallery is not available
const res = useGallery
? await http.get(`${url}/gallery#gallery`)
: await http.get(`${url}/screenshots#screenshots`);
const searchRes = await http.get(`${channel.url}/api/autocomplete/search?q=${slugify(actor.name, '+')}`);
return scrapeScene(res.body.toString(), url, site, useGallery);
}
if (!searchRes.ok) {
return searchRes.status;
}
async function fetchProfile({ name: actorName }) {
const res = await http.get(`https://www.analvids.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`);
const data = res.body;
const result = data.terms.find((item) => item.type === 'model');
const result = searchRes.body.terms.find((item) => item.type === 'model');
if (result) {
const bioRes = await http.get(result.url);
const html = bioRes.body.toString();
return scrapeProfile(html, result.url, actorName);
return result.url;
}
return null;
}
async function fetchProfile(actor, { channel }) {
const actorUrl = await getActorUrl(actor, channel);
if (typeof actorUrl !== 'string') {
return actorUrl;
}
const bioRes = await unprint.get(actorUrl);
if (bioRes.ok) {
return scrapeProfile(bioRes.context, actorUrl, channel);
}
return bioRes.status;
}
module.exports = {
fetchLatest,
scrapeScene: {
scraper: scrapeScene,
unprint: true,
},
fetchProfile,
fetchScene,
};

203
src/scrapers/analvids_legacy.js Executable file
View File

@ -0,0 +1,203 @@
'use strict';
const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
/**
 * Split a raw scene title into its shoot ID and the remaining title.
 *
 * The shoot ID is looked for in the final space-separated word only: word
 * characters followed by one or more digits at the end of that word.
 *
 * @param {string} originalTitle - title as scraped from the page
 * @returns {{ shootId: (string|null), title: string }} when the last word does
 *   not look like a shoot ID, shootId is null and title is the untouched input
 */
function extractTitle(originalTitle) {
	const words = originalTitle.split(' ');
	const lastWord = words[words.length - 1];

	// an explicit list of studio prefixes (AB, AF, GP, SZ, ...) was matched here before; any word characters followed by digits qualify now
	const shootIdMatch = lastWord.match(/\w+\d+\s*$/);

	if (!shootIdMatch) {
		return { shootId: null, title: originalTitle };
	}

	return {
		shootId: shootIdMatch[0],
		title: words.slice(0, -1).join(' '),
	};
}
/**
 * Resolve the poster URL for a scene thumbnail.
 *
 * The URL is taken from the element's inline style when present (the text
 * between the first '(' and the final character). Otherwise a timestamp is
 * picked at random from the element's `data-casting` JSON array, whose entries
 * are either a number or a "from-to" range string, and turned into a casting
 * screenshot URL.
 *
 * @param {{ attr: function(string): (string|undefined) }} posterElement - element wrapper exposing attr(), e.g. a cheerio selection
 * @param {string} sceneId - scene identifier used in the casting URL
 * @returns {string|null} poster URL, or null when no poster can be derived
 * @throws {SyntaxError} when `data-casting` is present but not valid JSON
 */
function getPoster(posterElement, sceneId) {
	const posterStyle = posterElement.attr('style');

	if (posterStyle) {
		return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
	}

	const posterRange = posterElement.attr('data-casting');
	const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

	// a missing or empty data-casting attribute means there is nothing to pick from
	if (!Array.isArray(posterRangeData) || posterRangeData.length === 0) {
		return null;
	}

	const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

	if (!posterTimeRange) {
		return null;
	}

	if (typeof posterTimeRange === 'number') {
		// poster time is already a single time value
		return `https://analvids.com/casting/${sceneId}/${posterTimeRange}`;
	}

	const bounds = String(posterTimeRange)
		.split('-')
		.map((bound) => Number.parseFloat(bound))
		.filter((bound) => Number.isFinite(bound));

	if (bounds.length === 0) {
		return null;
	}

	// the bounds may come in either order, and a range may hold a single value
	const min = Math.min(...bounds);
	const max = Math.max(...bounds);
	const posterTime = Math.floor(Math.random() * (max - min + 1) + min);

	return `https://analvids.com/casting/${sceneId}/${posterTime}`;
}
/**
 * Extract the scene listing from an overview page.
 *
 * Every direct `div` child of `.thumbnails` is turned into a release stub.
 *
 * @param {string} html - markup of the listing page
 * @returns {Array<{ url: string, shootId: (string|null), entryId: string, title: string, date: Date, poster: (string|null) }>}
 * @throws {TypeError} when a thumbnail's link href is not an absolute URL
 */
function scrapeAll(html) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	return $('.thumbnails > div').toArray().map((thumbnail) => {
		const $thumbnail = $(thumbnail);
		const $titleLink = $thumbnail.find('.thumbnail-title a');

		const url = $titleLink.attr('href');

		// read the link text rather than its title attribute, which breaks when they use \\ escaping
		const { shootId, title } = extractTitle($titleLink.text().trim());

		// third segment of the scene URL path
		const entryId = new URL(url).pathname.split('/')[2];
		const date = moment.utc($thumbnail.attr('release'), 'YYYY/MM/DD').toDate();

		const poster = getPoster($thumbnail.find('.thumbnail-avatar'), $thumbnail.attr('data-content'));

		return {
			url,
			shootId,
			entryId,
			title,
			date,
			poster,
		};
	});
}
/**
 * Build a release object from a scene page.
 *
 * @param {string} html - markup of the scene's gallery or screenshots page
 * @param {string} url - absolute scene URL; its third path segment becomes the entry ID
 * @param {object} site - accepted for signature compatibility, not used here
 * @param {boolean} useGallery - read photos from `.gallery` when true, from `.screenshots` otherwise
 * @returns {Promise<object>} release with url, shootId, entryId, title, date, description,
 *   actors, duration, tags, photos, poster, studio and, when player data was found, trailer
 */
async function scrapeScene(html, url, site, useGallery) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// the player configuration is embedded as a JSON object literal in the WatchPage bootstrap script
	const playerObject = $('script:contains("new WatchPage")').html();
	const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1);
	const data = playerData && JSON.parse(playerData);

	const release = { url };

	const originalTitle = $('h1.watchpage-title').text().trim();
	const { shootId, title } = extractTitle(originalTitle);

	release.shootId = shootId;
	release.entryId = new URL(url).pathname.split('/')[2];

	release.title = title;
	release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();

	// the description rows are taken positionally: actors, tags, description
	const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();

	release.description = $('meta[name="description"]')?.attr('content')?.trim()
		|| (descriptionElement && $(descriptionElement).find('dd').text().trim());

	release.actors = $(actorsElement)
		.find('a[href*="com/model"]')
		.map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
	release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	const photos = useGallery
		? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
		: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();

	release.photos = photos.map((source) => {
		// source without parameters sometimes serves larger preview photo
		const { origin, pathname } = new URL(source);

		return `${origin}${pathname}`;
	});

	// the player may lack an inline style altogether; treat that as "no poster" so the photo fallback below applies
	const posterStyle = $('#player').attr('style');
	const poster = posterStyle ? posterStyle.slice(posterStyle.indexOf('(') + 1, -1) : null;

	release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback

	if (data) {
		// translate the player's quality labels to vertical resolutions; unknown labels are passed through
		const qualityMap = {
			web: 240,
			vga: 480,
			hd: 720,
			'1080p': 1080,
		};

		release.trailer = data.clip.qualities.map((trailer) => ({
			src: trailer.src,
			type: trailer.type,
			quality: qualityMap[trailer.quality] || trailer.quality,
		}));
	}

	const studioName = $('.watchpage-studioname').first().text().trim();
	release.studio = slugify(studioName, '', { removePunctuation: true });

	return release;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {string} html - markup of the model page
 * @param {string} _url - model page URL, unused
 * @param {string} actorName - name stored on the profile as-is
 * @returns {Promise<object>} profile with name, birthPlace, releases and, when found, age and avatar
 */
async function scrapeProfile(html, _url, actorName) {
	const { document } = new JSDOM(html).window;

	const avatarImage = document.querySelector('.model--avatar img[src^="http"]');
	const bioRows = [...document.querySelectorAll('.model--description tr')];

	// each table row reads "Key: value"; collect them into a lookup
	const bio = Object.fromEntries(
		bioRows
			.map((row) => row.textContent.replace(/\n/g, '').split(':'))
			.filter((parts) => parts.length === 2) // rows that do not split into exactly two parts are skipped (About section, see Blanche Bradburry)
			.map(([key, value]) => [key.trim(), value.trim()]),
	);

	const profile = {
		name: actorName,
		birthPlace: bio.Nationality,
	};

	if (bio.Age) {
		profile.age = bio.Age;
	}

	if (avatarImage) {
		profile.avatar = avatarImage.src;
	}

	// the model page lists the actor's scenes in the same markup as the overview pages
	profile.releases = scrapeAll(html);

	return profile;
}
/**
 * Fetch one page of the site's newest videos and scrape its scene listing.
 *
 * @param {{ url: string }} site - site whose base URL is queried
 * @param {number} [page=1] - page number appended to the new-videos path
 * @returns {Promise<Array<object>>} release stubs as produced by scrapeAll
 */
async function fetchLatest(site, page = 1) {
	const latestUrl = `${site.url}/new-videos/${page}`;
	const response = await http.get(latestUrl);
	const html = response.body.toString();

	return scrapeAll(html, site);
}
/**
 * Fetch a scene page and scrape it into a release.
 *
 * The gallery variant of the page is always requested at the moment.
 *
 * @param {string} url - absolute scene URL
 * @param {object} site - passed through to scrapeScene
 * @returns {Promise<object>} release as produced by scrapeScene
 */
async function fetchScene(url, site) {
	const useGallery = true;

	// TODO: fall back on screenshots when gallery is not available
	const pageUrl = useGallery
		? `${url}/gallery#gallery`
		: `${url}/screenshots#screenshots`;

	const response = await http.get(pageUrl);
	const html = response.body.toString();

	return scrapeScene(html, url, site, useGallery);
}
/**
 * Look an actor up through the site's autocomplete search and scrape the
 * first result of type 'model' into a profile.
 *
 * @param {{ name: string }} actor - actor to look up; only the name is used
 * @returns {Promise<object|null>} profile as produced by scrapeProfile, or null
 *   when the search returned no model entry
 */
async function fetchProfile({ name: actorName }) {
	// URLSearchParams encodes every space as '+' and escapes reserved characters,
	// so names of three or more words and names with punctuation are sent intact
	const searchQuery = new URLSearchParams({ q: actorName });
	const res = await http.get(`https://www.analvids.com/api/autocomplete/search?${searchQuery}`);

	const terms = res.body?.terms;

	// a response without a terms list is treated the same as "no match"
	if (!Array.isArray(terms)) {
		return null;
	}

	const result = terms.find((item) => item.type === 'model');

	if (result) {
		const bioRes = await http.get(result.url);
		const html = bioRes.body.toString();

		return scrapeProfile(html, result.url, actorName);
	}

	return null;
}
// Entry points exposed by this scraper module: latest scene listings, actor profiles and single scenes.
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
};