
269 lines
7.4 KiB

'use strict';
const moment = require('moment');
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
// Turns the scene list items of a collection page into release objects, one per item.
// NOTE(review): this copy of the block looks truncated. The mapping call on the return line, the start of the
// date statement fused onto the entryId line, the URL prefix of the poster template and the closing braces are
// missing, so the block does not parse as-is. Restore those parts from the upstream file; do not guess them.
function scrapeAll(scenes, channel) {
return{ query }) => {
const release = {};
// link of the scene, with the channel URL passed as the origin option
release.url = query.url('a', 'href', { origin: channel.url });
// release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1]; can't be matched with upcoming scenes
// the shoot ID is the leading run of digits in the first span; it doubles as the entry ID
release.shootId = query.cnt('a span:nth-of-type(1)').match(/^\d+/)?.[0];
release.entryId = release.shootId; ='a span:nth-of-type(2)', 'YYYY-MM-DD');
// actor names are the runs of space-separated alphabetic words in the image alt text,
// or in the first span when the alt lookup yields nothing
release.actors = (query.q('a img', 'alt') || query.cnt('a span:nth-of-type(1)'))?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
// prefer a poster name derived from the shoot ID, fall back to the source of the listed image
release.poster = release.shootId
? `${release.shootId}.jpg`
: query.img('a img', 'src', { origin: channel.url });
return release;
// Turns the coming-soon list items into release objects; ID, actors and date are all parsed from the span text.
// NOTE(review): this copy of the block looks truncated. The mapping call on the return line, the assignment
// targets of both date branches, the remaining poster entries and the closing brackets are missing, so the
// block does not parse as-is. Restore those parts from the upstream file; do not guess them.
function scrapeUpcoming(scenes, channel) {
return{ query }) => {
const release = {};
const title = query.cnt('span');
// the leading digits of the title are the entry ID; this throws when the title does not start with a digit
release.entryId = title.match(/^\d+/)[0];
// actor names are the alphabetic word runs found before the first hyphen
release.actors = title.slice(0, title.indexOf('-')).match(/[a-zA-Z]+(\s[a-zA-Z]+)*/g);
// the title ends in an abbreviated month and an ordinal day without a year, parsed in UTC
const date = moment.utc(title.match(/\w+ \d+\w+$/)[0], 'MMM Do');
// isBefore() without arguments compares against the current time
if (date.isBefore()) {
// date is next year = date.add(1, 'year').toDate();
} else { = date.toDate();
release.poster = [
query.img('img', 'src', { origin: channel.url }),
return release;
// Turns the collection list items shown for an actor into release objects credited to actorName.
// NOTE(review): this copy of the block looks truncated. The mapping call on the return line, the accumulator
// carry-over inside the reducer, the assignment target of the parsed release date, the terminator of the block
// comment opened before the duration lines, the remaining poster entries and the closing brackets are missing.
// Restore those parts from the upstream file; do not guess them.
function scrapeProfileScenes(items, actorName, channel) {
return{ query }) => {
const release = {};
// items whose text slugifies to this marker carry no collection and yield null
if (slugify(query.cnt()) === 'no-other-collections') {
return null;
// each paragraph is split on the colon into a key and a value; anything after a second colon is dropped
// presumably the second slugify argument is the delimiter, matching the release_date and shoot_location keys read below
const details = query.cnts('figure p').reduce((acc, info) => {
const [key, value] = info.split(':');
return {
[slugify(key, '_')]: value?.trim(),
}, {});
release.url = query.url('a', 'href', { origin: channel.url });
// the shoot ID is the first run of digits in the collection detail; this throws when that detail is absent
release.shootId = details.collection.match(/\d+/)[0];
release.entryId = release.shootId; = qu.parseDate(details.release_date, 'YYYY-MM-DD');
release.actors = [actorName];
// NOTE(review): the block comment opened on the next line is not closed in this copy, so everything after it is
// lexed as comment text; it presumably wrapped only the two duration lines - confirm against upstream
/* rely on clip length
const durationString = Object.keys(details).find(info => /\d+_min_video/.test(info));
release.duration = durationString && Number(durationString.match(/^\d+/)?.[0]) * 60;
release.productionLocation = details.shoot_location;
release.poster = [
query.img('img', 'src', { origin: channel.url }),
return release;
// Builds an actor profile from the bio list of a page and attaches the releases of that actor.
// NOTE(review): this copy of the block looks truncated. The accumulator carry-over inside the reducer, the name
// assignment fused onto the end of the reducer line, the key inside the releasesFromScene lookup and the closing
// brackets are missing. Restore those parts from the upstream file; do not guess them.
function scrapeProfile({ query }, actorName, actorAvatar, channel, releasesFromScene) {
const profile = {};
// the bio entries are read from a different selector when releasesFromScene is supplied
// NOTE(review): value.trim() has no optional chaining here, so an entry without a colon throws,
// unlike the equivalent reducer in scrapeProfileScenes - confirm whether that is intended
const bio = query.cnts(releasesFromScene ? 'ul li' : 'div.modelInfo li').reduce((acc, info) => {
const [key, value] = info.split(':');
return {
[slugify(key, '_')]: value.trim(),
}, {}); = actorName ||;
// gender is hard-coded, it is not read from the page
profile.gender = 'female';
profile.birthPlace = bio.nationality;
// height and weight are converted only when the bio lists them
if (bio.height) profile.height = feetInchesToCm(bio.height);
if (bio.weight) profile.weight = lbsToKg(bio.weight);
// releases come from the supplied lookup when available, otherwise from the list on this page
profile.releases = releasesFromScene?.[] || scrapeProfileScenes(qu.initAll(query.all('.Models li')), actorName, channel);
// avatar is the poster of a scene, find scene and use its high quality poster instead
// the match compares URL pathnames of the second poster entry and the avatar; the first poster entry becomes the avatar
const avatarRelease = profile.releases.find(release => new URL(release.poster[1]).pathname === new URL(actorAvatar).pathname);
profile.avatar = avatarRelease?.poster[0];
return profile;
// Requests the actor listing for a scene and returns one profile per listed actor, or null when the request fails.
// The _release argument is not read by the visible code.
// NOTE(review): this copy of the block looks truncated. The URL prefix in front of the entry ID, the accumulator
// carry-over inside the reducer and the closing brackets are missing. Restore those parts from the upstream file;
// do not guess them.
async function fetchSceneActors(entryId, _release, channel) {
const url = `${entryId}`;
const res = await qu.get(url);
if (res.ok) {
// one tab per actor: the link text is the name, the data-model attribute is the ID used in the tab panel selector
const actorTabs = qu.initAll(res.item.query.all('#ModelTabs li')).map(({ query }) => ({
name: query.cnt('a'),
id: query.q('a', 'data-model'),
// lookup from actor name to the releases scraped from the tab panel of that actor
const actorReleasesByActorName = actorTabs.reduce((acc, { name, id }) => {
const releaseEls = qu.initAll(res.item.query.all(`#Model-${id} li`));
const releases = scrapeProfileScenes(releaseEls, name, channel);
return {
[name]: releases,
}, {});
// each direct list item under .modelInfo is scraped as a profile; no actor name is passed in,
// and the image of the item is handed over as the avatar to match against release posters
const actors = qu.initAll(res.item.query.all('.modelInfo > li')).map((item) => {
const avatar = item.query.img('img', 'src', { origin: channel.url });
const profile = scrapeProfile(item, null, avatar, channel, actorReleasesByActorName);
return profile;
return actors;
return null;
// Scrapes a single scene page into a release object with actors, description, location, poster and clip chapters.
// NOTE(review): this copy of the block looks truncated. The call that reads the background-image style of a clip,
// the remaining chapter poster entries and the closing brackets are missing. Restore those parts from the upstream
// file; do not guess them.
async function scrapeScene({ query, html }, url, channel) {
const release = {};
// the numeric ID in the page URL is only used to request the actors; this throws when the URL has no such segment
const entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1];
release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0];
release.entryId = release.shootId; // site entry ID can't be matched with upcoming scenes
// prefer the fetched actor profiles, fall back to the names found in the heading
const actors = await fetchSceneActors(entryId, release, channel);
release.actors = actors || query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
release.description = query.cnt('p#CollectionDescription');
release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1];
// the poster is taken from a background-image declaration in the raw HTML
release.poster = qu.prefixUrl(html.match(/background-image: url\('(.*)'\)/)?.[1], channel.url);
// every .ClipOuter element becomes one chapter
release.chapters = query.all('.ClipOuter').map((el) => {
const chapter = {};
chapter.title = query.text(el, 'h4');
chapter.description = query.cnt(el, 'p');
chapter.duration = query.dur(el, '.InlineDuration');
const posterStyle =, '.clipImage', 'background-image');
const poster = qu.prefixUrl(posterStyle.match(/url\((.*)\)/)?.[1], channel.url);
if (poster) {
// origin plus pathname drops any query string or fragment from the poster URL
const { origin, pathname } = new URL(poster);
chapter.poster = [
`${origin}${pathname}`, // full size
// clips carrying a .ThreeDInfo element are tagged as 3d
if (query.exists(el, '.ThreeDInfo')) {
chapter.tags = ['3d'];
return chapter;
return release;
// Fetches the collections released in one calendar year and scrapes them into releases.
// Pages map to years: page 1 is the current year, page 2 the year before, and so on.
//
// @param {object} channel - channel entity; channel.url is the base of the requested URL
// @param {number} [page=1] - 1-based page number
// @returns {Promise<*>} the scrapeAll result when the request succeeds, otherwise the response status
async function fetchLatest(channel, page = 1) {
  // The unit has to be exactly 'year': moment ignores unit strings it does not recognise,
  // so a padded unit leaves the date untouched and every page resolves to the current year.
  const year = moment().subtract(page - 1, 'year').year();
  const url = `${channel.url}/Collections/Date/${year}`;
  const res = await qu.getAll(url, '.collectionGridLayout li');

  if (res.ok) {
    return scrapeAll(res.items, channel);
  }

  return res.status;
}
// Fetches the channel front page and scrapes its coming-soon list into releases.
//
// @param {object} channel - channel entity; channel.url is the page that gets requested
// @returns {Promise<*>} the scrapeUpcoming result when the request succeeds, otherwise the response status
async function fetchUpcoming(channel) {
  const res = await qu.getAll(channel.url, '#ComingSoon li');

  if (res.ok) {
    return scrapeUpcoming(res.items, channel);
  }

  return res.status;
}
// Fetches a scene page and scrapes it into a release.
//
// @param {string} url - address of the scene page
// @param {object} channel - channel entity, handed through to scrapeScene
// @returns {Promise<*>} the scrapeScene result when the request succeeds, otherwise the response status
async function fetchScene(url, channel) {
  const res = await qu.get(url);

  if (res.ok) {
    return scrapeScene(res.item, url, channel);
  }

  return res.status;
}
// Looks an actor up in the alphabetical collection index and scrapes the linked page into a profile.
//
// @param {object} actor - actor entity; only its name is read
// @param {object} channel - channel entity; channel.url is the base of the requested URLs
// @param {*} _include - not used by this scraper
// @returns {Promise<*>} the scrapeProfile result, null when the actor is not in the index,
//   or the response status of whichever request failed
async function fetchProfile({ name: actorName }, channel, _include) {
  // the index is split into one page per initial letter
  const firstLetter = actorName.charAt(0).toUpperCase();
  const url = `${channel.url}/Collections/Name/${firstLetter}`;
  const res = await qu.getAll(url, '.collectionGridLayout li');

  if (!res.ok) {
    return res.status;
  }

  // names are compared as slugs, so case and spacing differences do not matter
  const actorItem = res.items.find(({ query }) => slugify(query.cnt('span')) === slugify(actorName));

  if (!actorItem) {
    return null;
  }

  const actorUrl = actorItem.query.url('a', 'href', { origin: channel.url });
  // the index thumbnail is handed on so scrapeProfile can match it against a release poster
  const actorAvatar = actorItem.query.img('img', 'src', { origin: channel.url });
  const actorRes = await qu.get(actorUrl);

  if (!actorRes.ok) {
    return actorRes.status;
  }

  return scrapeProfile(actorRes.item, actorName, actorAvatar, channel);
}
module.exports = {