|
|
|
|
@ -1,268 +1,251 @@
|
|
|
|
|
'use strict';
|
|
|
|
|
|
|
|
|
|
const moment = require('moment');
|
|
|
|
|
const unprint = require('unprint');
|
|
|
|
|
|
|
|
|
|
const qu = require('../utils/q');
|
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
|
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
|
|
|
|
|
|
|
|
|
function scrapeAll(scenes, channel) {
|
|
|
|
|
return scenes.map(({ query }) => {
|
|
|
|
|
const release = {};
|
|
|
|
|
function scrapeProfile(model, channel) {
|
|
|
|
|
const profile = {};
|
|
|
|
|
|
|
|
|
|
release.url = query.url('a', 'href', { origin: channel.url });
|
|
|
|
|
// release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1]; can't be matched with upcoming scenes
|
|
|
|
|
profile.name = model.name; // used by shallow scrape
|
|
|
|
|
profile.entryId = model.id;
|
|
|
|
|
|
|
|
|
|
release.shootId = query.cnt('a span:nth-of-type(1)').match(/^\d+/)?.[0];
|
|
|
|
|
release.entryId = release.shootId;
|
|
|
|
|
profile.dateOfBirth = unprint.extractDate(model.birthdate, 'YYYY-MM-DD');
|
|
|
|
|
|
|
|
|
|
release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD');
|
|
|
|
|
release.actors = (query.q('a img', 'alt') || query.cnt('a span:nth-of-type(1)'))?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
|
|
|
|
|
|
|
|
|
release.poster = release.shootId
|
|
|
|
|
? `https://inthecrack.com/assets/images/posters/collections/${release.shootId}.jpg`
|
|
|
|
|
: query.img('a img', 'src', { origin: channel.url });
|
|
|
|
|
|
|
|
|
|
return release;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function scrapeUpcoming(scenes, channel) {
|
|
|
|
|
return scenes.map(({ query }) => {
|
|
|
|
|
const release = {};
|
|
|
|
|
|
|
|
|
|
const title = query.cnt('span');
|
|
|
|
|
|
|
|
|
|
release.entryId = title.match(/^\d+/)[0];
|
|
|
|
|
release.actors = title.slice(0, title.indexOf('-')).match(/[a-zA-Z]+(\s[a-zA-Z]+)*/g);
|
|
|
|
|
|
|
|
|
|
const date = moment.utc(title.match(/\w+ \d+\w+$/)[0], 'MMM Do');
|
|
|
|
|
|
|
|
|
|
if (date.isBefore()) {
|
|
|
|
|
// date is next year
|
|
|
|
|
release.date = date.add(1, 'year').toDate();
|
|
|
|
|
} else {
|
|
|
|
|
release.date = date.toDate();
|
|
|
|
|
profile.birthPlace = model.countries?.map((country) => {
|
|
|
|
|
if (country.name) {
|
|
|
|
|
return country.name;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
release.poster = [
|
|
|
|
|
`https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`,
|
|
|
|
|
query.img('img', 'src', { origin: channel.url }),
|
|
|
|
|
];
|
|
|
|
|
if (country.isO2 || country.iso2) { // sic
|
|
|
|
|
return country.isO2 || country.iso2;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return release;
|
|
|
|
|
});
|
|
|
|
|
if (typeof country === 'string') {
|
|
|
|
|
return country;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return null;
|
|
|
|
|
}).filter(Boolean)[0];
|
|
|
|
|
|
|
|
|
|
profile.height = model.height;
|
|
|
|
|
profile.weight = model.weight;
|
|
|
|
|
|
|
|
|
|
const ethnicity = model.ethnicity?.title || model.ethnicity;
|
|
|
|
|
|
|
|
|
|
if (!/none/i.test(ethnicity)) {
|
|
|
|
|
profile.ethnicity = ethnicity;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (model.id) {
|
|
|
|
|
profile.url = `${channel.origin}/modelcollections/${model.id}`;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return profile;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function scrapeProfileScenes(items, actorName, channel) {
|
|
|
|
|
return items.map(({ query }) => {
|
|
|
|
|
const release = {};
|
|
|
|
|
function mergeModels(sceneModels, models, channel) {
|
|
|
|
|
if (!Array.isArray(sceneModels) || !models) {
|
|
|
|
|
return [];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (slugify(query.cnt()) === 'no-other-collections') {
|
|
|
|
|
return sceneModels.map((modelId) => {
|
|
|
|
|
const model = models[modelId?.id || modelId];
|
|
|
|
|
|
|
|
|
|
if (!model) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const details = query.cnts('figure p').reduce((acc, info) => {
|
|
|
|
|
const [key, value] = info.split(':');
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
...acc,
|
|
|
|
|
[slugify(key, '_')]: value?.trim(),
|
|
|
|
|
};
|
|
|
|
|
}, {});
|
|
|
|
|
|
|
|
|
|
release.url = query.url('a', 'href', { origin: channel.url });
|
|
|
|
|
|
|
|
|
|
release.shootId = details.collection.match(/\d+/)[0];
|
|
|
|
|
release.entryId = release.shootId;
|
|
|
|
|
|
|
|
|
|
release.date = qu.parseDate(details.release_date, 'YYYY-MM-DD');
|
|
|
|
|
release.actors = [actorName];
|
|
|
|
|
|
|
|
|
|
/* rely on clip length
|
|
|
|
|
const durationString = Object.keys(details).find(info => /\d+_min_video/.test(info));
|
|
|
|
|
release.duration = durationString && Number(durationString.match(/^\d+/)?.[0]) * 60;
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
release.productionLocation = details.shoot_location;
|
|
|
|
|
|
|
|
|
|
release.poster = [
|
|
|
|
|
`https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`,
|
|
|
|
|
query.img('img', 'src', { origin: channel.url }),
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
return release;
|
|
|
|
|
return scrapeProfile(model, channel);
|
|
|
|
|
}).filter(Boolean);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Build an actor profile from an initialised biography element.
 *
 * Bio entries are read as `Key: value` list items and keyed by the slugified key
 * (underscore delimiter). The avatar is resolved by finding the release whose
 * second poster source has the same URL pathname as `actorAvatar`, and taking
 * that release's first poster source instead.
 *
 * @param {{ query: object }} item - element wrapper; `query.cnts()` and `query.all()` are used
 * @param {?string} actorName - preferred profile name, falls back to the `name` bio entry
 * @param {?string} actorAvatar - avatar thumbnail URL; may be missing or unparsable
 * @param {object} channel - forwarded to scrapeProfileScenes
 * @param {?Object<string, object[]>} [releasesFromScene] - releases keyed by actor name; when
 *   given, bio entries are read from `ul li` rather than `div.modelInfo li`
 * @returns {object} profile with name, gender, birthPlace, releases, avatar and,
 *   when present in the bio, height and weight
 */
function scrapeProfile({ query }, actorName, actorAvatar, channel, releasesFromScene) {
	const profile = {};

	// the list selector depends on whether the caller supplied the releases
	const bioSelector = releasesFromScene ? 'ul li' : 'div.modelInfo li';

	const bio = Object.fromEntries(query.cnts(bioSelector).map((info) => {
		const [key, value] = info.split(':');

		// an entry without a colon has no value; keep the key instead of crashing on trim()
		return [slugify(key, '_'), value?.trim()];
	}));

	profile.name = actorName || bio.name;
	profile.gender = 'female';
	profile.birthPlace = bio.nationality;

	if (bio.height) {
		profile.height = feetInchesToCm(bio.height);
	}

	if (bio.weight) {
		profile.weight = lbsToKg(bio.weight);
	}

	profile.releases = releasesFromScene?.[profile.name] || scrapeProfileScenes(qu.initAll(query.all('.Models li')), actorName, channel);

	// missing or malformed URLs must not abort the whole profile, they simply never match
	const getPathname = (url) => {
		try {
			return new URL(url).pathname;
		} catch (error) {
			return null;
		}
	};

	// avatar is the poster of a scene, find scene and use its high quality poster instead
	const avatarPathname = getPathname(actorAvatar);

	const avatarRelease = avatarPathname === null
		? undefined
		: profile.releases.find((release) => getPathname(release?.poster?.[1]) === avatarPathname);

	profile.avatar = avatarRelease?.poster[0];

	return profile;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch the biography page of a collection and scrape a profile for every model on it.
 *
 * Each tab under `#ModelTabs` supplies an actor name and a `data-model` ID; the list
 * items under `#Model-<id>` are scraped as that actor's releases and handed to
 * scrapeProfile keyed by actor name.
 *
 * @param {string|number} entryId - collection ID used in the biography URL
 * @param {object} _release - unused
 * @param {object} channel - `channel.url` is used as origin for avatar URLs
 * @returns {Promise<?object[]>} scraped profiles, or null when the request failed
 */
async function fetchSceneActors(entryId, _release, channel) {
	const res = await qu.get(`https://inthecrack.com/Collection/Biography/${entryId}`);

	if (!res.ok) {
		return null;
	}

	const page = res.item.query;

	const releasesByActorName = Object.fromEntries(qu.initAll(page.all('#ModelTabs li')).map(({ query }) => {
		const name = query.cnt('a');
		const id = query.q('a', 'data-model');
		const releaseItems = qu.initAll(page.all(`#Model-${id} li`));

		return [name, scrapeProfileScenes(releaseItems, name, channel)];
	}));

	return qu.initAll(page.all('.modelInfo > li')).map((item) => scrapeProfile(
		item,
		null,
		item.query.img('img', 'src', { origin: channel.url }),
		channel,
		releasesByActorName,
	));
}
|
|
|
|
|
|
|
|
|
|
async function scrapeScene({ query, html }, url, channel) {
|
|
|
|
|
function scrapeAll(scenes, channel, models = {}, isUpcoming = false) {
|
|
|
|
|
return scenes.map((scene) => {
|
|
|
|
|
const release = {};
|
|
|
|
|
|
|
|
|
|
const entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1];
|
|
|
|
|
release.entryId = scene.id;
|
|
|
|
|
release.shootId = scene.id;
|
|
|
|
|
|
|
|
|
|
release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0];
|
|
|
|
|
release.entryId = release.shootId; // site entry ID can't be matched with upcoming scenes
|
|
|
|
|
release.title = scene.title;
|
|
|
|
|
release.date = unprint.extractDate(scene.releaseDate, 'YYYY-MM-DD');
|
|
|
|
|
|
|
|
|
|
const actors = await fetchSceneActors(entryId, release, channel);
|
|
|
|
|
release.actors = actors || query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
|
|
|
|
release.poster = `https://api.inthecrack.com/image/resize/images/posters/collections/${scene.id}.jpg?w=1400`;
|
|
|
|
|
|
|
|
|
|
release.description = query.cnt('p#CollectionDescription');
|
|
|
|
|
release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1];
|
|
|
|
|
// coming soon photo remains available after release date
|
|
|
|
|
release.photos = [`https://api.inthecrack.com/FileStore/images/coming_soon/${scene.id}.jpg`];
|
|
|
|
|
|
|
|
|
|
release.poster = qu.prefixUrl(html.match(/background-image: url\('(.*)'\)/)?.[1], channel.url);
|
|
|
|
|
|
|
|
|
|
release.chapters = query.all('.ClipOuter').map((el) => {
|
|
|
|
|
const chapter = {};
|
|
|
|
|
|
|
|
|
|
chapter.title = query.text(el, 'h4');
|
|
|
|
|
chapter.description = query.cnt(el, 'p');
|
|
|
|
|
chapter.duration = query.dur(el, '.InlineDuration');
|
|
|
|
|
|
|
|
|
|
const posterStyle = query.style(el, '.clipImage', 'background-image');
|
|
|
|
|
const poster = qu.prefixUrl(posterStyle.match(/url\((.*)\)/)?.[1], channel.url);
|
|
|
|
|
|
|
|
|
|
if (poster) {
|
|
|
|
|
const { origin, pathname } = new URL(poster);
|
|
|
|
|
|
|
|
|
|
chapter.poster = [
|
|
|
|
|
`${origin}${pathname}`, // full size
|
|
|
|
|
poster,
|
|
|
|
|
];
|
|
|
|
|
if (isUpcoming) {
|
|
|
|
|
return release;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (query.exists(el, '.ThreeDInfo')) {
|
|
|
|
|
chapter.tags = ['3d'];
|
|
|
|
|
}
|
|
|
|
|
release.url = `${channel.origin}/collection/${scene.id}`;
|
|
|
|
|
|
|
|
|
|
return chapter;
|
|
|
|
|
});
|
|
|
|
|
release.duration = scene.clipMinutesTotal * 60 || null;
|
|
|
|
|
release.actors = mergeModels(scene.models, models, channel);
|
|
|
|
|
|
|
|
|
|
release.productionDate = unprint.extractDate(scene.shootDate, 'YYYY-MM-DD');
|
|
|
|
|
release.photoCount = scene.picTotal;
|
|
|
|
|
|
|
|
|
|
release.productionLocation = scene.shootLocation;
|
|
|
|
|
|
|
|
|
|
return release;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function fetchLatest(channel, page = 1) {
|
|
|
|
|
const year = moment().subtract(page - 1, ' year').year();
|
|
|
|
|
|
|
|
|
|
const url = `${channel.url}/Collections/Date/${year}`;
|
|
|
|
|
const res = await qu.getAll(url, '.collectionGridLayout li');
|
|
|
|
|
async function fetchLatest(channel, page, context) {
|
|
|
|
|
const res = await unprint.get('https://api.inthecrack.com/Collection/');
|
|
|
|
|
|
|
|
|
|
if (res.ok) {
|
|
|
|
|
return scrapeAll(res.items, channel);
|
|
|
|
|
// API has no pagination, simulate so it doesn't blow up the rest of the guts
|
|
|
|
|
return scrapeAll(res.data.slice((page - 1) * 100, page * 100), channel, context.beforeFetchLatest);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res.status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function fetchUpcoming(channel) {
|
|
|
|
|
const res = await qu.getAll(channel.url, '#ComingSoon li');
|
|
|
|
|
const res = await unprint.get('https://api.inthecrack.com/Home/coming_soon');
|
|
|
|
|
|
|
|
|
|
if (res.ok) {
|
|
|
|
|
return scrapeUpcoming(res.items, channel);
|
|
|
|
|
// API has no pagination, simulate so it doesn't blow up the rest of the guts
|
|
|
|
|
return scrapeAll(res.data, channel, null, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res.status;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function fetchScene(url, channel) {
|
|
|
|
|
const res = await qu.get(url);
|
|
|
|
|
const qualityMap = {
|
|
|
|
|
// unsure about 2 and 5
|
|
|
|
|
1: 360,
|
|
|
|
|
3: 720,
|
|
|
|
|
4: 1080,
|
|
|
|
|
6: 2160,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (res.ok) {
|
|
|
|
|
return scrapeScene(res.item, url, channel);
|
|
|
|
|
function scrapeScene(scene, channel, baseRelease, models = {}) {
|
|
|
|
|
const release = {};
|
|
|
|
|
|
|
|
|
|
release.entryId = scene.id;
|
|
|
|
|
release.shootId = scene.id;
|
|
|
|
|
|
|
|
|
|
release.url = `${channel.origin}/collection/${scene.id}`;
|
|
|
|
|
|
|
|
|
|
release.title = scene.title;
|
|
|
|
|
release.description = scene.description;
|
|
|
|
|
|
|
|
|
|
release.actors = mergeModels(scene.models, models, channel);
|
|
|
|
|
|
|
|
|
|
release.productionDate = unprint.extractDate(scene.shootDate, 'YYYY-MM-DD');
|
|
|
|
|
release.productionLocation = scene.shootLocation;
|
|
|
|
|
|
|
|
|
|
release.poster = `https://api.inthecrack.com/image/resize/images/posters/collections/${scene.id}.jpg?w=1400`;
|
|
|
|
|
|
|
|
|
|
release.photos = scene.galleryImages
|
|
|
|
|
?.filter((image) => image.imageType === 1) // type 1 and 2 are dupes as far as thumbs are concerned
|
|
|
|
|
.slice(0, 15) // only first 15 photos have a free thumb
|
|
|
|
|
.map((image) => image.filename && `https://api.inthecrack.com/FileStore/images/gallerysamples/${scene.id}/${image.filename}`).filter(Boolean);
|
|
|
|
|
|
|
|
|
|
release.chapters = scene.clips?.map((clip) => ({
|
|
|
|
|
entryId: clip.id,
|
|
|
|
|
title: clip.title,
|
|
|
|
|
description: clip.description,
|
|
|
|
|
date: unprint.extractDate(clip.releaseDate, 'YYYY-MM-DD'),
|
|
|
|
|
duration: clip.length,
|
|
|
|
|
// this is how the site itself renders the thumbnails, I shit you not. does not return valid image without ?w parameter
|
|
|
|
|
poster: `https://api.inthecrack.com/image/resize/images/posters/clips/${clip.videos?.[0]?.filename.match(/^(.*?)(?=\d+x\d+\.mp4)/)[0]}.jpg?w=1400`,
|
|
|
|
|
}));
|
|
|
|
|
|
|
|
|
|
release.qualities = scene.clips?.[0]?.videos?.map((video) => qualityMap[video.videoResolutionId]).filter(Boolean);
|
|
|
|
|
|
|
|
|
|
if (!baseRelease.date) {
|
|
|
|
|
// base release has 'official' release date, deep data only has chapter dates
|
|
|
|
|
// though, this is probably how they calculate the collection date, too
|
|
|
|
|
release.date = release.chapters
|
|
|
|
|
?.map((chapter) => chapter.date)
|
|
|
|
|
.filter(Boolean)
|
|
|
|
|
.toSorted((dateA, dateB) => dateA - dateB)[0];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res.status;
|
|
|
|
|
return release;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function fetchProfile({ name: actorName }, channel, _include) {
|
|
|
|
|
const firstLetter = actorName.charAt(0).toUpperCase();
|
|
|
|
|
const url = `${channel.url}/Collections/Name/${firstLetter}`;
|
|
|
|
|
const res = await qu.getAll(url, '.collectionGridLayout li');
|
|
|
|
|
|
|
|
|
|
if (res.ok) {
|
|
|
|
|
const actorItem = res.items.find(({ query }) => slugify(query.cnt('span')) === slugify(actorName));
|
|
|
|
|
|
|
|
|
|
if (actorItem) {
|
|
|
|
|
const actorUrl = actorItem.query.url('a', 'href', { origin: channel.url });
|
|
|
|
|
const actorAvatar = actorItem.query.img('img', 'src', { origin: channel.url });
|
|
|
|
|
const actorRes = await qu.get(actorUrl);
|
|
|
|
|
|
|
|
|
|
if (actorRes.ok) {
|
|
|
|
|
return scrapeProfile(actorRes.item, actorName, actorAvatar, channel);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return actorRes.status;
|
|
|
|
|
}
|
|
|
|
|
/**
 * Fetch a single collection from the API and scrape it into a release.
 *
 * The collection ID is the run of digits following `/collection/` in the URL path.
 *
 * @param {string} url - scene URL, must be parsable by `new URL()`
 * @param {object} channel - forwarded to scrapeScene
 * @param {object} baseRelease - forwarded to scrapeScene
 * @param {object} context - `context.beforeFetchScenes` is passed to scrapeScene as the models argument
 * @returns {Promise<?object|number>} the scraped release, the response status when the
 *   request failed, or null when the URL holds no collection ID
 */
async function fetchScene(url, channel, baseRelease, context) {
	const { pathname } = new URL(url);
	const collectionId = /\/collection\/(\d+)/.exec(pathname)?.[1];

	if (!collectionId) {
		return null;
	}

	const res = await unprint.get(`https://api.inthecrack.com/Collection/${collectionId}`);

	return res.ok
		? scrapeScene(res.data, channel, baseRelease, context.beforeFetchScenes)
		: res.status;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch the model list from the API and index it by model ID.
 *
 * @returns {Promise<Object<string, object>>} models keyed by `id`; an empty object when the
 *   request failed or the response data could not be indexed
 */
async function fetchModels() {
	const res = await unprint.get('https://api.inthecrack.com/Model/');

	if (!res.ok) {
		return {};
	}

	try {
		return Object.fromEntries(res.data.map((model) => [model.id, model]));
	} catch (error) {
		// we can continue, we just won't have model names
		return {};
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Resolve the API model ID for an actor.
 *
 * Sources are tried in order:
 * 1. `actor.entryId`, returned as-is when truthy;
 * 2. the digits following `/modelcollection/` or `/modelcollections/` in the path of `actor.url`;
 * 3. the `id` of the first model from fetchModels() whose slugified name equals the
 *    slugified `actor.name`.
 *
 * @param {{ entryId?: (string|number), url?: string, name?: string }} actor
 * @returns {Promise<string|number|null>} the model ID, or null when none of the sources matched
 */
async function getModelId(actor) {
	if (actor.entryId) {
		return actor.entryId;
	}

	if (actor.url) {
		let pathname = null;

		try {
			({ pathname } = new URL(actor.url));
		} catch (error) {
			// relative or malformed URL, fall through to the lookup by name
		}

		// profile URLs built by this scraper use the plural /modelcollections/ path, accept both spellings
		const modelId = pathname?.match(/\/modelcollections?\/(\d+)/)?.[1];

		if (modelId) {
			return modelId;
		}
	}

	const modelsById = await fetchModels();
	const actorSlug = slugify(actor.name);
	const model = Object.values(modelsById).find((searchModel) => slugify(searchModel.name) === actorSlug);

	if (model) {
		return model.id;
	}

	return null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a model from the API and scrape it into a profile.
 *
 * @param {object} actor - passed to getModelId to resolve the model ID
 * @param {object} channel - forwarded to scrapeProfile
 * @returns {Promise<?object>} the scraped profile, or null when no model ID could be
 *   resolved or the request failed
 */
async function fetchProfile(actor, channel) {
	const modelId = await getModelId(actor);

	if (!modelId) {
		return null;
	}

	const res = await unprint.get(`https://api.inthecrack.com/Model/${modelId}`);

	return res.ok
		? scrapeProfile(res.data, channel)
		: null;
}
|
|
|
|
|
|
|
|
|
|
module.exports = {
|
|
|
|
|
fetchLatest,
|
|
|
|
|
fetchUpcoming,
|
|
|
|
|
fetchScene,
|
|
|
|
|
fetchProfile,
|
|
|
|
|
beforeFetchLatest: fetchModels,
|
|
|
|
|
beforeFetchScenes: fetchModels,
|
|
|
|
|
};
|
|
|
|
|
|