'use strict'; const moment = require('moment'); const qu = require('../utils/q'); const slugify = require('../utils/slugify'); const { feetInchesToCm, lbsToKg } = require('../utils/convert'); function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a', 'href', { origin: channel.url }); // release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1]; can't be matched with upcoming scenes release.shootId = query.cnt('a span:nth-of-type(1)').match(/^\d+/)?.[0]; release.entryId = release.shootId; release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD'); release.actors = (query.q('a img', 'alt') || query.cnt('a span:nth-of-type(1)'))?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g); release.poster = release.shootId ? `https://inthecrack.com/assets/images/posters/collections/${release.shootId}.jpg` : query.img('a img', 'src', { origin: channel.url }); return release; }); } function scrapeUpcoming(scenes, channel) { return scenes.map(({ query }) => { const release = {}; const title = query.cnt('span'); release.entryId = title.match(/^\d+/)[0]; release.actors = title.slice(0, title.indexOf('-')).match(/[a-zA-Z]+(\s[a-zA-Z]+)*/g); const date = moment.utc(title.match(/\w+ \d+\w+$/)[0], 'MMM Do'); if (date.isBefore()) { // date is next year release.date = date.add(1, 'year').toDate(); } else { release.date = date.toDate(); } release.poster = [ `https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`, query.img('img', 'src', { origin: channel.url }), ]; return release; }); } function scrapeProfileScenes(items, actorName, channel) { return items.map(({ query }) => { const release = {}; if (slugify(query.cnt()) === 'no-other-collections') { return null; } const details = query.cnts('figure p').reduce((acc, info) => { const [key, value] = info.split(':'); return { ...acc, [slugify(key, '_')]: value?.trim(), }; }, {}); release.url = query.url('a', 'href', { origin: channel.url }); release.shootId = details.collection.match(/\d+/)[0]; release.entryId = release.shootId; release.date = qu.parseDate(details.release_date, 'YYYY-MM-DD'); release.actors = [actorName]; /* rely on clip length const durationString = Object.keys(details).find(info => /\d+_min_video/.test(info)); release.duration = durationString && Number(durationString.match(/^\d+/)?.[0]) * 60; */ release.productionLocation = details.shoot_location; release.poster = [ `https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`, query.img('img', 'src', { origin: channel.url }), ]; return release; }).filter(Boolean); } function scrapeProfile({ query }, actorName, actorAvatar, channel, releasesFromScene) { const profile = {}; const bio = query.cnts(releasesFromScene ? 'ul li' : 'div.modelInfo li').reduce((acc, info) => { const [key, value] = info.split(':'); return { ...acc, [slugify(key, '_')]: value.trim(), }; }, {}); profile.name = actorName || bio.name; profile.gender = 'female'; profile.birthPlace = bio.nationality; if (bio.height) profile.height = feetInchesToCm(bio.height); if (bio.weight) profile.weight = lbsToKg(bio.weight); profile.releases = releasesFromScene?.[profile.name] || scrapeProfileScenes(qu.initAll(query.all('.Models li')), actorName, channel); // avatar is the poster of a scene, find scene and use its high quality poster instead const avatarRelease = profile.releases.find((release) => new URL(release.poster[1]).pathname === new URL(actorAvatar).pathname); profile.avatar = avatarRelease?.poster[0]; return profile; } async function fetchSceneActors(entryId, _release, channel) { const url = `https://inthecrack.com/Collection/Biography/${entryId}`; const res = await qu.get(url); if (res.ok) { const actorTabs = qu.initAll(res.item.query.all('#ModelTabs li')).map(({ query }) => ({ name: query.cnt('a'), id: query.q('a', 'data-model'), })); const actorReleasesByActorName = actorTabs.reduce((acc, { name, id }) => { const releaseEls = qu.initAll(res.item.query.all(`#Model-${id} li`)); const releases = scrapeProfileScenes(releaseEls, name, channel); return { ...acc, [name]: releases, }; }, {}); const actors = qu.initAll(res.item.query.all('.modelInfo > li')).map((item) => { const avatar = item.query.img('img', 'src', { origin: channel.url }); const profile = scrapeProfile(item, null, avatar, channel, actorReleasesByActorName); return profile; }); return actors; } return null; } async function scrapeScene({ query, html }, url, channel) { const release = {}; const entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1]; release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0]; release.entryId = release.shootId; // site entry ID can't be matched with upcoming scenes const actors = await fetchSceneActors(entryId, release, channel); release.actors = actors || query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g); release.description = query.cnt('p#CollectionDescription'); release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1]; release.poster = qu.prefixUrl(html.match(/background-image: url\('(.*)'\)/)?.[1], channel.url); release.chapters = query.all('.ClipOuter').map((el) => { const chapter = {}; chapter.title = query.text(el, 'h4'); chapter.description = query.cnt(el, 'p'); chapter.duration = query.dur(el, '.InlineDuration'); const posterStyle = query.style(el, '.clipImage', 'background-image'); const poster = qu.prefixUrl(posterStyle.match(/url\((.*)\)/)?.[1], channel.url); if (poster) { const { origin, pathname } = new URL(poster); chapter.poster = [ `${origin}${pathname}`, // full size poster, ]; } if (query.exists(el, '.ThreeDInfo')) { chapter.tags = ['3d']; } return chapter; }); return release; } async function fetchLatest(channel, page = 1) { const year = moment().subtract(page - 1, ' year').year(); const url = `${channel.url}/Collections/Date/${year}`; const res = await qu.getAll(url, '.collectionGridLayout li'); if (res.ok) { return scrapeAll(res.items, channel); } return res.status; } async function fetchUpcoming(channel) { const res = await qu.getAll(channel.url, '#ComingSoon li'); if (res.ok) { return scrapeUpcoming(res.items, channel); } return res.status; } async function fetchScene(url, channel) { const res = await qu.get(url); if (res.ok) { return scrapeScene(res.item, url, channel); } return res.status; } async function fetchProfile({ name: actorName }, channel, _include) { const firstLetter = actorName.charAt(0).toUpperCase(); const url = `${channel.url}/Collections/Name/${firstLetter}`; const res = await qu.getAll(url, '.collectionGridLayout li'); if (res.ok) { const actorItem = res.items.find(({ query }) => slugify(query.cnt('span')) === slugify(actorName)); if (actorItem) { const actorUrl = actorItem.query.url('a', 'href', { origin: channel.url }); const actorAvatar = actorItem.query.img('img', 'src', { origin: channel.url }); const actorRes = await qu.get(actorUrl); if (actorRes.ok) { return scrapeProfile(actorRes.item, actorName, actorAvatar, channel); } return actorRes.status; } return null; } return res.status; } module.exports = { fetchLatest, fetchUpcoming, fetchScene, fetchProfile, };