diff --git a/assets/components/home/home.vue b/assets/components/home/home.vue index 3aee84d6..14fb0e2c 100644 --- a/assets/components/home/home.vue +++ b/assets/components/home/home.vue @@ -82,8 +82,8 @@ export default { diff --git a/assets/components/releases/clips.vue b/assets/components/releases/clips.vue new file mode 100644 index 00000000..6e683560 --- /dev/null +++ b/assets/components/releases/clips.vue @@ -0,0 +1,126 @@ + + + + + diff --git a/assets/components/releases/details.vue b/assets/components/releases/details.vue index 5f2a0a11..561e481c 100644 --- a/assets/components/releases/details.vue +++ b/assets/components/releases/details.vue @@ -3,7 +3,6 @@
- {{ formatDate(release.date, 'MMM D, YYYY', release.datePrecision) }} - {{ formatDate(release.date, 'MMMM D, YYYY', release.datePrecision) }} + {{ release.date ? formatDate(release.date, 'MMM D, YYYY', release.datePrecision) : 'Date N/A' }} + {{ release.date ? formatDate(release.date, 'MMMM D, YYYY', release.datePrecision) : 'Date unknown' }} chapter.poster) : []); + const clipPostersById = (this.release.clips || []).reduce((acc, clip) => ({ ...acc, [clip.poster.id]: clip.poster }), {}); + const uniqueClipPosters = Array.from(new Set(this.release.clips.map(clip => clip.poster.id) || [])).map(posterId => clipPostersById[posterId]); + const photosWithClipPosters = (this.release.photos || []).concat(uniqueClipPosters); if (this.release.trailer || this.release.teaser) { // poster will be on trailer video - return photosWithChapterPosters; + return photosWithClipPosters; } if (this.release.poster) { // no trailer, add poster to photos - return [this.release.poster].concat(this.release.photos).concat(photosWithChapterPosters); + return [this.release.poster].concat(this.release.photos).concat(photosWithClipPosters); } // no poster available - return photosWithChapterPosters; + return photosWithClipPosters; } export default { diff --git a/assets/components/releases/scene-tile.vue b/assets/components/releases/scene-tile.vue index 5fd53b39..4540f91b 100644 --- a/assets/components/releases/scene-tile.vue +++ b/assets/components/releases/scene-tile.vue @@ -87,10 +87,17 @@
- {{ release.shootId }} + + {{ release.shootId }}
    {{ release.description }}

-
-
Duration -
- {{ Math.floor(release.duration / 3600).toString().padStart(2, '0') }}: - {{ Math.floor((release.duration % 3600) / 60).toString().padStart(2, '0') }}: - {{ (release.duration % 60).toString().padStart(2, '0') }} -
+
{{ formatDuration(release.duration) }}
+
+ Clips + + +
+
curateRelease(scene)); if (release.movies) curatedRelease.movies = release.movies.map(({ movie }) => curateRelease(movie)); - if (release.chapters) curatedRelease.chapters = release.chapters.map(chapter => curateRelease(chapter)); + if (release.clips) curatedRelease.clips = release.clips.map(clip => curateRelease(clip)); if (release.photos) curatedRelease.photos = release.photos.map(({ media }) => media); if (release.covers) curatedRelease.covers = release.covers.map(({ media }) => media); if (release.trailer) curatedRelease.trailer = release.trailer.media; diff --git a/assets/js/format.js b/assets/js/format.js new file mode 100644 index 00000000..e37e6fe8 --- /dev/null +++ b/assets/js/format.js @@ -0,0 +1,29 @@ +import dayjs from 'dayjs'; + +export function formatDuration(duration, forceHours) { + const hours = Math.floor(duration / 3600); + const minutes = Math.floor((duration % 3600) / 60); + const seconds = Math.floor(duration % 60); + + const [formattedHours, formattedMinutes, formattedSeconds] = [hours, minutes, seconds].map(segment => segment.toString().padStart(2, '0')); + + if (duration >= 3600 || forceHours) { + return `${formattedHours}:${formattedMinutes}:${formattedSeconds}`; + } + + return `${formattedMinutes}:${formattedSeconds}`; +} + +export function formatDate(date, format = 'MMMM D, YYYY', precision = 'day') { + if (precision === 'year') { + const newFormat = format.match(/Y+/); + return dayjs(date).format(newFormat ? newFormat[0] : 'YYYY'); + } + + if (precision === 'month') { + const newFormat = format.match(/(M{1,4})|(Y{2,4})/g); + return dayjs(date).format(newFormat ? newFormat.join(' ') : 'MMMM YYYY'); + } + + return dayjs(date).format(format); +} diff --git a/assets/js/fragments.js b/assets/js/fragments.js index 45f015e4..5e3c084e 100644 --- a/assets/js/fragments.js +++ b/assets/js/fragments.js @@ -165,6 +165,7 @@ const releaseTrailerFragment = ` const releaseTeaserFragment = ` teaser: releasesTeaserByReleaseId { media { + id index path thumbnail @@ -255,20 +256,21 @@ const releaseFragment = ` ${releaseTrailerFragment} ${releaseTeaserFragment} ${siteFragment} - chapters { + clips { id title description duration - tags: chaptersTags { + tags: clipsTags { tag { id name slug } } - poster: chaptersPosterByChapterId { + poster: clipsPosterByClipId { media { + id index path thumbnail @@ -297,6 +299,7 @@ const releaseFragment = ` slug covers: moviesCovers { media { + id index path thumbnail diff --git a/assets/js/main.js b/assets/js/main.js index faeae36c..6adb4cd2 100644 --- a/assets/js/main.js +++ b/assets/js/main.js @@ -5,29 +5,16 @@ import dayjs from 'dayjs'; import router from './router'; import initStore from './store'; - import initUiObservers from './ui/observers'; +import { formatDate, formatDuration } from './format'; + import '../css/style.scss'; import Container from '../components/container/container.vue'; import Icon from '../components/icon/icon.vue'; import Footer from '../components/footer/footer.vue'; -function formatDate(date, format = 'MMMM D, YYYY', precision = 'day') { - if (precision === 'year') { - const newFormat = format.match(/Y+/); - return dayjs(date).format(newFormat ? newFormat[0] : 'YYYY'); - } - - if (precision === 'month') { - const newFormat = format.match(/(M{1,4})|(Y{2,4})/g); - return dayjs(date).format(newFormat ? newFormat.join(' ') : 'MMMM YYYY'); - } - - return dayjs(date).format(format); -} - function init() { const store = initStore(router); @@ -54,6 +41,7 @@ function init() { }, methods: { formatDate, + formatDuration, isAfter: (dateA, dateB) => dayjs(dateA).isAfter(dateB), isBefore: (dateA, dateB) => dayjs(dateA).isBefore(dateB), }, diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index 2f3d2cb1..bbad1083 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -853,7 +853,7 @@ exports.up = knex => Promise.resolve() .references('id') .inTable('media'); })) - .then(() => knex.schema.createTable('chapters', (table) => { + .then(() => knex.schema.createTable('clips', (table) => { table.increments('id', 16); table.integer('release_id', 12) @@ -861,9 +861,9 @@ exports.up = knex => Promise.resolve() .inTable('releases') .notNullable(); - table.integer('chapter', 6); + table.integer('clip', 6); - table.unique(['release_id', 'chapter']); + table.unique(['release_id', 'clip']); table.text('title'); table.text('description'); @@ -882,44 +882,44 @@ exports.up = knex => Promise.resolve() table.datetime('created_at') .defaultTo(knex.fn.now()); })) - .then(() => knex.schema.createTable('chapters_posters', (table) => { - table.integer('chapter_id', 16) + .then(() => knex.schema.createTable('clips_posters', (table) => { + table.integer('clip_id', 16) .notNullable() .references('id') - .inTable('chapters'); + .inTable('clips'); table.text('media_id', 21) .notNullable() .references('id') .inTable('media'); - table.unique('chapter_id'); + table.unique('clip_id'); })) - .then(() => knex.schema.createTable('chapters_photos', (table) => { - table.integer('chapter_id', 16) + .then(() => knex.schema.createTable('clips_photos', (table) => { + table.integer('clip_id', 16) .notNullable() .references('id') - .inTable('chapters'); + .inTable('clips'); table.text('media_id', 21) .notNullable() .references('id') .inTable('media'); - table.unique(['chapter_id', 'media_id']); + table.unique(['clip_id', 'media_id']); })) - .then(() => knex.schema.createTable('chapters_tags', (table) => { + .then(() => knex.schema.createTable('clips_tags', (table) => { table.integer('tag_id', 12) .notNullable() .references('id') .inTable('tags'); - table.integer('chapter_id', 16) + table.integer('clip_id', 16) .notNullable() .references('id') - .inTable('chapters'); + .inTable('clips'); - table.unique(['tag_id', 'chapter_id']); + table.unique(['tag_id', 'clip_id']); })) // SEARCH .then(() => { // eslint-disable-line arrow-body-style @@ -1100,9 +1100,9 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style DROP TABLE IF EXISTS movies_scenes CASCADE; DROP TABLE IF EXISTS movies_trailers CASCADE; - DROP TABLE IF EXISTS chapters_tags CASCADE; - DROP TABLE IF EXISTS chapters_posters CASCADE; - DROP TABLE IF EXISTS chapters_photos CASCADE; + DROP TABLE IF EXISTS clips_tags CASCADE; + DROP TABLE IF EXISTS clips_posters CASCADE; + DROP TABLE IF EXISTS clips_photos CASCADE; DROP TABLE IF EXISTS batches CASCADE; @@ -1122,7 +1122,7 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style DROP TABLE IF EXISTS tags_posters CASCADE; DROP TABLE IF EXISTS tags_photos CASCADE; DROP TABLE IF EXISTS movies CASCADE; - DROP TABLE IF EXISTS chapters CASCADE; + DROP TABLE IF EXISTS clips CASCADE; DROP TABLE IF EXISTS releases CASCADE; DROP TABLE IF EXISTS actors CASCADE; DROP TABLE IF EXISTS directors CASCADE; diff --git a/public/img/tags/anal-toys/0.jpeg b/public/img/tags/anal-toys/0.jpeg new file mode 100644 index 00000000..6f136bed Binary files /dev/null and b/public/img/tags/anal-toys/0.jpeg differ diff --git a/public/img/tags/anal-toys/lazy/0.jpeg b/public/img/tags/anal-toys/lazy/0.jpeg new file mode 100644 index 00000000..b04f67ca Binary files /dev/null and b/public/img/tags/anal-toys/lazy/0.jpeg differ diff --git a/public/img/tags/anal-toys/thumbs/0.jpeg b/public/img/tags/anal-toys/thumbs/0.jpeg new file mode 100644 index 00000000..566a6fbd Binary files /dev/null and b/public/img/tags/anal-toys/thumbs/0.jpeg differ diff --git a/public/img/tags/double-dildo/0.jpeg b/public/img/tags/double-dildo/0.jpeg index e4b7c9c1..b89cef6a 100644 Binary files a/public/img/tags/double-dildo/0.jpeg and b/public/img/tags/double-dildo/0.jpeg differ diff --git a/public/img/tags/double-dildo/lazy/0.jpeg b/public/img/tags/double-dildo/lazy/0.jpeg index e3f23e63..cf0de44c 100644 Binary files a/public/img/tags/double-dildo/lazy/0.jpeg and b/public/img/tags/double-dildo/lazy/0.jpeg differ diff --git a/public/img/tags/double-dildo/thumbs/0.jpeg b/public/img/tags/double-dildo/thumbs/0.jpeg index c40bfff1..2a6c8d46 100644 Binary files a/public/img/tags/double-dildo/thumbs/0.jpeg and b/public/img/tags/double-dildo/thumbs/0.jpeg differ diff --git a/seeds/00_tags.js b/seeds/00_tags.js index cd532887..cfc25f98 100644 --- a/seeds/00_tags.js +++ b/seeds/00_tags.js @@ -58,7 +58,7 @@ const groups = [ const tags = [ { - name: '3d', + name: '3D', slug: '3d', description: 'Available in 3D.', }, diff --git a/seeds/02_sites.js b/seeds/02_sites.js index f44141b8..88d0974f 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -2649,7 +2649,7 @@ const sites = [ { slug: 'inthecrack', name: 'InTheCrack', - url: 'https://inthecrack.com/', + url: 'https://inthecrack.com', }, // INTERRACIAL PASS { diff --git a/seeds/03_studios.js b/seeds/03_studios.js index 3f210473..19de0282 100644 --- a/seeds/03_studios.js +++ b/seeds/03_studios.js @@ -146,6 +146,12 @@ const studios = [ url: 'https://www.legalporno.com/studios/kinky-sex', parent: 'legalporno', }, + { + slug: 'sexyangelproductions', + name: 'Sexy Angel Productions', + url: 'https://www.legalporno.com/studios/sexy-angel-productions', + parent: 'legalporno', + }, { slug: 'nfstudio', name: 'N&F Studio', diff --git a/seeds/04_media.js b/seeds/04_media.js index 7244e90e..eb20e034 100644 --- a/seeds/04_media.js +++ b/seeds/04_media.js @@ -589,6 +589,7 @@ const tagPosters = [ ['airtight', 6, 'Remy Lacroix in "Ass Worship 14" for Jules Jordan'], ['anal', 0, 'Adriana Chechik in "Manuel Creampies Their Asses 3" for Jules Jordan'], ['anal-creampie', 1, 'Aleska Diamond in "Aleska Wants More" for Asshole Fever'], + ['anal-toys', 0, 'Kira Noir in 1225 for InTheCrack'], ['ass-eating', 0, 'Angelica Heart and Leanna Sweet in "ATM Bitches" for Asshole Fever'], ['asian', 0, 'Jade Kush for Erotica X'], ['atm', 2, 'Jureka Del Mar in "Stretched Out" for Her Limit'], diff --git a/src/app.js b/src/app.js index eb9aca00..e849fb89 100644 --- a/src/app.js +++ b/src/app.js @@ -38,10 +38,10 @@ async function init() { : [...(updateBaseScenes || []), ...(actorBaseScenes || [])]; const sceneMovies = deepScenes && deepScenes.map(scene => scene.movie).filter(Boolean); - const deepMovies = await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]); + const deepMovies = argv.sceneMovies && await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]); - const movieScenes = deepMovies.map(movie => movie.scenes).flat().filter(Boolean); - const deepMovieScenes = await fetchScenes(movieScenes); + const movieScenes = deepMovies.map(movie => movie.scenes.map(scene => ({ ...scene, entity: movie.entity }))).flat().filter(Boolean); + const deepMovieScenes = argv.movieScenes ? await fetchScenes(movieScenes) : movieScenes; if (argv.inspect) { console.log(util.inspect(deepScenes)); @@ -52,7 +52,7 @@ async function init() { if (deepMovies.length > 0) { const storedMovieScenes = await storeScenes(deepMovieScenes); - await storeMovies(deepMovies, storedMovieScenes); + await storeMovies(deepMovies, storedMovieScenes || []); } if (deepScenes.length > 0 || deepMovieScenes.length > 0) { diff --git a/src/argv.js b/src/argv.js index d7560590..bfa15d78 100644 --- a/src/argv.js +++ b/src/argv.js @@ -67,6 +67,7 @@ const { argv } = yargs describe: 'Fetch all scenes for an actor', type: 'boolean', default: false, + alias: 'actor-scenes', }) .option('actors-sources', { describe: 'Use these scrapers for actor data', diff --git a/src/deep.js b/src/deep.js index 1916b99c..3603ae0e 100644 --- a/src/deep.js +++ b/src/deep.js @@ -135,6 +135,10 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') { } catch (error) { logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`); + if (argv.debug) { + console.error(error); + } + if (error.code === 'NO_ENTRY_ID') { return null; } diff --git a/src/scrapers/inthecrack.js b/src/scrapers/inthecrack.js index 3dad77a3..8bef0ef9 100644 --- a/src/scrapers/inthecrack.js +++ b/src/scrapers/inthecrack.js @@ -4,17 +4,19 @@ const moment = require('moment'); const qu = require('../utils/q'); const slugify = require('../utils/slugify'); +const { feetInchesToCm, lbsToKg } = require('../utils/convert'); function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a', 'href', { origin: channel.url }); - release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1]; + // release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1]; can't be matched with upcoming scenes release.shootId = query.cnt('a span:nth-of-type(1)').match(/^\d+/)?.[0]; - release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD'); + release.entryId = release.shootId; + release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD'); release.actors = (query.q('a img', 'alt') || query.cnt('a span:nth-of-type(1)'))?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g); release.poster = release.shootId @@ -25,20 +27,152 @@ function scrapeAll(scenes, channel) { }); } -function scrapeScene({ query, html }, url, channel) { +function scrapeUpcoming(scenes, channel) { + return scenes.map(({ query }) => { + const release = {}; + + const title = query.cnt('span'); + + release.entryId = title.match(/^\d+/)[0]; + release.actors = title.slice(0, title.indexOf('-')).match(/[a-zA-Z]+(\s[a-zA-Z]+)*/g); + + const date = moment.utc(title.match(/\w+ \d+\w+$/)[0], 'MMM Do'); + + if (date.isBefore()) { + // date is next year + release.date = date.add(1, 'year').toDate(); + } else { + release.date = date.toDate(); + } + + release.poster = [ + `https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`, + query.img('img', 'src', { origin: channel.url }), + ]; + + return release; + }); +} + +function scrapeProfileScenes(items, actorName, channel) { + return items.map(({ query }) => { + const release = {}; + + if (slugify(query.cnt()) === 'no-other-collections') { + return null; + } + + const details = query.cnts('figure p').reduce((acc, info) => { + const [key, value] = info.split(':'); + + return { + ...acc, + [slugify(key, '_')]: value?.trim(), + }; + }, {}); + + release.url = query.url('a', 'href', { origin: channel.url }); + + release.shootId = details.collection.match(/\d+/)[0]; + release.entryId = release.shootId; + + release.date = qu.parseDate(details.release_date, 'YYYY-MM-DD'); + release.actors = [actorName]; + + /* rely on clip length + const durationString = Object.keys(details).find(info => /\d+_min_video/.test(info)); + release.duration = durationString && Number(durationString.match(/^\d+/)?.[0]) * 60; + */ + + release.productionLocation = details.shoot_location; + + release.poster = [ + `https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`, + query.img('img', 'src', { origin: channel.url }), + ]; + + return release; + }).filter(Boolean); +} + +function scrapeProfile({ query }, actorName, actorAvatar, channel, releasesFromScene) { + const profile = {}; + + const bio = query.cnts(releasesFromScene ? 'ul li' : 'div.modelInfo li').reduce((acc, info) => { + const [key, value] = info.split(':'); + + return { + ...acc, + [slugify(key, '_')]: value.trim(), + }; + }, {}); + + profile.name = actorName || bio.name; + profile.gender = 'female'; + profile.birthPlace = bio.nationality; + + if (bio.height) profile.height = feetInchesToCm(bio.height); + if (bio.weight) profile.weight = lbsToKg(bio.weight); + + profile.releases = releasesFromScene?.[profile.name] || scrapeProfileScenes(qu.initAll(query.all('.Models li')), actorName, channel); + + // avatar is the poster of a scene, find scene and use its high quality poster instead + const avatarRelease = profile.releases.find(release => new URL(release.poster[1]).pathname === new URL(actorAvatar).pathname); + profile.avatar = avatarRelease?.poster[0]; + + return profile; +} + +async function fetchSceneActors(entryId, _release, channel) { + const url = `https://inthecrack.com/Collection/Biography/${entryId}`; + const res = await qu.get(url); + + if (res.ok) { + const actorTabs = qu.initAll(res.item.query.all('#ModelTabs li')).map(({ query }) => ({ + name: query.cnt('a'), + id: query.q('a', 'data-model'), + })); + + const actorReleasesByActorName = actorTabs.reduce((acc, { name, id }) => { + const releaseEls = qu.initAll(res.item.query.all(`#Model-${id} li`)); + const releases = scrapeProfileScenes(releaseEls, name, channel); + + return { + ...acc, + [name]: releases, + }; + }, {}); + + const actors = qu.initAll(res.item.query.all('.modelInfo > li')).map((item) => { + const avatar = item.query.img('img', 'src', { origin: channel.url }); + const profile = scrapeProfile(item, null, avatar, channel, actorReleasesByActorName); + + return profile; + }); + + return actors; + } + + return null; +} + +async function scrapeScene({ query, html }, url, channel) { const release = {}; - release.entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1]; - release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0]; + const entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1]; - release.actors = query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g); + release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0]; + release.entryId = release.shootId; // site entry ID can't be matched with upcoming scenes + + const actors = await fetchSceneActors(entryId, release, channel); + release.actors = actors || query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g); release.description = query.cnt('p#CollectionDescription'); release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1]; release.poster = qu.prefixUrl(html.match(/background-image: url\('(.*)'\)/)?.[1], channel.url); - release.chapters = query.all('.ClipOuter').map((el) => { + release.clips = query.all('.ClipOuter').map((el) => { const chapter = {}; chapter.title = query.text(el, 'h4'); @@ -67,22 +201,6 @@ function scrapeScene({ query, html }, url, channel) { return release; } -function scrapeProfile({ query, el }, actorName, entity, include) { - const profile = {}; - - profile.description = query.cnt('.bio-text'); - profile.birthPlace = query.cnt('.birth-place span'); - - profile.avatar = query.img('.actor-photo img'); - - if (include.releases) { - return scrapeAll(qu.initAll(el, '.scene')); - } - - console.log(profile); - return profile; -} - async function fetchLatest(channel, page = 1) { const year = moment().subtract(page - 1, ' year').year(); @@ -96,6 +214,16 @@ async function fetchLatest(channel, page = 1) { return res.status; } +async function fetchUpcoming(channel) { + const res = await qu.getAll(channel.url, '#ComingSoon li'); + + if (res.ok) { + return scrapeUpcoming(res.items, channel); + } + + return res.status; +} + async function fetchScene(url, channel) { const res = await qu.get(url); @@ -106,12 +234,27 @@ async function fetchScene(url, channel) { return res.status; } -async function fetchProfile({ name: actorName }, entity, include) { - const url = `${entity.url}/actors/${slugify(actorName, '_')}`; - const res = await qu.get(url); +async function fetchProfile({ name: actorName }, channel, _include) { + const firstLetter = actorName.charAt(0).toUpperCase(); + const url = `${channel.url}/Collections/Name/${firstLetter}`; + const res = await qu.getAll(url, '.collectionGridLayout li'); if (res.ok) { - return scrapeProfile(res.item, actorName, entity, include); + const actorItem = res.items.find(({ query }) => slugify(query.cnt('span')) === slugify(actorName)); + + if (actorItem) { + const actorUrl = actorItem.query.url('a', 'href', { origin: channel.url }); + const actorAvatar = actorItem.query.img('img', 'src', { origin: channel.url }); + const actorRes = await qu.get(actorUrl); + + if (actorRes.ok) { + return scrapeProfile(actorRes.item, actorName, actorAvatar, channel); + } + + return actorRes.status; + } + + return null; } return res.status; @@ -119,6 +262,7 @@ async function fetchProfile({ name: actorName }, entity, include) { module.exports = { fetchLatest, + fetchUpcoming, fetchScene, - // fetchProfile, + fetchProfile, }; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index f67add47..a1a0ec5c 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -197,6 +197,7 @@ module.exports = { iconmale, interracialpass: hush, interracialpovs: hush, + inthecrack, jamesdeen: fullpornnetwork, julesjordan, kellymadison, diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js index cc940f11..9ec8f280 100644 --- a/src/scrapers/vixen.js +++ b/src/scrapers/vixen.js @@ -49,7 +49,10 @@ async function getTrailer(scene, site, url) { file: scene.previewVideoUrl1080P, sizes: qualities.join('+'), type: 'trailer', - }, { referer: url }); + }, { + referer: url, + origin: site.url, + }); if (!tokenRes.ok) { return null; diff --git a/src/store-releases.js b/src/store-releases.js index 1cfd0998..b977a25c 100644 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -243,44 +243,44 @@ async function updateReleasesSearch(releaseIds) { } } -async function storeChapters(releases) { - const chapters = releases.map(release => release.chapters?.map((chapter, index) => ({ - title: chapter.title, - description: chapter.description, +async function storeClips(releases) { + const clips = releases.map(release => release.clips?.map((clip, index) => ({ + title: clip.title, + description: clip.description, releaseId: release.id, - chapter: index + 1, - duration: chapter.duration, - poster: chapter.poster, - photos: chapter.photos, - tags: chapter.tags, + clip: index + 1, + duration: clip.duration, + poster: clip.poster, + photos: clip.photos, + tags: clip.tags, }))).flat().filter(Boolean); - const curatedChapterEntries = chapters.map(chapter => ({ - title: chapter.title, - description: chapter.description, - duration: chapter.duration, - release_id: chapter.releaseId, - chapter: chapter.chapter, + const curatedClipEntries = clips.map(clip => ({ + title: clip.title, + description: clip.description, + duration: clip.duration, + release_id: clip.releaseId, + clip: clip.clip, })); - const storedChapters = await bulkInsert('chapters', curatedChapterEntries); - const chapterIdsByReleaseIdAndChapter = storedChapters.reduce((acc, chapter) => ({ + const storedClips = await bulkInsert('clips', curatedClipEntries, ['release_id', 'clip']); + const clipIdsByReleaseIdAndClip = storedClips.reduce((acc, clip) => ({ ...acc, - [chapter.release_id]: { - ...acc[chapter.release_id], - [chapter.chapter]: chapter.id, + [clip.release_id]: { + ...acc[clip.release_id], + [clip.clip]: clip.id, }, }), {}); - const chaptersWithId = chapters.map(chapter => ({ - ...chapter, - id: chapterIdsByReleaseIdAndChapter[chapter.releaseId][chapter.chapter], + const clipsWithId = clips.map(clip => ({ + ...clip, + id: clipIdsByReleaseIdAndClip[clip.releaseId][clip.clip], })); - await associateReleaseTags(chaptersWithId, 'chapter'); + await associateReleaseTags(clipsWithId, 'clip'); // media is more error-prone, associate separately - await associateReleaseMedia(chaptersWithId, 'chapter'); + await associateReleaseMedia(clipsWithId, 'clip'); } async function storeScenes(releases) { @@ -318,7 +318,7 @@ async function storeScenes(releases) { await scrapeActors(actors.map(actor => actor.name)); } - await storeChapters(releasesWithId); + await storeClips(releasesWithId); logger.info(`Stored ${storedReleaseEntries.length} releases`); diff --git a/src/updates.js b/src/updates.js index d4536309..3db6a6a1 100644 --- a/src/updates.js +++ b/src/updates.js @@ -37,81 +37,58 @@ async function filterUniqueReleases(latestReleases, accReleases) { return uniqueReleases; } -function needNextPage(uniqueReleases, pageAccReleases, hasDates) { - if (uniqueReleases.length === 0) { - return false; +function needNextPage(releases, uniqueReleases, totalReleases, hasDates) { + if (argv.last) { + return totalReleases + releases.length < argv.last; } - if (argv.last && pageAccReleases.length < argv.last) { - // TODO: find a way to paginate if scraper filters page with multiple channels, see Kelly Madison + if (!hasDates) { + return totalReleases + releases.length < argv.nullDateLimit; + } + + const oldestReleaseOnPage = releases + .sort((releaseA, releaseB) => releaseB.date - releaseA.date) + .slice(-1)[0]; + + if (moment(oldestReleaseOnPage.date).isAfter(argv.after)) { + // oldest release on page is newer than the specified date cut-off return true; } - if (hasDates) { - const oldestReleaseOnPage = uniqueReleases - .sort((releaseA, releaseB) => releaseB.date - releaseA.date) - .slice(-1)[0]; - - if (moment(oldestReleaseOnPage.date).isAfter(argv.after)) { - // oldest release on page is newer than the specified date cut-off - return true; - } - } - - // dates missing, and limit for scenes without dates not yet reached - return pageAccReleases.length <= argv.nullDateLimit; + return false; } -async function scrapeReleases(scraper, entity, preData, upcoming = false) { - const scrapePage = async (page = 1, accReleases = []) => { - const latestReleases = upcoming - ? await scraper.fetchUpcoming(entity, page, include, preData) - : await scraper.fetchLatest(entity, page, include, preData); - - if (!Array.isArray(latestReleases)) { - // scraper is unable to fetch the releases and returned a HTTP code or null - logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`); - return accReleases; - } - - const latestReleasesWithEntity = latestReleases.map(release => ({ - ...release, - entity: release.entity || entity, // allow override - })); // attach entity the release is assigned to when stored - - const hasDates = latestReleasesWithEntity.every(release => !!release.date); - - console.log(hasDates); - - const uniqueReleases = argv.redownload - ? latestReleasesWithEntity - : await filterUniqueReleases(latestReleasesWithEntity, accReleases); - - const pageAccReleases = accReleases.concat(uniqueReleases); - - logger.verbose(`Scraped '${entity.name}' (${entity.parent?.name}) ${upcoming ? 'upcoming' : 'latest'} page ${page}, found ${uniqueReleases.length} unique updates`); - - if (needNextPage(uniqueReleases, pageAccReleases, hasDates)) { - return scrapePage(page + 1, pageAccReleases); - } - - return pageAccReleases; - }; - - const rawReleases = await scrapePage(argv.page || 1, []); +async function scrapeReleases(scraper, entity, preData, upcoming = false, page = 1, accReleases = [], totalReleases = 0) { const releases = upcoming - ? rawReleases.map(rawRelease => ({ ...rawRelease, upcoming: true })) - : rawReleases; + ? await scraper.fetchUpcoming(entity, page, include, preData) + : await scraper.fetchLatest(entity, page, include, preData); - if (argv.last) { - return releases.slice(0, argv.last); + if (!Array.isArray(releases)) { + // scraper is unable to fetch the releases and returned a HTTP code or null + logger.warn(`Scraper returned ${releases} when fetching latest from '${entity.name}' (${entity.parent?.name})`); + return accReleases; } - if (releases.every(release => release.date)) { - return releases.filter(release => moment(release.date).isAfter(argv.after)); + const releasesWithEntity = releases.map(release => ({ + ...release, + entity: release.entity || entity, // allow override + })); // attach entity the release is assigned to when stored + + const hasDates = releasesWithEntity.every(release => !!release.date); + + const limitedReleases = (argv.last && releasesWithEntity.slice(0, Math.max(argv.last - totalReleases, 0))) + || (hasDates && releasesWithEntity.filter(release => moment(release.date).isAfter(argv.after))) + || releasesWithEntity.slice(0, Math.max(argv.nullDateLimit - totalReleases, 0)); + + const uniqueReleases = argv.force + ? limitedReleases + : await filterUniqueReleases(limitedReleases, accReleases); + + if (needNextPage(releases, uniqueReleases, totalReleases, hasDates)) { + return scrapeReleases(scraper, entity, preData, upcoming, page + 1, accReleases.concat(uniqueReleases), totalReleases + releases.length); } - return releases.slice(0, argv.nullDateLimit); + return accReleases.concat(uniqueReleases); } async function scrapeLatestReleases(scraper, entity, preData) { @@ -120,9 +97,12 @@ async function scrapeLatestReleases(scraper, entity, preData) { } try { - return await scrapeReleases(scraper, entity, preData, false); + return await scrapeReleases(scraper, entity, preData, false, argv.page || 1); } catch (error) { - console.trace(error); + if (argv.debug) { + console.trace(error); + } + logger.warn(`Failed to scrape latest updates for '${entity.slug}' (${entity.parent?.slug}): ${error.message}`); } @@ -137,6 +117,10 @@ async function scrapeUpcomingReleases(scraper, entity, preData) { try { return await scrapeReleases(scraper, entity, preData, true); } catch (error) { + if (argv.debug) { + console.trace(error); + } + logger.warn(`Failed to scrape upcoming updates for '${entity.slug}' (${entity.parent?.slug}): ${error.message}`); } @@ -159,7 +143,7 @@ async function scrapeMovies(scraper, entity) { } async function scrapeChannelReleases(scraper, channelEntity, preData) { - const [latestReleases, upcomingReleases, movies] = await Promise.all([ + const [latestReleases, upcomingReleases] = await Promise.all([ argv.latest ? scrapeLatestReleases(scraper, channelEntity, preData) : [], @@ -171,8 +155,6 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) { : [], ]); - console.log(movies); - logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent?.name})`); return [...latestReleases, ...upcomingReleases];