forked from DebaucheryLibrarian/traxxx
Added movie support to MindGeek scraper.
This commit is contained in:
parent
50b7f521b5
commit
c6e977f842
|
@ -108,6 +108,7 @@
|
||||||
:fetch-releases="fetchEntity"
|
:fetch-releases="fetchEntity"
|
||||||
:items-total="totalCount"
|
:items-total="totalCount"
|
||||||
:items-per-page="limit"
|
:items-per-page="limit"
|
||||||
|
:available-tags="entity.tags"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<div class="releases">
|
<div class="releases">
|
||||||
|
|
|
@ -3,12 +3,6 @@
|
||||||
<div class="content-inner">
|
<div class="content-inner">
|
||||||
<SearchBar :placeholder="`Search ${totalCount} movies`" />
|
<SearchBar :placeholder="`Search ${totalCount} movies`" />
|
||||||
|
|
||||||
<TagFilter
|
|
||||||
class="filters-filter"
|
|
||||||
:filter="filter"
|
|
||||||
:available-tags="availableTags"
|
|
||||||
/>
|
|
||||||
|
|
||||||
<div
|
<div
|
||||||
ref="tiles"
|
ref="tiles"
|
||||||
class="tiles"
|
class="tiles"
|
||||||
|
@ -36,7 +30,6 @@
|
||||||
import MovieTile from './movie-tile.vue';
|
import MovieTile from './movie-tile.vue';
|
||||||
import SearchBar from '../search/bar.vue';
|
import SearchBar from '../search/bar.vue';
|
||||||
import Pagination from '../pagination/pagination.vue';
|
import Pagination from '../pagination/pagination.vue';
|
||||||
import TagFilter from '../filters/tag-filter.vue';
|
|
||||||
|
|
||||||
async function fetchMovies() {
|
async function fetchMovies() {
|
||||||
if (this.$route.query.query) {
|
if (this.$route.query.query) {
|
||||||
|
@ -80,7 +73,6 @@ export default {
|
||||||
MovieTile,
|
MovieTile,
|
||||||
SearchBar,
|
SearchBar,
|
||||||
Pagination,
|
Pagination,
|
||||||
TagFilter,
|
|
||||||
},
|
},
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
|
|
|
@ -105,6 +105,7 @@ function curateEntity(entity, parent, releases) {
|
||||||
};
|
};
|
||||||
|
|
||||||
if (entity.tags) curatedEntity.tags = entity.tags.map(({ tag }) => tag);
|
if (entity.tags) curatedEntity.tags = entity.tags.map(({ tag }) => tag);
|
||||||
|
if (entity.sceneTags) curatedEntity.sceneTags = entity.sceneTags;
|
||||||
|
|
||||||
if (entity.children) {
|
if (entity.children) {
|
||||||
if (entity.children.nodes) {
|
if (entity.children.nodes) {
|
||||||
|
|
|
@ -41,6 +41,11 @@ function initEntitiesActions(store, router) {
|
||||||
slug
|
slug
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sceneTags {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
slug
|
||||||
|
}
|
||||||
children: childEntitiesConnection(
|
children: childEntitiesConnection(
|
||||||
orderBy: [PRIORITY_DESC, NAME_ASC],
|
orderBy: [PRIORITY_DESC, NAME_ASC],
|
||||||
filter: {
|
filter: {
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
exports.up = async (knex) => knex.raw(`
|
|
||||||
CREATE VIEW movies_tagged AS
|
|
||||||
SELECT * FROM movies;
|
|
||||||
`);
|
|
||||||
|
|
||||||
exports.down = async (knex) => knex.raw(`
|
|
||||||
DROP VIEW IF EXISTS movies_tagged;
|
|
||||||
`);
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
exports.up = async (knex) => knex.raw(`
|
||||||
|
CREATE FUNCTION entities_scene_tags(entity entities, selectable_tags text[]) RETURNS SETOF tags AS $$
|
||||||
|
SELECT tags.*
|
||||||
|
FROM releases
|
||||||
|
LEFT JOIN
|
||||||
|
releases_tags ON releases_tags.release_id = releases.id
|
||||||
|
LEFT JOIN
|
||||||
|
tags ON tags.id = releases_tags.tag_id
|
||||||
|
WHERE
|
||||||
|
releases.entity_id = entity.id
|
||||||
|
AND
|
||||||
|
CASE WHEN array_length(selectable_tags, 1) IS NOT NULL
|
||||||
|
THEN tags.slug = ANY(selectable_tags)
|
||||||
|
ELSE true
|
||||||
|
END
|
||||||
|
GROUP BY tags.id
|
||||||
|
ORDER BY tags.name;
|
||||||
|
$$ LANGUAGE SQL STABLE;
|
||||||
|
`);
|
||||||
|
|
||||||
|
exports.down = async (knex) => knex.raw(`
|
||||||
|
DROP FUNCTION IF EXISTS entities_tags;
|
||||||
|
`);
|
|
@ -20,6 +20,7 @@ const scrapers = require('./scrapers/scrapers').actors;
|
||||||
const argv = require('./argv');
|
const argv = require('./argv');
|
||||||
const include = require('./utils/argv-include')(argv);
|
const include = require('./utils/argv-include')(argv);
|
||||||
const bulkInsert = require('./utils/bulk-insert');
|
const bulkInsert = require('./utils/bulk-insert');
|
||||||
|
const chunk = require('./utils/chunk');
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
|
|
||||||
const { toBaseReleases } = require('./deep');
|
const { toBaseReleases } = require('./deep');
|
||||||
|
@ -1048,7 +1049,8 @@ async function flushProfiles(actorIdsOrNames) {
|
||||||
logger.info(`Removed ${deleteCount} profiles`);
|
logger.info(`Removed ${deleteCount} profiles`);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function deleteActors(actorIdsOrNames) {
|
async function deleteActors(allActorIdsOrNames) {
|
||||||
|
const deleteCounts = await Promise.map(chunk(allActorIdsOrNames), async (actorIdsOrNames) => {
|
||||||
const actors = await knex('actors')
|
const actors = await knex('actors')
|
||||||
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
|
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
|
||||||
.orWhere((builder) => {
|
.orWhere((builder) => {
|
||||||
|
@ -1072,9 +1074,17 @@ async function deleteActors(actorIdsOrNames) {
|
||||||
.delete(),
|
.delete(),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
return { deletedScenesCount, deletedActorsCount };
|
||||||
|
}, { concurrency: 10 });
|
||||||
|
|
||||||
|
const deletedActorsCount = deleteCounts.reduce((acc, count) => acc + count.deletedActorsCount, 0);
|
||||||
|
const deletedScenesCount = deleteCounts.reduce((acc, count) => acc + count.deletedScenesCount, 0);
|
||||||
|
|
||||||
await flushOrphanedMedia();
|
await flushOrphanedMedia();
|
||||||
|
|
||||||
logger.info(`Removed ${deletedActorsCount} actors with ${deletedScenesCount} scenes`);
|
logger.info(`Removed ${deletedActorsCount} actors with ${deletedScenesCount} scenes`);
|
||||||
|
|
||||||
|
return deletedActorsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function flushActors() {
|
async function flushActors() {
|
||||||
|
|
|
@ -961,9 +961,12 @@ async function flushOrphanedMedia() {
|
||||||
await deleteS3Objects(orphanedMedia.filter((media) => media.is_s3));
|
await deleteS3Objects(orphanedMedia.filter((media) => media.is_s3));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
await fsPromises.rm(path.join(config.media.path, 'temp'), { recursive: true });
|
await fsPromises.rm(path.join(config.media.path, 'temp'), { recursive: true });
|
||||||
|
|
||||||
logger.info('Cleared temporary media directory');
|
logger.info('Cleared temporary media directory');
|
||||||
|
} catch (error) {
|
||||||
|
logger.warn(`Failed to clear temporary media directory: ${error.message}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
@ -11,6 +11,12 @@ const slugify = require('../utils/slugify');
|
||||||
const http = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||||
|
|
||||||
|
/**
 * Resolves the base URL under which a channel's releases live.
 *
 * Resolution order:
 *   1. an explicit `channel.parameters.scene` override, returned as-is;
 *   2. the channel's own URL when it is flagged `native` or is a network;
 *   3. the parent's URL;
 *   4. the channel's own URL as a last resort when it has no parent.
 *
 * @param {object|null} channel - Entity with optional `parameters`, `type`, `url` and `parent`.
 * @param {string} [path='/scene'] - Path segment appended to the resolved URL (e.g. '/movie').
 * @returns {string|null} Base path without trailing slash, or null when no URL can be
 *   resolved (e.g. no channel was passed, as happens for releases scraped from a profile).
 */
function getBasePath(channel, path = '/scene') {
	if (channel?.parameters?.scene) {
		return channel.parameters.scene;
	}

	const usesOwnUrl = channel?.parameters?.native || channel?.type === 'network';

	// a channel without a parent can only be addressed through its own URL
	const baseUrl = (usesOwnUrl && channel.url) || channel?.parent?.url || channel?.url;

	return baseUrl ? `${baseUrl}${path}` : null;
}
|
||||||
|
|
||||||
function getThumbs(scene) {
|
function getThumbs(scene) {
|
||||||
if (scene.images.poster) {
|
if (scene.images.poster) {
|
||||||
return Object.values(scene.images.poster) // can be { 0: {}, 1: {}, ... } instead of array
|
return Object.values(scene.images.poster) // can be { 0: {}, 1: {}, ... } instead of array
|
||||||
|
@ -18,7 +24,7 @@ function getThumbs(scene) {
|
||||||
.map((image) => image.xl.url);
|
.map((image) => image.xl.url);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scene.images.card_main_rect) {
|
if (Array.isArray(scene.images.card_main_rect)) {
|
||||||
return scene.images.card_main_rect
|
return scene.images.card_main_rect
|
||||||
.concat(scene.images.card_secondary_rect || [])
|
.concat(scene.images.card_secondary_rect || [])
|
||||||
.map((image) => image.xl.url.replace('.thumb', ''));
|
.map((image) => image.xl.url.replace('.thumb', ''));
|
||||||
|
@ -27,6 +33,20 @@ function getThumbs(scene) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the cover list for a release from its `images` object.
 *
 * Only the first cover is used. The result is a list holding a single entry, which is
 * itself a list of alternative URLs for that cover in order of preference. Sizes that
 * are absent are left out rather than emitted as `undefined`.
 *
 * @param {object} [images] - Image data; `images.cover` may be an array or an
 *   index-keyed object, both are read through index `0`.
 * @returns {string[][]} `[[url, ...]]`, or `[]` when there is no cover or no usable URL.
 */
function getCovers(images) {
	const cover = images?.cover?.[0];

	if (!cover) {
		return [];
	}

	const sources = [
		cover.md,
		cover.sm,
		cover.xs,
		// bigger but usually upscaled
		cover.xx,
		cover.xl,
		cover.lg,
	]
		.map((size) => size?.url)
		.filter(Boolean);

	return sources.length > 0 ? [sources] : [];
}
|
||||||
|
|
||||||
function getVideos(data) {
|
function getVideos(data) {
|
||||||
const teaserSources = data.videos.mediabook?.files;
|
const teaserSources = data.videos.mediabook?.files;
|
||||||
const trailerSources = data.children.find((child) => child.type === 'trailer')?.videos.full?.files;
|
const trailerSources = data.children.find((child) => child.type === 'trailer')?.videos.full?.files;
|
||||||
|
@ -51,9 +71,7 @@ function scrapeLatestX(data, site, filterChannel) {
|
||||||
description: data.description,
|
description: data.description,
|
||||||
};
|
};
|
||||||
|
|
||||||
const basepath = site.parameters?.scene
|
const basepath = getBasePath(site);
|
||||||
|| (site.parameters?.native && `${site.url}/scene`)
|
|
||||||
|| `${site.parent.url}/scene`;
|
|
||||||
|
|
||||||
release.url = `${basepath}/${release.entryId}/${slugify(release.title)}`;
|
release.url = `${basepath}/${release.entryId}/${slugify(release.title)}`;
|
||||||
release.date = new Date(data.dateReleased);
|
release.date = new Date(data.dateReleased);
|
||||||
|
@ -96,7 +114,7 @@ async function scrapeLatest(items, site, filterChannel) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene(data, url, _site, networkName) {
|
function scrapeRelease(data, url, channel, networkName) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const { id: entryId, title, description } = data;
|
const { id: entryId, title, description } = data;
|
||||||
|
@ -129,6 +147,29 @@ function scrapeScene(data, url, _site, networkName) {
|
||||||
|
|
||||||
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
||||||
|
|
||||||
|
if (data.parent?.type === 'movie') {
|
||||||
|
release.movie = {
|
||||||
|
entryId: data.parent.id,
|
||||||
|
url: `${getBasePath(channel, '/movie')}/${data.parent.id}/${slugify(data.parent.title, '-', { removePunctuation: true })}`,
|
||||||
|
title: data.parent.title,
|
||||||
|
description: data.parent.description,
|
||||||
|
date: new Date(data.parent.dateReleased),
|
||||||
|
channel: slugify(data.parent.collections?.name || data.parent.brand),
|
||||||
|
covers: getCovers(data.parent.images),
|
||||||
|
shallow: true,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.type === 'movie') {
|
||||||
|
release.covers = getCovers(data.images);
|
||||||
|
release.scenes = data.children?.map((scene) => ({
|
||||||
|
entryId: scene.id,
|
||||||
|
url: `${getBasePath(channel)}/${scene.id}/${slugify(scene.title)}`,
|
||||||
|
title: scene.title,
|
||||||
|
shallow: true,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,7 +271,7 @@ function scrapeProfile(data, html, releases = [], networkName) {
|
||||||
profile.naturalBoobs = false;
|
profile.naturalBoobs = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
profile.releases = releases.map((release) => scrapeScene(release, null, null, networkName));
|
profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
@ -292,8 +333,8 @@ async function fetchUpcoming(site, page, options) {
|
||||||
return res.statusCode;
|
return res.statusCode;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site, baseScene, options) {
|
async function fetchRelease(url, site, baseScene, options) {
|
||||||
if (baseScene?.entryId) {
|
if (baseScene?.entryId && !baseScene.shallow) {
|
||||||
// overview and deep data is the same, don't hit server unnecessarily
|
// overview and deep data is the same, don't hit server unnecessarily
|
||||||
return baseScene;
|
return baseScene;
|
||||||
}
|
}
|
||||||
|
@ -312,7 +353,7 @@ async function fetchScene(url, site, baseScene, options) {
|
||||||
|
|
||||||
if (res.status === 200 && res.body.result) {
|
if (res.status === 200 && res.body.result) {
|
||||||
return {
|
return {
|
||||||
scene: scrapeScene(res.body.result, url, site),
|
scene: scrapeRelease(res.body.result, url, site),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -374,6 +415,7 @@ module.exports = {
|
||||||
scrapeLatestX,
|
scrapeLatestX,
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchUpcoming,
|
fetchUpcoming,
|
||||||
fetchScene,
|
fetchScene: fetchRelease,
|
||||||
|
fetchMovie: fetchRelease,
|
||||||
fetchProfile,
|
fetchProfile,
|
||||||
};
|
};
|
||||||
|
|
|
@ -142,6 +142,7 @@ async function getTrailer(scene, channel, url) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
async function getPhotosLegacy(url) {
|
async function getPhotosLegacy(url) {
|
||||||
const htmlRes = await http.get(url, {
|
const htmlRes = await http.get(url, {
|
||||||
extract: {
|
extract: {
|
||||||
|
@ -169,6 +170,7 @@ async function getPhotosLegacy(url) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
async function getPhotos(url) {
|
async function getPhotos(url) {
|
||||||
const htmlRes = await http.get(url, {
|
const htmlRes = await http.get(url, {
|
||||||
|
|
|
@ -392,7 +392,8 @@ async function associateMovieScenes(movies, movieScenes) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId];
|
const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId]
|
||||||
|
|| moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId];
|
||||||
|
|
||||||
if (sceneMovie?.id) {
|
if (sceneMovie?.id) {
|
||||||
return {
|
return {
|
||||||
|
|
Loading…
Reference in New Issue