Added Top Web Models update and scene scraper.

This commit is contained in:
DebaucheryLibrarian
2021-01-15 04:04:32 +01:00
parent 451ffdc48b
commit b8df8e6507
13 changed files with 158 additions and 77 deletions

View File

@@ -32,6 +32,19 @@ function curateEntity(entity, includeParameters = false) {
parent: curateEntity(entity.parent, includeParameters),
} : {};
if (entity.tags) {
curatedEntity.tags = entity.tags.map(tag => ({
id: tag.id,
name: tag.name,
slug: tag.slug,
priority: tag.priority,
}));
}
if (includeParameters) {
curatedEntity.parameters = entity.parameters;
}
if (entity.children) {
curatedEntity.children = entity.children.map(child => curateEntity({
...child,
@@ -46,19 +59,6 @@ function curateEntity(entity, includeParameters = false) {
}, includeParameters));
}
if (entity.tags) {
curatedEntity.tags = entity.tags.map(tag => ({
id: tag.id,
name: tag.name,
slug: tag.slug,
priority: tag.priority,
}));
}
if (includeParameters) {
curatedEntity.parameters = entity.parameters;
}
return curatedEntity;
}

View File

@@ -400,7 +400,7 @@ async function storeFile(media, options) {
}
}
if (media.meta.type === 'image') {
if (media.meta.type === 'image' && media.meta.subtype !== 'gif') {
return storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options);
}

View File

@@ -4,50 +4,48 @@ const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
/**
 * Converts a single Top Web Models scene record into a release object.
 *
 * Reads `id`, `title`, `description`, `videos_duration`, `release_date`,
 * `models`, `rating`, `tags`, `thumb` and `sites` from the scene.
 *
 * @param {Object} scene - scene record as delivered by the site
 * @returns {Object} release with entryId, url, title, description, duration,
 *   date, actors, stars, tags, poster and channel
 */
function scrapeSceneX(scene) {
	// Only the scene slug is generated with punctuation removal; actor slugs use slugify's defaults.
	const sceneSlug = slugify(scene.title, '-', { removePunctuation: true });

	return {
		entryId: scene.id,
		url: `https://tour.topwebmodels.com/scenes/${scene.id}/${sceneSlug}`,
		title: scene.title,
		description: scene.description,
		duration: qu.durationToSeconds(scene.videos_duration),
		date: new Date(scene.release_date),
		actors: scene.models.map((model) => {
			const profileUrl = `https://tour.topwebmodels.com/models/${model.id}/${slugify(model.name)}`;

			return {
				name: model.name,
				// Falsy genders are normalised to null.
				gender: model.gender || null,
				avatar: model.thumb,
				url: profileUrl,
			};
		}),
		stars: scene.rating,
		tags: scene.tags.map(({ name }) => name),
		poster: scene.thumb,
		// The channel is the first listed site's name, slugified with an empty delimiter.
		channel: slugify(scene.sites[0]?.name, ''),
	};
}
/**
 * Converts a list of Top Web Models scene records into release objects.
 *
 * Delegates to scrapeSceneX so listing pages and individual scene pages are
 * curated by the same code. The previous inline copy had drifted from it
 * (scene slug generated without punctuation removal), dumped every scene and
 * release to stdout through leftover debug logging, and was followed by an
 * unreachable second return.
 *
 * @param {Object[]} scenes - scene records
 * @returns {Object[]} one release per scene, in input order
 */
function scrapeAll(scenes) {
	// Explicit arrow so map's index and array arguments are not forwarded to the scraper.
	return scenes.map(scene => scrapeSceneX(scene));
}
/**
 * Requests one page of a channel's videos from the Top Web Models API and
 * scrapes the returned items.
 *
 * @param {Object} channel - reads `url`, `slug`, `parameters.slug`,
 *   `parameters.apiKey` and `parent.parameters.apiKey`
 * @param {*} page - value interpolated into the `page` query parameter
 * @returns {Promise<Object[]|*>} scraped releases when the response is ok,
 *   otherwise the response status
 */
async function fetchLatest(channel, page) {
const session = http.session();
// NOTE(review): this first request is not given `session`; if it is meant to
// prime the session before the API call, it probably should be — confirm against the http util.
await http.get(channel.url);
// The site slug and API key come from the channel's own parameters, falling
// back to the channel slug and the parent's API key respectively.
const res = await http.get(`https://tour.topwebmodels.com/api/sites/${channel.parameters?.slug || channel.slug}?page=${page}`, {
session,
headers: {
Referer: 'https://tour.topwebmodels.com',
'api-key': channel.parameters?.apiKey || channel.parent?.parameters?.apiKey,
'x-Requested-With': 'XMLHttpRequest',
},
});
// NOTE(review): prints the whole response object on every call; looks like leftover debugging — confirm and remove.
console.log(res);
if (res.ok) {
return scrapeAll(res.body.videos.items);
}
@@ -55,6 +53,21 @@ async function fetchLatest(channel, page) {
// Non-ok responses are reported to the caller as the bare status.
return res.status;
}
/**
 * Fetches a scene page and scrapes the video record found in the page's
 * `window.__DATA__` object.
 *
 * @param {string} url - scene page URL
 * @returns {Promise<Object|*>} the scraped release when the response is ok,
 *   otherwise the response status
 */
async function fetchScene(url) {
	// NOTE(review): 'dangerously' presumably lets the page's own scripts run so that
	// window.__DATA__ gets populated — confirm against the http util's extract option.
	const res = await http.get(url, { extract: { runScripts: 'dangerously' } });

	if (!res.ok) {
		return res.status;
	}

	const pageData = res.window.__DATA__.data;
	const { video } = pageData;
	const release = { ...scrapeSceneX(video) };

	// A thumbnail matching .gif is additionally exposed as the teaser.
	if (/\.gif/.test(video.thumb)) {
		release.teaser = video.thumb;
	}

	// The page-level poster file replaces the poster taken from the video record.
	release.poster = pageData.file_poster;

	return release;
}
// Functions this scraper module exposes: latest-releases listing and single scene page scraping.
module.exports = {
fetchLatest,
fetchScene,
};