Storing scene language and production date precision. Refactored Teen Core Club.
This commit is contained in:
@@ -2,83 +2,82 @@
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
/**
 * Pick a display string from a localized field.
 *
 * @param {Object<string, string>|string|null|undefined} item - Map of locale
 *   code to text, or an already-plain string.
 * @returns {string|null} The `en` text when present and non-empty, otherwise
 *   the first non-empty value; null when nothing usable is found.
 */
function pickLocale(item) {
	if (!item) {
		return null;
	}

	// Object.values() on a string yields its characters, so a plain string
	// would otherwise collapse to its first letter.
	if (typeof item === 'string') {
		return item;
	}

	if (item.en) {
		return item.en;
	}

	// fall back to whichever locale is listed first, skipping empty entries
	return Object.values(item).find(Boolean) ?? null;
}

/**
 * Build release summaries from a list of scene contexts, reading each field
 * through the context's `query` helpers.
 *
 * NOTE(review): this function's lines were interleaved with pickLocale in the
 * rendered diff and it looks like the pre-refactor HTML scraper; it is kept
 * here so nothing is lost — confirm whether it is still needed.
 *
 * @param {Array<{query: Object}>} scenes - Scene contexts exposing `query`.
 * @returns {Object[]} One release object per scene.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.title a');
		// throws when the URL is missing or does not contain /scene/<digits>
		release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)[1];

		release.title = query.content('.title a');

		release.date = query.date('.date', 'MMM DD, YYYY');
		release.duration = query.duration('.duration');

		release.actors = query.all('.models a.model').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null),
		}));

		release.poster = query.img('img.poster');
		release.teaser = query.video('.teaser video');

		return release;
	});
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}/${page}`;
|
||||
const res = await unprint.get(url, { selectAll: '.scene' });
|
||||
/**
 * Convert a scene record into a release object.
 *
 * @param {Object} scene - Scene record; reads `id`, `slug`, `title`,
 *   `description`, `publication_date`, `meta`, `actors`, `artwork`,
 *   `artwork_f16`, `cover`, `screenshots`, `preview`, `display_genres`
 *   and `is_gay`.
 * @param {Object} channel - Channel entity; `channel.url` is the base for the
 *   scene and actor URLs.
 * @returns {Object} Release with entryId, url, title, description, date,
 *   duration, actors, poster, photos, caps, teaser, tags and language, plus
 *   productionDate / productionDatePrecision when a production year is known.
 */
function scrapeScene(scene, channel) {
	const release = {};

	release.entryId = scene.id;
	release.url = `${channel.url}/video/${scene.id}/${scene.slug}`;

	release.title = pickLocale(scene.title);
	release.description = pickLocale(scene.description);

	release.date = new Date(scene.publication_date);
	release.duration = scene.meta?.duration_seconds || unprint.extractDuration(scene.meta?.duration);

	// Only a year is provided, so pin the date to January 1st UTC. The
	// precision is set together with the date: a precision without a date
	// would otherwise still be passed on for storage.
	const productionYear = Number(scene.meta?.year);

	if (productionYear) {
		release.productionDate = new Date(Date.UTC(productionYear, 0, 1));
		release.productionDatePrecision = 'year';
	}

	release.actors = scene.actors?.map((actor) => ({
		name: actor.name,
		entryId: actor.id,
		url: `${channel.url}/videos/browse/cast/${actor.id}`,
	}));

	const poster = scene.artwork?.original;
	// either artwork entry may be absent, so guard the property access
	const photos = [scene.artwork_f16, scene.cover].map((art) => art?.original).filter(Boolean);

	if (poster) {
		release.poster = poster;
		release.photos = photos;
	} else {
		// not observed, but artwork_f16 is suitable as poster
		release.poster = photos[0];
		release.photos = photos.slice(1);
	}

	release.caps = scene.screenshots?.map((src) => unprint.prefixUrl(src, 'https://s02.uni73d.net')) || [];
	release.teaser = unprint.prefixUrl(scene.preview?.url, 'https://s02.uni73d.net');

	release.tags = scene.display_genres?.map((genre) => pickLocale(genre.title)).filter(Boolean) || [];

	if (scene.is_gay) {
		release.tags = release.tags.concat('gay');
	}

	release.language = scene.meta?.language;

	return release;
}
|
||||
|
||||
/**
 * Fetch one page of the most recently released scenes for a label.
 *
 * @param {Object} channel - Channel entity, passed through to scrapeScene.
 * @param {number} [page=1] - Page number to request.
 * @param {Object} [options]
 * @param {Object} [options.parameters] - Must provide `legacySiteId`, the
 *   label ID used in the request path.
 * @returns {Promise<Object[]|number>} Scraped releases, or the response status
 *   when the request failed or carried no video list.
 * @throws {Error} When no `legacySiteId` is available, instead of requesting
 *   `.../labels/undefined`.
 */
async function fetchLatest(channel, page = 1, { parameters } = {}) {
	const labelId = parameters?.legacySiteId;

	if (labelId == null) {
		throw new Error('fetchLatest requires parameters.legacySiteId');
	}

	const url = `https://api.fundorado.com/api/videos/browse/labels/${labelId}?page=${page}&sg=false&sort=release&video_type=scene&lang=en`;
	const res = await unprint.get(url);

	if (res.ok && res.data?.videos?.data) {
		return res.data.videos.data.map((scene) => scrapeScene(scene, channel));
	}

	return res.status;
}
|
||||
|
||||
function scrapeScene({ query }, { url }) {
|
||||
const release = {};
|
||||
async function fetchScene(url, channel) {
|
||||
const entryId = new URL(url).pathname.match(/\/video\/(\d+)/)[1];
|
||||
const res = await unprint.get(`https://api.fundorado.com/api/videodetail/${entryId}`);
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)[1];
|
||||
|
||||
release.title = query.content('h3.title');
|
||||
release.description = query.content('p.description');
|
||||
|
||||
release.date = query.date('.date', 'MMMM D, YYYY');
|
||||
release.duration = query.duration('.duration');
|
||||
|
||||
[release.poster, ...release.photos] = query.imgs('.preview-thumb');
|
||||
release.trailer = query.video('.trailer video');
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from a page context.
 *
 * @param {{query: Object}} context - Page context; `query.content` and
 *   `query.img` are called with CSS selectors.
 * @returns {{description: *, birthPlace: *, avatar: *}} Profile holding
 *   whatever the query helpers return for the bio text, birth place and photo.
 */
function scrapeProfile({ query }) {
	const profile = {};

	profile.description = query.content('.bio-text');
	profile.birthPlace = query.content('.birth-place span');

	profile.avatar = query.img('.actor-photo img');

	return profile;
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, entity) {
|
||||
const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
|
||||
const res = await unprint.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, entity);
|
||||
if (res.ok && res.data?.video) {
|
||||
return scrapeScene(res.data.video, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
@@ -86,6 +85,5 @@ async function fetchProfile({ name: actorName }, entity) {
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
scrapeScene,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -70,6 +70,7 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
||||
if (type === 'scene') {
|
||||
curatedRelease.shoot_id = release.shootId || null;
|
||||
curatedRelease.production_date = Number(release.productionDate) ? release.productionDate : null;
|
||||
curatedRelease.production_date_precision = release.productionDatePrecision;
|
||||
curatedRelease.duration = Math.round(release.duration) || null; // float may happen if scraper converts duration from milliseconds with a simple / 1000
|
||||
curatedRelease.qualities = Array.from(new Set(release.qualities?.map(Number).filter(Boolean))).sort((qualityA, qualityB) => qualityB - qualityA);
|
||||
}
|
||||
@@ -89,6 +90,20 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
||||
}
|
||||
}
|
||||
|
||||
if (release.language) {
|
||||
const curatedLanguage = release.language.toLowerCase();
|
||||
|
||||
const language = await knex('languages')
|
||||
.where(knex.raw('lower(alpha2)'), curatedLanguage)
|
||||
.orWhere(knex.raw('lower(name)'), curatedLanguage)
|
||||
.orWhere(knex.raw('lower(name_native)'), curatedLanguage)
|
||||
.first();
|
||||
|
||||
if (language) {
|
||||
curatedRelease.language_alpha2 = language.alpha2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!existingRelease && !release.id) {
|
||||
curatedRelease.created_batch_id = batchId;
|
||||
}
|
||||
@@ -443,12 +458,15 @@ async function storeScenes(releases, useBatchId) {
|
||||
description = COALESCE(new.description, releases.description),
|
||||
shoot_id = COALESCE(new.shoot_id, releases.shoot_id),
|
||||
duration = COALESCE(new.duration, releases.duration),
|
||||
production_date = COALESCE(new.production_date, releases.production_date),
|
||||
production_date_precision = COALESCE(new.production_date_precision, releases.production_date_precision),
|
||||
language_alpha2 = COALESCE(new.language_alpha2, releases.language_alpha2),
|
||||
comment = COALESCE(new.comment, releases.comment),
|
||||
attributes = COALESCE(new.attributes::jsonb || releases.attributes::jsonb, new.attributes::jsonb, releases.attributes::jsonb),
|
||||
deep = new.url IS NOT NULL,
|
||||
updated_at = NOW()
|
||||
FROM json_to_recordset(:scenes)
|
||||
AS new(id int, url text, date timestamptz, entity json, title text, description text, shoot_id text, duration integer, comment text, attributes json, deep boolean)
|
||||
AS new(id int, url text, date timestamptz, entity json, title text, description text, shoot_id text, duration integer, production_date timestamptz, production_date_precision text, language_alpha2 text, comment text, attributes json, deep boolean)
|
||||
WHERE releases.id = new.id
|
||||
RETURNING releases.*
|
||||
`, {
|
||||
|
||||
Reference in New Issue
Block a user