From d3f15a6a2b55c4d0ec018aefba0e7602472f41b0 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Fri, 13 Sep 2024 01:22:46 +0200 Subject: [PATCH] Tweaked Spizoo scraper for Goth Girlfriends. --- migrations/20240904234305_content_versions.js | 68 ++++++++++++++++--- seeds/02_sites.js | 9 +++ src/scrapers/spizoo.js | 23 +++++-- src/sites.js | 24 +++++-- 4 files changed, 106 insertions(+), 18 deletions(-) diff --git a/migrations/20240904234305_content_versions.js b/migrations/20240904234305_content_versions.js index adf18916..b79fa283 100644 --- a/migrations/20240904234305_content_versions.js +++ b/migrations/20240904234305_content_versions.js @@ -5,31 +5,83 @@ exports.up = async (knex) => { table.integer('scene_id') .notNullable() .references('id') - .inTable('releases'); + .inTable('releases') + .onDelete('set null'); table.integer('user_id') .references('id') - .inTable('users'); + .inTable('users') + .onDelete('set null'); - table.json('base'); - table.json('deltas'); + table.json('base') + .notNullable(); + + table.json('deltas') + .notNullable(); + + table.text('hash') + .notNullable(); table.text('comment'); - table.datetime('applied_at'); + table.boolean('approved'); - table.integer('approved_by') + table.integer('reviewed_by') .references('id') - .inTable('users'); + .inTable('users') + .onDelete('set null'); - table.text('error'); + table.datetime('reviewed_at'); + table.text('feedback'); + + table.datetime('applied_at'); table.datetime('created_at') .notNullable() .defaultTo(knex.fn.now()); }); + + await knex.schema.createTable('bans', (table) => { + table.increments('id'); + + table.integer('user_id') + .references('id') + .inTable('users') + .onDelete('set null'); + + table.string('username'); + table.specificType('ip', 'cidr'); + + table.boolean('match_all') + .notNullable() + .defaultTo(false); + + table.string('scope'); + table.boolean('shadow'); + + table.integer('banned_by') + .references('id') + .inTable('users') + .onDelete('set null'); + + table.datetime('expires_at') + .notNullable(); + + table.datetime('created_at') + .notNullable() + .defaultTo(knex.fn.now()); + }); + + await knex.schema.alterTable('users', (table) => { + table.specificType('last_ip', 'cidr'); + }); }; exports.down = async (knex) => { await knex.schema.dropTable('scenes_revisions'); + await knex.schema.dropTable('bans'); + + await knex.schema.alterTable('users', (table) => { + table.dropColumn('last_ip'); + }); }; diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 3e1a607b..c8663f86 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -10734,6 +10734,15 @@ const sites = [ tags: ['stripper'], parent: 'spizoo', }, + { + slug: 'gothgirlfriends', + name: 'Goth Girlfriends', + url: 'https://www.gothgirlfriends.com', + parent: 'spizoo', + parameters: { + latest: '/categories/videos_{page}_d.html', + }, + }, { slug: 'intimatelesbians', name: 'Intimate Lesbians', diff --git a/src/scrapers/spizoo.js b/src/scrapers/spizoo.js index 59c43272..aab0988c 100755 --- a/src/scrapers/spizoo.js +++ b/src/scrapers/spizoo.js @@ -1,5 +1,8 @@ 'use strict'; +const unprint = require('unprint'); +const format = require('template-format'); + const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); @@ -14,17 +17,19 @@ function scrapeAll(scenes) { release.url = query.url('a'); release.entryId = getEntryId(release.url); - release.title = query.cnt('.title-label a, .thumb-title a, .p-7, .text h3'); + release.title = query.content('.title-label a, .thumb-title a, .p-7, .text h3'); release.date = query.date('.date-label', 'MM/DD/YYYY'); release.actors = query.all(['.update_models a', '.tour_update_models a', '.pornstar-label span']).map((el) => ({ - name: query.cnt(el), - url: query.url(el, null), + name: unprint.query.content(el), + url: unprint.query.url(el, null), })); release.poster = query.img('a img'); release.teaser = query.video('.leVideo source'); + console.log(release); + return release; }); } @@ -47,7 +52,7 @@ function scrapeScene({ query }, url) { release.tags = query.cnts('.categories-holder a, #sceneInfo a[href*="/categories"], #trailer-data a[href*="/categories"]'); - const poster = query.img(['#video-holder .update_thumb', '#noMore .update_thumb', '#hpromo .update_thumb', '.trailer-thumb']) || query.poster('#trailervideo'); + const poster = query.img(['#video-holder .update_thumb', '#video-holder .thumb', '#noMore .update_thumb', '#hpromo .update_thumb', '.trailer-thumb']) || query.poster('#trailervideo'); const posterPathname = poster && new URL(poster)?.pathname; release.poster = [poster, poster?.replace(/imgw=\w+/, 'imgw=680')]; @@ -62,6 +67,8 @@ function scrapeScene({ query }, url) { release.trailer = query.video('#trailervideo source[type="video/mp4"], #FulsSizeVideo source[type="video/mp4"]'); // sic release.teaser = query.video('#trailer-video source[src*="/videothumbs"]'); + console.log(release); + return release; } @@ -131,10 +138,14 @@ function scrapeProfile({ query, el }) { } async function fetchLatest(channel, page) { - const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail'); + // const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail'); + + const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, { + selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail', + }); if (res.ok) { - return scrapeAll(res.items, channel); + return scrapeAll(res.context, channel); } return res.status; diff --git a/src/sites.js b/src/sites.js index 79cc2d4c..cc84ed77 100755 --- a/src/sites.js +++ b/src/sites.js @@ -71,7 +71,11 @@ async function findSiteByUrl(url) { .leftJoin('networks', 'sites.network_id', 'networks.id') .select( 'sites.*', - 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', + 'networks.name as network_name', + 'networks.slug as network_slug', + 'networks.url as network_url', + 'networks.description as network_description', + 'networks.parameters as network_parameters', ) .where('sites.url', url) .orWhere('sites.url', origin) @@ -114,7 +118,11 @@ async function fetchSitesFromArgv() { const rawSites = await knex('sites') .select( 'sites.*', - 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', + 'networks.name as network_name', + 'networks.slug as network_slug', + 'networks.url as network_url', + 'networks.description as network_description', + 'networks.parameters as network_parameters', ) .whereIn('sites.slug', argv.sites || []) .orWhereIn('networks.slug', argv.networks || []) @@ -133,7 +141,11 @@ async function fetchSitesFromConfig() { const rawSites = await knex('sites') .select( 'sites.*', - 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', + 'networks.name as network_name', + 'networks.slug as network_slug', + 'networks.url as network_url', + 'networks.description as network_description', + 'networks.parameters as network_parameters', ) .leftJoin('networks', 'sites.network_id', 'networks.id') .where((builder) => { @@ -168,7 +180,11 @@ async function fetchSites(queryObject) { .where((builder) => whereOr(queryObject, 'sites', builder)) .select( 'sites.*', - 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', + 'networks.name as network_name', +'networks.slug as network_slug', +'networks.url as network_url', +'networks.description as network_description', +'networks.parameters as network_parameters', ) .leftJoin('networks', 'sites.network_id', 'networks.id') .limit(100);