Tweaked Spizoo scraper for Goth Girlfriends.
This commit is contained in:
parent
b41317706f
commit
d3f15a6a2b
|
@ -5,31 +5,83 @@ exports.up = async (knex) => {
|
|||
table.integer('scene_id')
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('releases');
|
||||
.inTable('releases')
|
||||
.onDelete('set null');
|
||||
|
||||
table.integer('user_id')
|
||||
.references('id')
|
||||
.inTable('users');
|
||||
.inTable('users')
|
||||
.onDelete('set null');
|
||||
|
||||
table.json('base');
|
||||
table.json('deltas');
|
||||
table.json('base')
|
||||
.notNullable();
|
||||
|
||||
table.json('deltas')
|
||||
.notNullable();
|
||||
|
||||
table.text('hash')
|
||||
.notNullable();
|
||||
|
||||
table.text('comment');
|
||||
|
||||
table.datetime('applied_at');
|
||||
table.boolean('approved');
|
||||
|
||||
table.integer('approved_by')
|
||||
table.integer('reviewed_by')
|
||||
.references('id')
|
||||
.inTable('users');
|
||||
.inTable('users')
|
||||
.onDelete('set null');
|
||||
|
||||
table.text('error');
|
||||
table.datetime('reviewed_at');
|
||||
table.text('feedback');
|
||||
|
||||
table.datetime('applied_at');
|
||||
|
||||
table.datetime('created_at')
|
||||
.notNullable()
|
||||
.defaultTo(knex.fn.now());
|
||||
});
|
||||
|
||||
await knex.schema.createTable('bans', (table) => {
|
||||
table.increments('id');
|
||||
|
||||
table.integer('user_id')
|
||||
.references('id')
|
||||
.inTable('users')
|
||||
.onDelete('set null');
|
||||
|
||||
table.string('username');
|
||||
table.specificType('ip', 'cidr');
|
||||
|
||||
table.boolean('match_all')
|
||||
.notNullable()
|
||||
.defaultTo(false);
|
||||
|
||||
table.string('scope');
|
||||
table.boolean('shadow');
|
||||
|
||||
table.integer('banned_by')
|
||||
.references('id')
|
||||
.inTable('users')
|
||||
.onDelete('set null');
|
||||
|
||||
table.datetime('expires_at')
|
||||
.notNullable();
|
||||
|
||||
table.datetime('created_at')
|
||||
.notNullable()
|
||||
.defaultTo(knex.fn.now());
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('users', (table) => {
|
||||
table.specificType('last_ip', 'cidr');
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async (knex) => {
|
||||
await knex.schema.dropTable('scenes_revisions');
|
||||
await knex.schema.dropTable('bans');
|
||||
|
||||
await knex.schema.alterTable('users', (table) => {
|
||||
table.dropColumn('last_ip');
|
||||
});
|
||||
};
|
||||
|
|
|
@ -10734,6 +10734,15 @@ const sites = [
|
|||
tags: ['stripper'],
|
||||
parent: 'spizoo',
|
||||
},
|
||||
{
|
||||
slug: 'gothgirlfriends',
|
||||
name: 'Goth Girlfriends',
|
||||
url: 'https://www.gothgirlfriends.com',
|
||||
parent: 'spizoo',
|
||||
parameters: {
|
||||
latest: '/categories/videos_{page}_d.html',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'intimatelesbians',
|
||||
name: 'Intimate Lesbians',
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
const format = require('template-format');
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
|
@ -14,17 +17,19 @@ function scrapeAll(scenes) {
|
|||
release.url = query.url('a');
|
||||
release.entryId = getEntryId(release.url);
|
||||
|
||||
release.title = query.cnt('.title-label a, .thumb-title a, .p-7, .text h3');
|
||||
release.title = query.content('.title-label a, .thumb-title a, .p-7, .text h3');
|
||||
release.date = query.date('.date-label', 'MM/DD/YYYY');
|
||||
|
||||
release.actors = query.all(['.update_models a', '.tour_update_models a', '.pornstar-label span']).map((el) => ({
|
||||
name: query.cnt(el),
|
||||
url: query.url(el, null),
|
||||
name: unprint.query.content(el),
|
||||
url: unprint.query.url(el, null),
|
||||
}));
|
||||
|
||||
release.poster = query.img('a img');
|
||||
release.teaser = query.video('.leVideo source');
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
@ -47,7 +52,7 @@ function scrapeScene({ query }, url) {
|
|||
|
||||
release.tags = query.cnts('.categories-holder a, #sceneInfo a[href*="/categories"], #trailer-data a[href*="/categories"]');
|
||||
|
||||
const poster = query.img(['#video-holder .update_thumb', '#noMore .update_thumb', '#hpromo .update_thumb', '.trailer-thumb']) || query.poster('#trailervideo');
|
||||
const poster = query.img(['#video-holder .update_thumb', '#video-holder .thumb', '#noMore .update_thumb', '#hpromo .update_thumb', '.trailer-thumb']) || query.poster('#trailervideo');
|
||||
const posterPathname = poster && new URL(poster)?.pathname;
|
||||
|
||||
release.poster = [poster, poster?.replace(/imgw=\w+/, 'imgw=680')];
|
||||
|
@ -62,6 +67,8 @@ function scrapeScene({ query }, url) {
|
|||
release.trailer = query.video('#trailervideo source[type="video/mp4"], #FulsSizeVideo source[type="video/mp4"]'); // sic
|
||||
release.teaser = query.video('#trailer-video source[src*="/videothumbs"]');
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
|
@ -131,10 +138,14 @@ function scrapeProfile({ query, el }) {
|
|||
}
|
||||
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail');
|
||||
// const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail');
|
||||
|
||||
const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, {
|
||||
selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail',
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
|
24
src/sites.js
24
src/sites.js
|
@ -71,7 +71,11 @@ async function findSiteByUrl(url) {
|
|||
.leftJoin('networks', 'sites.network_id', 'networks.id')
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.where('sites.url', url)
|
||||
.orWhere('sites.url', origin)
|
||||
|
@ -114,7 +118,11 @@ async function fetchSitesFromArgv() {
|
|||
const rawSites = await knex('sites')
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.whereIn('sites.slug', argv.sites || [])
|
||||
.orWhereIn('networks.slug', argv.networks || [])
|
||||
|
@ -133,7 +141,11 @@ async function fetchSitesFromConfig() {
|
|||
const rawSites = await knex('sites')
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id')
|
||||
.where((builder) => {
|
||||
|
@ -168,7 +180,11 @@ async function fetchSites(queryObject) {
|
|||
.where((builder) => whereOr(queryObject, 'sites', builder))
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id')
|
||||
.limit(100);
|
||||
|
|
Loading…
Reference in New Issue