Tweaked Spizoo scraper for Goth Girlfriends.
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
const format = require('template-format');
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
@@ -14,17 +17,19 @@ function scrapeAll(scenes) {
|
||||
release.url = query.url('a');
|
||||
release.entryId = getEntryId(release.url);
|
||||
|
||||
release.title = query.cnt('.title-label a, .thumb-title a, .p-7, .text h3');
|
||||
release.title = query.content('.title-label a, .thumb-title a, .p-7, .text h3');
|
||||
release.date = query.date('.date-label', 'MM/DD/YYYY');
|
||||
|
||||
release.actors = query.all(['.update_models a', '.tour_update_models a', '.pornstar-label span']).map((el) => ({
|
||||
name: query.cnt(el),
|
||||
url: query.url(el, null),
|
||||
name: unprint.query.content(el),
|
||||
url: unprint.query.url(el, null),
|
||||
}));
|
||||
|
||||
release.poster = query.img('a img');
|
||||
release.teaser = query.video('.leVideo source');
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
@@ -47,7 +52,7 @@ function scrapeScene({ query }, url) {
|
||||
|
||||
release.tags = query.cnts('.categories-holder a, #sceneInfo a[href*="/categories"], #trailer-data a[href*="/categories"]');
|
||||
|
||||
const poster = query.img(['#video-holder .update_thumb', '#noMore .update_thumb', '#hpromo .update_thumb', '.trailer-thumb']) || query.poster('#trailervideo');
|
||||
const poster = query.img(['#video-holder .update_thumb', '#video-holder .thumb', '#noMore .update_thumb', '#hpromo .update_thumb', '.trailer-thumb']) || query.poster('#trailervideo');
|
||||
const posterPathname = poster && new URL(poster)?.pathname;
|
||||
|
||||
release.poster = [poster, poster?.replace(/imgw=\w+/, 'imgw=680')];
|
||||
@@ -62,6 +67,8 @@ function scrapeScene({ query }, url) {
|
||||
release.trailer = query.video('#trailervideo source[type="video/mp4"], #FulsSizeVideo source[type="video/mp4"]'); // sic
|
||||
release.teaser = query.video('#trailer-video source[src*="/videothumbs"]');
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
@@ -131,10 +138,14 @@ function scrapeProfile({ query, el }) {
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail');
|
||||
// const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail');
|
||||
|
||||
const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, {
|
||||
selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail',
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
src/sites.js (24 lines changed)
@@ -71,7 +71,11 @@ async function findSiteByUrl(url) {
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id')
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.where('sites.url', url)
|
||||
.orWhere('sites.url', origin)
|
||||
@@ -114,7 +118,11 @@ async function fetchSitesFromArgv() {
|
||||
const rawSites = await knex('sites')
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.whereIn('sites.slug', argv.sites || [])
|
||||
.orWhereIn('networks.slug', argv.networks || [])
|
||||
@@ -133,7 +141,11 @@ async function fetchSitesFromConfig() {
|
||||
const rawSites = await knex('sites')
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id')
|
||||
.where((builder) => {
|
||||
@@ -168,7 +180,11 @@ async function fetchSites(queryObject) {
|
||||
.where((builder) => whereOr(queryObject, 'sites', builder))
|
||||
.select(
|
||||
'sites.*',
|
||||
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
|
||||
'networks.name as network_name',
|
||||
'networks.slug as network_slug',
|
||||
'networks.url as network_url',
|
||||
'networks.description as network_description',
|
||||
'networks.parameters as network_parameters',
|
||||
)
|
||||
.leftJoin('networks', 'sites.network_id', 'networks.id')
|
||||
.limit(100);
|
||||
|
||||
Reference in New Issue
Block a user