Compare commits

...

4 Commits

Author SHA1 Message Date
DebaucheryLibrarian 4af7597441 1.138.9 2020-10-29 16:06:27 +01:00
DebaucheryLibrarian c37d4ad01f Filtering invalid actors from releases before storing. 2020-10-29 16:06:20 +01:00
DebaucheryLibrarian 2801732f57 1.138.8 2020-10-29 15:21:08 +01:00
DebaucheryLibrarian b188bc5744 Filtering out empty or unidentified scenes from update scraper, with warning. Improved Jesse Loads Monster Facials reliability. 2020-10-29 15:20:59 +01:00
13 changed files with 70 additions and 39 deletions

2
package-lock.json generated
View File

@ -1,6 +1,6 @@
{
"name": "traxxx",
"version": "1.138.7",
"version": "1.138.9",
"lockfileVersion": 1,
"requires": true,
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "traxxx",
"version": "1.138.7",
"version": "1.138.9",
"description": "All the latest porn releases in one place",
"main": "src/app.js",
"scripts": {

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -3415,21 +3415,21 @@ const sites = [
{
slug: 'paintoy',
name: 'Paintoy',
url: 'https://www.paintoy.com',
url: 'http://www.paintoy.com',
tags: ['bdsm'],
parent: 'insex',
},
{
slug: 'aganmedon',
name: 'Agan Medon',
url: 'https://www.aganmedon.com',
url: 'http://www.aganmedon.com',
tags: ['bdsm', 'animated'],
parent: 'insex',
},
{
slug: 'sensualpain',
name: 'Sensual Pain',
url: 'https://www.sensualpain.com',
url: 'http://www.sensualpain.com',
tags: ['bdsm'],
parent: 'insex',
},

View File

@ -114,29 +114,37 @@ function getAverage(items) {
}
function toBaseActors(actorsOrNames, release) {
return actorsOrNames.map((actorOrName) => {
const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
if (!actorsOrNames) {
return [];
}
const name = capitalize(baseName);
const slug = slugify(name);
const baseActors = actorsOrNames
.filter(actorOrName => actorOrName && (typeof actorOrName === 'string' || actorOrName.name))
.map((actorOrName) => {
const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
const baseActor = {
name,
slug,
entryId: entryId || actorOrName.entryId || null,
entity: release?.entity?.parent || release?.entity || null,
hasProfile: !!actorOrName.name, // actor contains profile information
};
const name = capitalize(baseName);
const slug = slugify(name);
if (actorOrName.name) {
return {
...actorOrName,
...baseActor,
const baseActor = {
name,
slug,
entryId: entryId || actorOrName.entryId || null,
entity: release?.entity?.parent || release?.entity || null,
hasProfile: !!actorOrName.name, // actor contains profile information
};
}
return baseActor;
});
if (actorOrName.name) {
return {
...actorOrName,
...baseActor,
};
}
return baseActor;
});
return baseActors;
}
function curateActor(actor, withDetails = false, isProfile = false) {
@ -832,4 +840,5 @@ module.exports = {
fetchActor,
scrapeActors,
searchActors,
toBaseActors,
};

View File

@ -30,20 +30,20 @@ const { argv } = yargs
type: 'boolean',
alias: 'web',
})
.option('networks', {
.option('include-networks', {
describe: 'Network to scrape all channels from (overrides configuration)',
type: 'array',
alias: 'network',
alias: ['include-network', 'networks', 'network'],
})
.option('exclude-networks', {
describe: 'Network not to scrape any channels from (overrides configuration)',
type: 'array',
alias: 'exclude-network',
})
.option('channels', {
.option('include-channels', {
describe: 'Channel to scrape (overrides configuration)',
type: 'array',
alias: 'channel',
alias: ['include-channel', 'channels', 'channel'],
})
.option('exclude-channels', {
describe: 'Channel not to scrape (overrides configuration)',

View File

@ -64,8 +64,8 @@ async function fetchIncludedEntities() {
includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [],
excludedChannels: argv.excludeChannels || config.exclude?.channels || [],
excludedNetworks: argv.excludeNetworks || config.exclude?.networks.filter(network => !argv.networks?.includes(network)) || [], // ignore explicitly included networks
excludedChannels: argv.excludeChannels || config.exclude?.channels.filter(channel => !argv.channels?.includes(channel)) || [], // ignore explicitly included channels
};
const rawNetworks = await knex.raw(`

View File

@ -1,23 +1,35 @@
'use strict';
const { get, initAll } = require('../utils/qu');
const { get, initAll, formatDate } = require('../utils/qu');
function scrapeLatest(scenes, dates, site) {
return scenes.map(({ qu }, index) => {
const release = {};
const path = qu.url('a[href*="videos/"]');
const path = qu.url('a');
release.url = `${site.url}/visitors/${path}`;
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
if (path) {
release.url = `${site.url}/visitors/${path}`;
}
if (dates && dates[index]) {
release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
}
const entryId = path?.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1]
|| qu.img('img[src*="graphics/fft"]')?.match(/fft_(\w+).gif/)?.[1];
if (!entryId) {
return null;
}
release.entryId = release.date ? `${formatDate(release.date, 'YYYY-MM-DD')}-${entryId}` : entryId;
release.description = qu.q('tbody tr:nth-child(3) font', true);
const infoLine = qu.q('font[color="#663366"]', true);
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
if (infoLine) {
release.duration = Number(infoLine.match(/(\d+) min/i)?.[1] || infoLine.match(/video: (\d+)/i)?.[1]) * 60 || null;
}
const poster = qu.img('img[src*="photos/"][width="400"]');
release.poster = `${site.url}/visitors/${poster}`;

View File

@ -258,7 +258,8 @@ async function fetchLatest(entity, page, options) {
.limit(faker.random.number({ min: 2, max: 15 }))
.pluck('name');
release.actors = actors(release);
// release.actors = actors(release);
release.actors = [null, 'Charles Darwin'];
release.title = title(release);
return release;

View File

@ -9,7 +9,7 @@ const slugify = require('./utils/slugify');
const bulkInsert = require('./utils/bulk-insert');
const resolvePlace = require('./utils/resolve-place');
const { formatDate } = require('./utils/qu');
const { associateActors, scrapeActors } = require('./actors');
const { associateActors, scrapeActors, toBaseActors } = require('./actors');
const { associateReleaseTags } = require('./tags');
const { curateEntity } = require('./entities');
const { associateReleaseMedia } = require('./media');
@ -291,7 +291,8 @@ async function storeScenes(releases) {
const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
const releasesWithChannels = await attachChannelEntities(releases);
const releasesWithStudios = await attachStudios(releasesWithChannels);
const releasesWithBaseActors = releasesWithChannels.map(release => ({ ...release, actors: toBaseActors(release.actors) }));
const releasesWithStudios = await attachStudios(releasesWithBaseActors);
// uniqueness is entity ID + entry ID, filter uniques after adding entities
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);

View File

@ -109,7 +109,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
return accReleases;
}
const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity }));
const validPageReleases = pageReleases.filter(release => release?.entryId); // filter out empty and unidentified releases
const pageReleasesWithEntity = validPageReleases.map(release => ({ ...release, entity: release.entity || entity }));
if (pageReleases.length > validPageReleases.length) {
logger.warn(`Found ${pageReleases.length - validPageReleases.length} empty or unidentified releases on page ${page} for '${entity.name}'`);
}
if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
@ -119,6 +124,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
}
const releases = await scrapeReleasesPage(argv.page || 1, []);
const hasDates = releases.every(release => !!release.date);
const limitedReleases = (argv.last && releases.slice(0, Math.max(argv.last, 0)))
@ -133,7 +139,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
}
async function scrapeLatestReleases(scraper, entity, preData) {
if ((!argv.latest && !argv.last && !argv.after) || !scraper.fetchLatest) {
if ((!argv.latest && !argv.last) || !scraper.fetchLatest) {
return emptyReleases;
}