Improved release storage module. Added new tags module. Added movie scraping.
|
@ -352,7 +352,7 @@ exports.up = knex => Promise.resolve()
|
||||||
|
|
||||||
table.string('shoot_id');
|
table.string('shoot_id');
|
||||||
table.string('entry_id');
|
table.string('entry_id');
|
||||||
table.unique(['site_id', 'entry_id']);
|
table.unique(['site_id', 'entry_id', 'type']);
|
||||||
|
|
||||||
table.string('url', 1000);
|
table.string('url', 1000);
|
||||||
table.string('title');
|
table.string('title');
|
||||||
|
|
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 40 KiB |
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 27 KiB |
After Width: | Height: | Size: 27 KiB |
After Width: | Height: | Size: 44 KiB |
|
@ -0,0 +1,58 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 23.1.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 180 42" style="enable-background:new 0 0 180 42;" xml:space="preserve">
|
||||||
|
<style type="text/css">
|
||||||
|
.st0{fill:url(#SVGID_1_);}
|
||||||
|
.st1{fill:url(#SVGID_2_);}
|
||||||
|
.st2{fill:url(#SVGID_3_);}
|
||||||
|
.st3{fill:url(#SVGID_4_);}
|
||||||
|
.st4{fill:url(#SVGID_5_);}
|
||||||
|
.st5{fill:#FFFFFF;}
|
||||||
|
</style>
|
||||||
|
<g>
|
||||||
|
|
||||||
|
<linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="92.25" y1="10.6716" x2="92.25" y2="35.0304" gradientTransform="matrix(1 0 0 -1 0 44)">
|
||||||
|
<stop offset="0" style="stop-color:#E93F3A"/>
|
||||||
|
<stop offset="1" style="stop-color:#F48B40"/>
|
||||||
|
</linearGradient>
|
||||||
|
<polygon class="st0" points="98.9,9.5 97,20.4 94.7,9.5 89.8,9.5 87.6,20.4 85.6,9.5 78.9,9.5 84.3,32.8 90.3,32.8 92.2,22.7
|
||||||
|
94.2,32.8 100.2,32.8 105.6,9.5 "/>
|
||||||
|
|
||||||
|
<linearGradient id="SVGID_2_" gradientUnits="userSpaceOnUse" x1="115" y1="10.6716" x2="115" y2="35.0304" gradientTransform="matrix(1 0 0 -1 0 44)">
|
||||||
|
<stop offset="0" style="stop-color:#E93F3A"/>
|
||||||
|
<stop offset="1" style="stop-color:#F48B40"/>
|
||||||
|
</linearGradient>
|
||||||
|
<path class="st1" d="M115,9.2c-5.9,0-9.9,4.8-9.9,12s4,12,9.9,12s9.9-4.8,9.9-12S121,9.2,115,9.2z M115,27.5
|
||||||
|
c-2.6,0-3.5-3.4-3.5-6.3s0.9-6.3,3.5-6.3c2.7,0,3.6,3.4,3.6,6.3S117.7,27.5,115,27.5z"/>
|
||||||
|
|
||||||
|
<linearGradient id="SVGID_3_" gradientUnits="userSpaceOnUse" x1="135.55" y1="10.6716" x2="135.55" y2="35.0304" gradientTransform="matrix(1 0 0 -1 0 44)">
|
||||||
|
<stop offset="0" style="stop-color:#E93F3A"/>
|
||||||
|
<stop offset="1" style="stop-color:#F48B40"/>
|
||||||
|
</linearGradient>
|
||||||
|
<path class="st2" d="M140,23.9c1.8-1,3.4-3.1,3.4-6.6c0-4.5-3.1-7.7-7.5-7.7H127v23.3h6.2v-7.8h0.7l3.3,7.8h6.9L140,23.9z
|
||||||
|
M135.3,19.4h-2.1v-4.2h2.1c1.6,0,1.8,1.5,1.8,2.1C137.1,18.3,136.6,19.4,135.3,19.4z"/>
|
||||||
|
|
||||||
|
<linearGradient id="SVGID_4_" gradientUnits="userSpaceOnUse" x1="152.3" y1="10.6716" x2="152.3" y2="35.0304" gradientTransform="matrix(1 0 0 -1 0 44)">
|
||||||
|
<stop offset="0" style="stop-color:#E93F3A"/>
|
||||||
|
<stop offset="1" style="stop-color:#F48B40"/>
|
||||||
|
</linearGradient>
|
||||||
|
<polygon class="st3" points="151.8,27.1 151.8,9.5 145.6,9.5 145.6,32.8 159,32.8 159,27.1 "/>
|
||||||
|
|
||||||
|
<linearGradient id="SVGID_5_" gradientUnits="userSpaceOnUse" x1="169.8" y1="10.6716" x2="169.8" y2="35.0304" gradientTransform="matrix(1 0 0 -1 0 44)">
|
||||||
|
<stop offset="0" style="stop-color:#E93F3A"/>
|
||||||
|
<stop offset="1" style="stop-color:#F48B40"/>
|
||||||
|
</linearGradient>
|
||||||
|
<path class="st4" d="M168.6,9.5h-7.9v23.3h7.9c6.2,0,10.3-4.6,10.3-11.6C178.9,14.1,174.9,9.5,168.6,9.5z M167,15.2h1.6
|
||||||
|
c3.4,0,4,3.7,4,6c0,1.4-0.3,5.9-4,5.9H167V15.2z"/>
|
||||||
|
</g>
|
||||||
|
<g>
|
||||||
|
<path class="st5" d="M9.3,9.5H0.5v23.3h6.2V25h2.6c5.2,0,7.6-3.9,7.6-7.8C16.9,13.4,14.6,9.5,9.3,9.5z M6.7,15.2h2.1
|
||||||
|
c1.1,0,1.8,0.8,1.8,2.1c0,1-0.5,2.1-1.8,2.1H6.7V15.2z"/>
|
||||||
|
<path class="st5" d="M27.6,9.2c-5.9,0-9.9,4.8-9.9,12s4,12,9.9,12s9.9-4.8,9.9-12S33.5,9.2,27.6,9.2z M27.6,27.5
|
||||||
|
c-2.6,0-3.5-3.4-3.5-6.3s0.9-6.3,3.5-6.3c2.7,0,3.6,3.4,3.6,6.3S30.2,27.5,27.6,27.5z"/>
|
||||||
|
<path class="st5" d="M52.5,23.9c1.8-1,3.4-3.1,3.4-6.6c0-4.5-3.1-7.7-7.5-7.7h-8.9v23.3h6.2v-7.8h0.7l3.3,7.8h6.9L52.5,23.9z
|
||||||
|
M47.8,19.4h-2.1v-4.2h2.1c1.6,0,1.8,1.5,1.8,2.1C49.6,18.3,49.1,19.4,47.8,19.4z"/>
|
||||||
|
<polygon class="st5" points="70.4,9.5 70.4,19.8 64.9,9.5 59,9.5 59,32.8 65.2,32.8 65.2,22 71,32.8 76.7,32.8 76.7,9.5 "/>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 3.4 KiB |
After Width: | Height: | Size: 1.7 MiB |
After Width: | Height: | Size: 119 KiB |
After Width: | Height: | Size: 1.9 MiB |
After Width: | Height: | Size: 123 KiB |
After Width: | Height: | Size: 252 KiB |
After Width: | Height: | Size: 101 KiB |
|
@ -1,40 +1,40 @@
|
||||||
const upsert = require('../src/utils/upsert');
|
const upsert = require('../src/utils/upsert');
|
||||||
|
|
||||||
const tagPosters = [
|
const tagPosters = [
|
||||||
|
['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'],
|
||||||
|
['anal', 2, 'Sheena Shaw for Bang Bros'],
|
||||||
['anal-creampie', 0, 'Gina Valentina and Jane Wilde in "A Very Special Anniversary" for Tushy'],
|
['anal-creampie', 0, 'Gina Valentina and Jane Wilde in "A Very Special Anniversary" for Tushy'],
|
||||||
|
['asian', 'poster', 'Vina Sky in "Young and Glamorous 10" for Jules Jordan'],
|
||||||
['ass-to-mouth', 'poster', 'Alysa Gap and Logan in "Anal Buffet 4" for Evil Angel'],
|
['ass-to-mouth', 'poster', 'Alysa Gap and Logan in "Anal Buffet 4" for Evil Angel'],
|
||||||
['bdsm', 0, 'Dani Daniels in "The Traning of Dani Daniels, Day 2" for The Training of O at Kink'],
|
['bdsm', 0, 'Dani Daniels in "The Traning of Dani Daniels, Day 2" for The Training of O at Kink'],
|
||||||
['behind-the-scenes', 0, 'Janice Griffith in "Day With A Pornstar: Janice" for Brazzers'],
|
['behind-the-scenes', 0, 'Janice Griffith in "Day With A Pornstar: Janice" for Brazzers'],
|
||||||
|
['blowbang', 'poster'],
|
||||||
|
['blowjob', 0, 'Adriana Chechik in "The Dinner Party" for Real Wife Stories (Brazzers)'],
|
||||||
|
['bukkake', 'poster'],
|
||||||
|
['caucasian', 1, 'Sheena Shaw for Brazzers'],
|
||||||
|
['creampie', 'poster'],
|
||||||
['da-tp', 0, 'Natasha Teen in LegalPorno SZ2164'],
|
['da-tp', 0, 'Natasha Teen in LegalPorno SZ2164'],
|
||||||
['deepthroat', 0, 'Chanel Grey in "Deepthroating Is Fun" for Throated'],
|
['deepthroat', 0, 'Chanel Grey in "Deepthroating Is Fun" for Throated'],
|
||||||
['double-anal', 5, 'Riley Reid in "The Gangbang of Riley Reid" for Jules Jordan'],
|
['double-anal', 2, 'Lana Rhoades in "Lana Rhoades Unleashed" for HardX'],
|
||||||
['double-penetration', 'poster', 'Mia Malkova in "DP!" for HardX'],
|
['double-penetration', 'poster', 'Mia Malkova in "DP!" for HardX'],
|
||||||
['double-vaginal', 'poster', 'Riley Reid in "Pizza That Ass" for Reid My Lips'],
|
['double-vaginal', 'poster', 'Riley Reid in "Pizza That Ass" for Reid My Lips'],
|
||||||
['dv-tp', 'poster', 'Juelz Ventura in "Gangbanged 5" for Elegant Angel'],
|
['dv-tp', 'poster', 'Juelz Ventura in "Gangbanged 5" for Elegant Angel'],
|
||||||
['oral-creampie', 1, 'Keisha Grey in Brazzers House'],
|
|
||||||
['triple-anal', 'poster', 'Kristy Black in SZ1986 for LegalPorno'],
|
|
||||||
['airtight', 1, 'Jynx Maze in "Pump My Ass Full of Cum 3" for Jules Jordan'],
|
|
||||||
['anal', 1, 'Veronica Leal and Tina Kay in "Agents On Anal Mission" for Asshole Fever'],
|
|
||||||
['asian', 'poster', 'Vina Sky in "Young and Glamorous 10" for Jules Jordan'],
|
|
||||||
['blowjob', 0, 'Adriana Chechik in "The Dinner Party" for Real Wife Stories (Brazzers)'],
|
|
||||||
['blowbang', 'poster'],
|
|
||||||
['bukkake', 'poster'],
|
|
||||||
['caucasian', 'poster'],
|
|
||||||
['creampie', 'poster'],
|
|
||||||
['ebony', 1, 'Sarah Banks for Brazzers'],
|
['ebony', 1, 'Sarah Banks for Brazzers'],
|
||||||
['facial', 'poster'],
|
|
||||||
['facefucking', '1', 'Carrie for Young Throats'],
|
['facefucking', '1', 'Carrie for Young Throats'],
|
||||||
|
['facial', 'poster'],
|
||||||
['gangbang', 'poster', 'Kristen Scott in "Interracial Gangbang!" for Jules Jordan'],
|
['gangbang', 'poster', 'Kristen Scott in "Interracial Gangbang!" for Jules Jordan'],
|
||||||
['gaping', 1, 'Vina Sky in "Vina Sky Does Anal" for HardX'],
|
['gaping', 1, 'Vina Sky in "Vina Sky Does Anal" for HardX'],
|
||||||
['interracial', 'poster'],
|
['interracial', 'poster'],
|
||||||
['latina', 'poster'],
|
['latina', 'poster'],
|
||||||
['mff', 0, 'Madison Ivy and Adriana Chechik in "Day With A Pornstar" for Brazzers'],
|
['mff', 0, 'Madison Ivy and Adriana Chechik in "Day With A Pornstar" for Brazzers'],
|
||||||
['mfm', 'poster'],
|
['mfm', 'poster'],
|
||||||
|
['oral-creampie', 1, 'Keisha Grey in Brazzers House'],
|
||||||
['orgy', 'poster'],
|
['orgy', 'poster'],
|
||||||
['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
|
['schoolgirl', 1, 'Eliza Ibarra for Brazzers'],
|
||||||
['swallowing', 'poster'],
|
['swallowing', 'poster'],
|
||||||
['tattoo', 'poster', 'Kali Roses in "Goes All In For Anal" for Hussie Pass'],
|
['tattoo', 'poster', 'Kali Roses in "Goes All In For Anal" for Hussie Pass'],
|
||||||
['trainbang', 'poster', 'Kali Roses in "Passing Me Around" for Blacked'],
|
['trainbang', 'poster', 'Kali Roses in "Passing Me Around" for Blacked'],
|
||||||
|
['triple-anal', 'poster', 'Kristy Black in SZ1986 for LegalPorno'],
|
||||||
]
|
]
|
||||||
.map(([slug, filename, comment], index) => ({
|
.map(([slug, filename, comment], index) => ({
|
||||||
tagSlug: slug,
|
tagSlug: slug,
|
||||||
|
@ -49,12 +49,15 @@ const tagPhotos = [
|
||||||
['airtight', 2, 'Dakota Skye in "Dakota Goes Nuts" for ArchAngel'],
|
['airtight', 2, 'Dakota Skye in "Dakota Goes Nuts" for ArchAngel'],
|
||||||
['airtight', 3, 'Anita Bellini in "Triple Dick Gangbang" for Hands On Hardcore (DDF Network)'],
|
['airtight', 3, 'Anita Bellini in "Triple Dick Gangbang" for Hands On Hardcore (DDF Network)'],
|
||||||
['anal', 'poster', 'Jynx Maze in "Anal Buffet 6" for Evil Angel'],
|
['anal', 'poster', 'Jynx Maze in "Anal Buffet 6" for Evil Angel'],
|
||||||
|
['anal', 1, 'Veronica Leal and Tina Kay in "Agents On Anal Mission" for Asshole Fever'],
|
||||||
['anal', 0],
|
['anal', 0],
|
||||||
|
['caucasian', 'poster'],
|
||||||
['da-tp', 1, 'Francys Belle in SZ1702 for LegalPorno'],
|
['da-tp', 1, 'Francys Belle in SZ1702 for LegalPorno'],
|
||||||
['da-tp', 2, 'Angel Smalls in GIO408 for LegalPorno'],
|
['da-tp', 2, 'Angel Smalls in GIO408 for LegalPorno'],
|
||||||
['da-tp', 3, 'Evelina Darling in GIO294'],
|
['da-tp', 3, 'Evelina Darling in GIO294'],
|
||||||
['da-tp', 4, 'Ninel Mojado aka Mira Cuckold in GIO063 for LegalPorno'],
|
['da-tp', 4, 'Ninel Mojado aka Mira Cuckold in GIO063 for LegalPorno'],
|
||||||
['double-anal', 2, 'Lana Rhoades in "Gangbang Me 3" for HardX'],
|
['double-anal', 6, 'Sheena Shaw in "Ass Worship 14" for Jules Jordan'],
|
||||||
|
['double-anal', 5, 'Riley Reid in "The Gangbang of Riley Reid" for Jules Jordan'],
|
||||||
['double-anal', 'poster', 'Haley Reed in "Young Hot Ass" for Evil Angel'],
|
['double-anal', 'poster', 'Haley Reed in "Young Hot Ass" for Evil Angel'],
|
||||||
['double-anal', 0, 'Nicole Black doing double anal during a gangbang in GIO971 for LegalPorno'],
|
['double-anal', 0, 'Nicole Black doing double anal during a gangbang in GIO971 for LegalPorno'],
|
||||||
['double-anal', 1, 'Ria Sunn in SZ1801 for LegalPorno'],
|
['double-anal', 1, 'Ria Sunn in SZ1801 for LegalPorno'],
|
||||||
|
|
11
src/app.js
|
@ -24,18 +24,19 @@ async function init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
const updateBaseScenes = (argv.scrape || argv.sites || argv.networks) && await fetchUpdates();
|
const updateBaseScenes = (argv.scrape || argv.sites || argv.networks) && await fetchUpdates();
|
||||||
const deepScenes = await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || [])]);
|
const deepScenes = argv.deep && await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || [])]);
|
||||||
|
|
||||||
console.log(deepScenes.map(scene => scene.movie));
|
const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);
|
||||||
|
const deepMovies = await fetchMovies([...(argv.movies || []), ...(sceneMovies || [])]);
|
||||||
const argvDeepMovies = argv.movies && await fetchMovies(argv.movies);
|
|
||||||
|
|
||||||
|
if (argv.save) {
|
||||||
await storeReleases([
|
await storeReleases([
|
||||||
...(deepScenes || []),
|
...(deepScenes || []),
|
||||||
...(argvDeepMovies || []),
|
...(deepMovies || []),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// await storeReleaseActors(updateReleases);
|
// await storeReleaseActors(updateReleases);
|
||||||
|
}
|
||||||
|
|
||||||
knex.destroy();
|
knex.destroy();
|
||||||
}
|
}
|
||||||
|
|
16
src/argv.js
|
@ -29,21 +29,27 @@ const { argv } = yargs
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'actor',
|
alias: 'actor',
|
||||||
})
|
})
|
||||||
.option('with-scenes', {
|
.option('actor-scenes', {
|
||||||
describe: 'Fetch all scenes for an actor or movie',
|
describe: 'Fetch all scenes for an actor',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
alias: 'with-releases',
|
alias: 'with-releases',
|
||||||
default: false,
|
default: false,
|
||||||
})
|
})
|
||||||
.option('with-movies', {
|
.option('movie-scenes', {
|
||||||
|
describe: 'Fetch all scenes for a movie',
|
||||||
|
type: 'boolean',
|
||||||
|
alias: 'with-releases',
|
||||||
|
default: false,
|
||||||
|
})
|
||||||
|
.option('scene-movies', {
|
||||||
describe: 'Fetch movies for scenes',
|
describe: 'Fetch movies for scenes',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
default: true,
|
default: true,
|
||||||
})
|
})
|
||||||
.option('with-profiles', {
|
.option('profiles', {
|
||||||
describe: 'Scrape profiles for new actors after fetching scenes',
|
describe: 'Scrape profiles for new actors after fetching scenes',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
alias: 'with-actors',
|
alias: 'bios',
|
||||||
default: false,
|
default: false,
|
||||||
})
|
})
|
||||||
.option('scene', {
|
.option('scene', {
|
||||||
|
|
|
@ -99,7 +99,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const scraper = scrapers.releases[site.slug];
|
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
||||||
|
|
||||||
if (!scraper) {
|
if (!scraper) {
|
||||||
logger.warn(`Could not find scraper for ${baseRelease.url}`);
|
logger.warn(`Could not find scraper for ${baseRelease.url}`);
|
||||||
|
@ -124,6 +124,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
||||||
};
|
};
|
||||||
|
|
||||||
if (scrapedRelease && baseRelease?.tags) {
|
if (scrapedRelease && baseRelease?.tags) {
|
||||||
|
// accumulate all available tags
|
||||||
mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags);
|
mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -141,7 +141,9 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(`https://ddfnetwork.com${new URL(url).pathname}`);
|
// DDF's main site moved to Porn World
|
||||||
|
// const res = await bhttp.get(`https://ddfnetwork.com${new URL(url).pathname}`);
|
||||||
|
const res = await bhttp.get(url);
|
||||||
|
|
||||||
return scrapeScene(res.body.toString(), url, site);
|
return scrapeScene(res.body.toString(), url, site);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,10 +2,10 @@
|
||||||
|
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
|
|
||||||
const argv = require('./argv');
|
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const slugify = require('./utils/slugify');
|
const slugify = require('./utils/slugify');
|
||||||
|
const { associateTags } = require('./tags');
|
||||||
|
|
||||||
function curateReleaseEntry(release, batchId, existingRelease) {
|
function curateReleaseEntry(release, batchId, existingRelease) {
|
||||||
const slug = slugify(release.title, '-', {
|
const slug = slugify(release.title, '-', {
|
||||||
|
@ -34,7 +34,7 @@ function curateReleaseEntry(release, batchId, existingRelease) {
|
||||||
updated_batch_id: batchId,
|
updated_batch_id: batchId,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!existingRelease) {
|
if (!existingRelease && !release.id) {
|
||||||
curatedRelease.created_batch_id = batchId;
|
curatedRelease.created_batch_id = batchId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ async function attachChannelSites(releases) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.error(`Unable to match channel '${release.channel.slug || release.channel}' from generic URL ${release.url}`);
|
logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
})
|
})
|
||||||
|
@ -93,15 +93,41 @@ async function attachStudios(releases) {
|
||||||
return releasesWithStudio;
|
return releasesWithStudio;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Attach the database ID of the matching stored release to every release.
 *
 * Stored release rows are indexed by `site_id` and then `entry_id`; each release
 * is looked up through `release.site.id` and `release.entryId`.
 *
 * @param {Object[]} releases - Releases carrying `site.id` and `entryId`.
 * @param {Object[]} storedReleases - Release rows carrying `id`, `site_id` and `entry_id`.
 * @returns {Object[]} Shallow copies of the releases with an added `id`; the `id`
 *   is `undefined` when no stored row matches the release.
 */
function attachReleaseIds(releases, storedReleases) {
	const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
		if (!acc[release.site_id]) acc[release.site_id] = {};

		acc[release.site_id][release.entry_id] = release.id;

		return acc;
	}, {});

	return releases.map(release => ({
		...release,
		// a site without any stored rows has no bucket at all; yield undefined instead of throwing
		id: storedReleaseIdsBySiteIdAndEntryId[release.site.id]?.[release.entryId],
	}));
}
|
||||||
|
|
||||||
async function extractUniqueReleases(releases) {
|
async function extractUniqueReleases(releases) {
|
||||||
const duplicateReleaseEntries = await knex('releases')
|
const duplicateReleaseEntries = await knex('releases')
|
||||||
.whereIn(['entry_id', 'site_id'], releases.map(release => [release.entryId, release.site.id]));
|
.whereIn(['entry_id', 'site_id'], releases.map(release => [release.entryId, release.site.id]));
|
||||||
|
|
||||||
const duplicateReleaseEntryKeys = new Set(duplicateReleaseEntries.map(releaseEntry => `${releaseEntry.site_id}_${releaseEntry.entry_id}`));
|
const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
|
||||||
const duplicateReleases = releases.filter(release => duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));
|
if (!acc[release.site_id]) acc[release.site_id] = {};
|
||||||
const uniqueReleases = releases.filter(release => !duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));
|
acc[release.site_id][release.entry_id] = true;
|
||||||
|
|
||||||
return { duplicateReleases, uniqueReleases };
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
const duplicateReleases = releases.filter(release => duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
|
||||||
|
const uniqueReleases = releases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
|
||||||
|
|
||||||
|
return {
|
||||||
|
uniqueReleases,
|
||||||
|
duplicateReleases,
|
||||||
|
duplicateReleaseEntries,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function storeReleases(releases) {
|
async function storeReleases(releases) {
|
||||||
|
@ -111,19 +137,19 @@ async function storeReleases(releases) {
|
||||||
const releasesWithStudios = await attachStudios(releasesWithSites);
|
const releasesWithStudios = await attachStudios(releasesWithSites);
|
||||||
|
|
||||||
// uniqueness is site ID + entry ID, filter uniques after adding sites
|
// uniqueness is site ID + entry ID, filter uniques after adding sites
|
||||||
const { uniqueReleases, duplicateReleases } = await extractUniqueReleases(releasesWithStudios);
|
const { uniqueReleases, duplicateReleaseEntries } = await extractUniqueReleases(releasesWithStudios);
|
||||||
|
|
||||||
console.log(argv.redownload, duplicateReleases);
|
const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
|
||||||
|
|
||||||
const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
|
const storedReleases = await knex('releases').insert(curatedNewReleaseEntries).returning('*');
|
||||||
const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');
|
// TODO: update duplicate releases
|
||||||
|
|
||||||
if (Array.isArray(storedReleases)) {
|
const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
|
||||||
return storedReleases;
|
const releasesWithId = attachReleaseIds(releases, [].concat(storedReleaseEntries, duplicateReleaseEntries));
|
||||||
}
|
|
||||||
|
|
||||||
// nothing inserted
|
await associateTags(releasesWithId);
|
||||||
return [];
|
|
||||||
|
return releasesWithId;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
@ -0,0 +1,110 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const logger = require('./logger')(__filename);
|
||||||
|
const knex = require('./knex');
|
||||||
|
const whereOr = require('./utils/where-or');
|
||||||
|
|
||||||
|
/**
 * Shape a raw tag row into the curated tag object.
 *
 * Loads, in parallel, the `tags` rows whose `alias_for` equals this tag's ID and
 * the `media` rows with domain 'tags' targeting this tag, ordered by `index`.
 *
 * @param {Object} tag - Tag row; reads `id`, `name`, `slug`, `description` and the
 *   `group_id`, `group_name`, `group_description`, `group_slug` columns.
 * @returns {Promise<Object>} Curated tag with `poster` (first media row with role
 *   'poster', or undefined), `photos` (media rows with role 'photo'), `group` and
 *   `aliases` (alias names).
 */
async function curateTag(tag) {
	const aliasQuery = knex('tags').where({ alias_for: tag.id });
	const mediaQuery = knex('media')
		.where('domain', 'tags')
		.andWhere('target_id', tag.id)
		.orderBy('index');

	const [aliasRows, mediaRows] = await Promise.all([aliasQuery, mediaQuery]);

	const group = {
		id: tag.group_id,
		name: tag.group_name,
		description: tag.group_description,
		slug: tag.group_slug,
	};

	return {
		id: tag.id,
		name: tag.name,
		slug: tag.slug,
		description: tag.description,
		poster: mediaRows.find(item => item.role === 'poster'),
		photos: mediaRows.filter(item => item.role === 'photo'),
		group,
		aliases: aliasRows.map(alias => alias.name),
	};
}
|
||||||
|
|
||||||
|
/**
 * Curate a list of tag rows concurrently.
 *
 * @param {Object[]} tags - Tag rows, each passed to `curateTag`.
 * @returns {Promise<Object[]>} Curated tags in the same order as the input.
 */
function curateTags(tags) {
	const pendingTags = tags.map(tag => curateTag(tag));

	return Promise.all(pendingTags);
}
|
||||||
|
|
||||||
|
/**
 * Look up tag IDs for a list of raw tag names.
 *
 * Falsy entries are dropped; every remaining name is matched against `tags.name`
 * as given, lower-cased and upper-cased. The `tags` table is joined onto itself
 * as `aliases` (on `tags.alias_for = aliases.id` or `tags.id = aliases.id`) and
 * the distinct `aliases.id` values are returned; presumably this resolves alias
 * entries to the tag they point at.
 *
 * @param {string[]} rawTags - Tag names as returned by a scraper.
 * @returns {Promise<Array>} Plucked `aliases.id` values.
 */
async function matchTags(rawTags) {
	const names = rawTags.filter(Boolean);

	const candidateNames = [
		...names,
		...names.map(name => name.toLowerCase()),
		...names.map(name => name.toUpperCase()),
	];

	const matchedIds = await knex('tags')
		.pluck('aliases.id')
		.whereIn('tags.name', candidateNames)
		.leftJoin('tags as aliases', function joinAliases() {
			this.on('tags.alias_for', 'aliases.id').orOn('tags.id', 'aliases.id');
		})
		.where(function whereCanonical() {
			this.whereNull('tags.alias_for').orWhereNull('aliases.alias_for');
		})
		.groupBy('aliases.id');

	return matchedIds;
}
|
||||||
|
|
||||||
|
/**
 * Link a single stored release to its tags in `releases_tags`.
 *
 * Combines the release's own tags with the site tags flagged `inherit === true`,
 * skips associations that already exist for the release, and inserts the rest.
 *
 * @param {Object} release - Release; reads `tags`, `title`, `site.tags` and `site.name`.
 * @param {*} releaseId - ID of the stored release row.
 * @returns {Promise<void>}
 */
async function associateTags(release, releaseId) {
	const inheritedTagIds = release.site?.tags?.filter(tag => tag.inherit === true).map(tag => tag.id) || [];
	const rawReleaseTags = release.tags?.filter(Boolean) || [];

	// any string means the scraper returned raw names; otherwise the tags were
	// already matched by an (outdated) scraper
	const hasRawNames = rawReleaseTags.some(tag => typeof tag === 'string');
	const releaseTags = hasRawNames ? await matchTags(release.tags) : rawReleaseTags;

	const tags = [...new Set([...releaseTags, ...inheritedTagIds])];

	if (tags.length === 0) {
		logger.info(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
		return;
	}

	const storedAssociations = await knex('releases_tags')
		.where('release_id', releaseId)
		.whereIn('tag_id', tags);

	const associatedTagIds = new Set(storedAssociations.map(entry => entry.tag_id));

	const newEntries = tags
		.filter(tagId => !associatedTagIds.has(tagId))
		.map(tagId => ({
			tag_id: tagId,
			release_id: releaseId,
		}));

	await knex('releases_tags').insert(newEntries);
}
|
||||||
|
|
||||||
|
/**
 * Fetch non-alias tags matching a tag query or a tag group query, then curate them.
 *
 * @param {Object} queryObject - Conditions handed to `whereOr` for the `tags` table.
 * @param {Object} groupsQueryObject - Conditions handed to `whereOr` for the `tags_groups` table.
 * @param {number} [limit=100] - Maximum number of tag rows to fetch.
 * @returns {Promise<Object[]>} Curated tags, ordered by name.
 */
async function fetchTags(queryObject, groupsQueryObject, limit = 100) {
	// NOTE(review): the chain below reads as `tagQuery OR groupQuery AND alias_for IS NULL`;
	// confirm whether the alias filter is meant to apply to both branches
	const tags = await knex('tags')
		.where(builder => whereOr(queryObject, 'tags', builder))
		.orWhere(builder => whereOr(groupsQueryObject, 'tags_groups', builder))
		.andWhere({ 'tags.alias_for': null })
		.select(
			'tags.*',
			'tags_groups.id as group_id',
			'tags_groups.name as group_name',
			'tags_groups.slug as group_slug',
			// curateTag reads `group_description`; the former `groups_description` alias never reached it
			'tags_groups.description as group_description',
		)
		.leftJoin('tags_groups', 'tags.group_id', 'tags_groups.id')
		.orderBy('name')
		.limit(limit);

	return curateTags(tags);
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
associateTags,
|
||||||
|
fetchTags,
|
||||||
|
matchTags,
|
||||||
|
};
|
167
src/tags.js
|
@ -1,110 +1,103 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const logger = require('./logger')(__filename);
|
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const whereOr = require('./utils/where-or');
|
const slugify = require('./utils/slugify');
|
||||||
|
|
||||||
async function curateTag(tag) {
|
async function matchReleaseTags(releases) {
|
||||||
const [aliases, media] = await Promise.all([
|
const rawTags = releases
|
||||||
knex('tags').where({ alias_for: tag.id }),
|
.map(release => release.tags).flat()
|
||||||
knex('media')
|
.filter(Boolean);
|
||||||
.where('domain', 'tags')
|
|
||||||
.andWhere('target_id', tag.id)
|
|
||||||
.orderBy('index'),
|
|
||||||
]);
|
|
||||||
|
|
||||||
return {
|
const casedTags = Array.from(new Set(
|
||||||
id: tag.id,
|
rawTags
|
||||||
name: tag.name,
|
.concat(rawTags.map(tag => tag.toLowerCase()))
|
||||||
slug: tag.slug,
|
.concat(rawTags.map(tag => tag.toUpperCase())),
|
||||||
description: tag.description,
|
));
|
||||||
poster: media.find(photo => photo.role === 'poster'),
|
|
||||||
photos: media.filter(photo => photo.role === 'photo'),
|
|
||||||
group: {
|
|
||||||
id: tag.group_id,
|
|
||||||
name: tag.group_name,
|
|
||||||
description: tag.group_description,
|
|
||||||
slug: tag.group_slug,
|
|
||||||
},
|
|
||||||
aliases: aliases.map(({ name }) => name),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
function curateTags(tags) {
|
|
||||||
return Promise.all(tags.map(async tag => curateTag(tag)));
|
|
||||||
}
|
|
||||||
|
|
||||||
async function matchTags(rawTags) {
|
|
||||||
const filteredTags = rawTags.filter(Boolean);
|
|
||||||
|
|
||||||
const tags = filteredTags
|
|
||||||
.concat(filteredTags.map(tag => tag.toLowerCase()))
|
|
||||||
.concat(filteredTags.map(tag => tag.toUpperCase()));
|
|
||||||
|
|
||||||
const tagEntries = await knex('tags')
|
const tagEntries = await knex('tags')
|
||||||
.pluck('aliases.id')
|
.select('tags.id', 'tags.name', 'tags.alias_for')
|
||||||
.whereIn('tags.name', tags)
|
.whereIn('tags.name', casedTags);
|
||||||
.leftJoin('tags as aliases', function join() {
|
|
||||||
this
|
|
||||||
.on('tags.alias_for', 'aliases.id')
|
|
||||||
.orOn('tags.id', 'aliases.id');
|
|
||||||
})
|
|
||||||
.where(function where() {
|
|
||||||
this
|
|
||||||
.whereNull('tags.alias_for')
|
|
||||||
.orWhereNull('aliases.alias_for');
|
|
||||||
})
|
|
||||||
.groupBy('aliases.id');
|
|
||||||
|
|
||||||
return tagEntries;
|
const tagIdsBySlug = tagEntries
|
||||||
|
.reduce((acc, tag) => ({
|
||||||
|
...acc,
|
||||||
|
[slugify(tag.name)]: tag.alias_for || tag.id,
|
||||||
|
}), {});
|
||||||
|
|
||||||
|
return tagIdsBySlug;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function associateTags(release, releaseId) {
|
async function getSiteTags(releases) {
|
||||||
const siteTags = release.site?.tags?.filter(tag => tag.inherit === true).map(tag => tag.id) || [];
|
const siteIds = releases.map(release => release.site.id);
|
||||||
|
const siteTags = await knex('sites_tags').whereIn('site_id', siteIds);
|
||||||
|
|
||||||
const rawReleaseTags = release.tags?.filter(Boolean) || [];
|
const siteTagIdsBySiteId = siteTags.reduce((acc, siteTag) => {
|
||||||
const releaseTags = rawReleaseTags.some(tag => typeof tag === 'string')
|
if (!acc[siteTag.site_id]) {
|
||||||
? await matchTags(release.tags) // scraper returned raw tags
|
acc[siteTag.site_id] = [];
|
||||||
: rawReleaseTags; // tags already matched by (outdated) scraper
|
|
||||||
|
|
||||||
const tags = Array.from(new Set(releaseTags.concat(siteTags)));
|
|
||||||
|
|
||||||
if (tags.length === 0) {
|
|
||||||
logger.info(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const associationEntries = await knex('releases_tags')
|
acc[siteTag.site_id].push(siteTag.tag_id);
|
||||||
.where('release_id', releaseId)
|
|
||||||
.whereIn('tag_id', tags);
|
|
||||||
|
|
||||||
const existingAssociations = new Set(associationEntries.map(association => association.tag_id));
|
return acc;
|
||||||
const newAssociations = tags.filter(tagId => !existingAssociations.has(tagId));
|
}, {});
|
||||||
|
|
||||||
await knex('releases_tags').insert(newAssociations.map(tagId => ({
|
return siteTagIdsBySiteId;
|
||||||
tag_id: tagId,
|
|
||||||
release_id: releaseId,
|
|
||||||
})));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchTags(queryObject, groupsQueryObject, limit = 100) {
|
function buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId) {
|
||||||
const tags = await knex('tags')
|
const tagAssociations = releases
|
||||||
.where(builder => whereOr(queryObject, 'tags', builder))
|
.map((release) => {
|
||||||
.orWhere(builder => whereOr(groupsQueryObject, 'tags_groups', builder))
|
const siteTagIds = siteTagIdsBySiteId[release.site.id];
|
||||||
.andWhere({ 'tags.alias_for': null })
|
|
||||||
.select(
|
|
||||||
'tags.*',
|
|
||||||
'tags_groups.id as group_id', 'tags_groups.name as group_name', 'tags_groups.slug as group_slug', 'tags_groups.description as groups_description',
|
|
||||||
)
|
|
||||||
.leftJoin('tags_groups', 'tags.group_id', 'tags_groups.id')
|
|
||||||
.orderBy('name')
|
|
||||||
.limit(limit);
|
|
||||||
|
|
||||||
return curateTags(tags);
|
const releaseTagIds = release.tags.every(tag => typeof tag === 'number')
|
||||||
|
? release.tags // obsolete scraper returned pre-matched tags
|
||||||
|
: release.tags.map(tag => tagIdsBySlug[slugify(tag)]);
|
||||||
|
|
||||||
|
return Array.from(new Set(
|
||||||
|
// filter duplicates and empties
|
||||||
|
releaseTagIds
|
||||||
|
.concat(siteTagIds)
|
||||||
|
.filter(Boolean),
|
||||||
|
))
|
||||||
|
.map(tagId => ({
|
||||||
|
release_id: release.id,
|
||||||
|
tag_id: tagId,
|
||||||
|
}));
|
||||||
|
})
|
||||||
|
.flat();
|
||||||
|
|
||||||
|
return tagAssociations;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Drop the release/tag associations that are already stored in `releases_tags`.
 *
 * @param {Object[]} tagAssociations - Candidate rows with `release_id` and `tag_id`.
 * @returns {Promise<Object[]>} The candidates that have no matching stored row.
 */
async function extractUniqueAssociations(tagAssociations) {
	const candidatePairs = tagAssociations.map(association => [association.release_id, association.tag_id]);
	const storedAssociations = await knex('releases_tags').whereIn(['release_id', 'tag_id'], candidatePairs);

	// nested lookup: release ID -> tag ID -> true
	const storedByReleaseAndTag = {};

	for (const stored of storedAssociations) {
		if (!storedByReleaseAndTag[stored.release_id]) {
			storedByReleaseAndTag[stored.release_id] = {};
		}

		storedByReleaseAndTag[stored.release_id][stored.tag_id] = true;
	}

	return tagAssociations.filter((association) => {
		const alreadyStored = storedByReleaseAndTag[association.release_id]?.[association.tag_id];

		return !alreadyStored;
	});
}
|
||||||
|
|
||||||
|
async function associateTags(releases) {
|
||||||
|
const tagIdsBySlug = await matchReleaseTags(releases);
|
||||||
|
const siteTagIdsBySiteId = await getSiteTags(releases);
|
||||||
|
|
||||||
|
const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId);
|
||||||
|
const uniqueAssociations = extractUniqueAssociations(tagAssociations);
|
||||||
|
|
||||||
|
await knex('releases_tags').insert(uniqueAssociations);
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
associateTags,
|
associateTags,
|
||||||
fetchTags,
|
|
||||||
matchTags,
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -34,7 +34,7 @@ async function extractUniqueReleases(latestReleases, accReleases) {
|
||||||
|
|
||||||
// add entry IDs of accumulated releases to prevent an infinite scrape loop
|
// add entry IDs of accumulated releases to prevent an infinite scrape loop
|
||||||
// when one page contains the same release as the previous
|
// when one page contains the same release as the previous
|
||||||
const duplicateReleaseIdentifiers = duplicateReleases
|
const duplicateReleasesSiteIdAndEntryIds = duplicateReleases
|
||||||
.concat(accReleases)
|
.concat(accReleases)
|
||||||
.reduce((acc, release) => {
|
.reduce((acc, release) => {
|
||||||
const siteId = release.site_id || release.site.id;
|
const siteId = release.site_id || release.site.id;
|
||||||
|
@ -47,7 +47,7 @@ async function extractUniqueReleases(latestReleases, accReleases) {
|
||||||
}, {});
|
}, {});
|
||||||
|
|
||||||
const uniqueReleases = latestReleases
|
const uniqueReleases = latestReleases
|
||||||
.filter(release => !duplicateReleaseIdentifiers[release.site.id]?.[release.entryId]);
|
.filter(release => !duplicateReleasesSiteIdAndEntryIds[release.site.id]?.[release.entryId]);
|
||||||
|
|
||||||
return uniqueReleases;
|
return uniqueReleases;
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,11 +52,11 @@ queue.define('http', async ({
|
||||||
|
|
||||||
const reqOptions = {
|
const reqOptions = {
|
||||||
headers: {
|
headers: {
|
||||||
|
...(options.defaultHeaders !== false && defaultHeaders),
|
||||||
...headers,
|
...headers,
|
||||||
...defaultHeaders,
|
|
||||||
},
|
},
|
||||||
...options,
|
|
||||||
...defaultOptions,
|
...defaultOptions,
|
||||||
|
...options,
|
||||||
...(options.timeout && { responseTimeout: options.timeout }),
|
...(options.timeout && { responseTimeout: options.timeout }),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -288,9 +288,7 @@ function extractAll(htmlValue, selector) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function get(urlValue, selector, headers, options, queryAll = false) {
|
async function get(urlValue, selector, headers, options, queryAll = false) {
|
||||||
const res = await http.get(urlValue, {
|
const res = await http.get(urlValue, headers);
|
||||||
headers,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
const item = queryAll
|
const item = queryAll
|
||||||
|
|
|
@ -4,6 +4,10 @@ function slugify(string, delimiter = '-', {
|
||||||
encode = false,
|
encode = false,
|
||||||
limit = 1000,
|
limit = 1000,
|
||||||
} = {}) {
|
} = {}) {
|
||||||
|
if (!string) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
const slugComponents = string.trim().toLowerCase().match(/\w+/g);
|
const slugComponents = string.trim().toLowerCase().match(/\w+/g);
|
||||||
|
|
||||||
if (!slugComponents) {
|
if (!slugComponents) {
|
||||||
|
|