From b188bc5744a45e587870981c4567edef43f77817 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Thu, 29 Oct 2020 15:20:59 +0100 Subject: [PATCH] Filtering out empty or unidentified scenes from update scraper, with warning. Improved Jesse Loads Monster Facials reliability. --- public/img/logos/evilangel/evilangel.png | Bin 20100 -> 20100 bytes .../misc/evil-angel_halloween-2020.svg | 2 ++ public/img/logos/evilangel/network.png | Bin 20100 -> 20100 bytes seeds/02_sites.js | 6 ++--- src/scrapers/jesseloadsmonsterfacials.js | 22 ++++++++++++++---- src/updates.js | 10 ++++++-- 6 files changed, 30 insertions(+), 10 deletions(-) create mode 100644 public/img/logos/evilangel/misc/evil-angel_halloween-2020.svg diff --git a/public/img/logos/evilangel/evilangel.png b/public/img/logos/evilangel/evilangel.png index be03f1a2fd284d002b906d2467b69f7a72accde6..4b3fe34b6b982d2158796bdac7c16d975762a5e9 100644 GIT binary patch delta 442 zcmV;r0Y(0VodJZM0gys}rBbR6W)SI+p*po7D&nYBC_;r$E41ohKKcbs8j=(jN5Qq= z;KyRs!Nplu2UkH5`~Y!sbW(JY691PJTEuv8+>dwn9(V5mu31$yt7jb0blc3Nl0q)O zDuiAUMie{-5SJ9om=nb`e8<;40&KlYuq^*`e~x~&U@1UAAf8o!4AUmwAfDc|4aWP# zyi%4_;&bA-Nf#u3WV-C}8{?A89Lwa*Y-XOACl*RwEO)U|HZ|fY;+U%Gl+WcnRvB+G z)~a=-eNXnnNI_p&W;)F%QdqkM+H^bNYJX2Vj)BONgw}+>leu-ldA%S91EyH zgXH?b{@{1FR&jEF!b^%If!-I#`4|O4yFjbqIN!&P(>eiy&%l-5@zy9Sx0hc?#@RK2#k}Cyia>XL>en#Jv2ZnBe@S4|KdmpC{K$f~n-v9@Pz*vd0 z*L~jI*WKH{XWIS!0E{Pcu|4~{^#A|>24YJ`L;(K)YygNXk%>j600009a7bBm000fw k000fw0YWI7cmMzZ2XskIMF->x925@?^LS`%v+DtwOwyRj?EnA( delta 442 zcmV;r0Y(0VodJZM0gys}#Sf|ub`WvMP&-)=6>-!m6rn<>6nNgNw7S4z7YA_yOYN=%nZ(CH^ldw21NGxF7HCJ?`ECLcPp1t79C{blXfN;$kMd zDt5g>KoI?iqDN+yF(*k$_>Ql81o(Ov<5~Xa{v3U3&SF47B%Wn|hG`RT5KnK~2Iqa^ zFe}O`@j3CBNf#u3C`-Nm{=^dvC_t@Xlle$#1U1~DPPEVta9Gs ztd*;*c~AbrP)=W2<~q$0B(R7jND!f*iW17O5u;Tn#X^eq<39dj*DsMvAy)~E91EyG zgY5dj|KNAGR(^7S!b=LpfzB7l`4|Pdc7aCCalVfor*Q%VpMfjA<*(F%nNQMdEiG~c z3~U1z*DX!i11@)fp(jH&WmgK)6!LlC{fxdT3k==@y=z`?&3&9c0BPzfc>^3A0?`6x zulu~ayS=x6&ouk{0S^Cie@=h@lmGw#24YJ`L;&~zy#O67FNdLd00009a7bBm000fw k000fw0YWI7cmMzZ2XskIMF->p0~Zz+*ipL4v+DtwOg8<;{{R30 diff --git a/public/img/logos/evilangel/misc/evil-angel_halloween-2020.svg b/public/img/logos/evilangel/misc/evil-angel_halloween-2020.svg new file mode 100644 index 00000000..7bc5f40a --- /dev/null +++ b/public/img/logos/evilangel/misc/evil-angel_halloween-2020.svg @@ -0,0 +1,2 @@ + +EA-Halloween \ No newline at end of file diff --git a/public/img/logos/evilangel/network.png b/public/img/logos/evilangel/network.png index 2734e68576cc7a89f71bb4631ca6b96acec499d1..13f5db8647d1ea303bfe072898cb72b044c2e534 100644 GIT binary patch delta 442 zcmV;r0Y(0VodJZM0gys}rBbR6W)SI+p*po7D&nYBC_;r$E41ohKKcbs8j=(jN5Qq= z;KyRs!Nplu2UkH5`~Y!sbW(JY691PJTEuv8+>dwn9(V5mu31$yt7jb0blc3Nl0q)O zDuiAUMie{-5SJ9om=nb`e8<;40&KlYuq^*`e~x~&U@1UAAf8o!4AUmwAfDc|4aWP# zyi%4_;&bA-Nf#u3WV-C}8{?A89Lwa*Y-XOACl*RwEO)U|HZ|fY;+U%Gl+WcnRvB+G z)~a=-eNXnnNI_p&W;)F%QdqkM+H^bNYJX2Vj)BONgw}+>leu-ldA%S91EyH zgXH?b{@{1FR&jEF!b^%If!-I#`4|O4yFjbqIN!&P(>eiy&%l-5@zy9Sx0hc?#@RK2#k}Cyia>XL>en#Jv2ZnBe@S4|KdmpC{K$f~n-v9@Pz*vd0 z*L~jI*WKH{XWIS!0E{Pcu|4~{^#A|>24YJ`L;(K)YygNXk%>j600009a7bBm000fw k000fw0YWI7cmMzZ2XskIMF->x925@`=Xo?Ev+DtwOwT&XX#fBK delta 442 zcmV;r0Y(0VodJZM0gys}#Sf|ub`WvMP&-)=6>-!m6rn<>6nNgNw7S4z7YA_yOYN=%nZ(CH^ldw21NGxF7HCJ?`ECLcPp1t79C{blXfN;$kMd zDt5g>KoI?iqDN+yF(*k$_>Ql81o(Ov<5~Xa{v3U3&SF47B%Wn|hG`RT5KnK~2Iqa^ zFe}O`@j3CBNf#u3C`-Nm{=^dvC_t@Xlle$#1U1~DPPEVta9Gs ztd*;*c~AbrP)=W2<~q$0B(R7jND!f*iW17O5u;Tn#X^eq<39dj*DsMvAy)~E91EyG zgY5dj|KNAGR(^7S!b=LpfzB7l`4|Pdc7aCCalVfor*Q%VpMfjA<*(F%nNQMdEiG~c z3~U1z*DX!i11@)fp(jH&WmgK)6!LlC{fxdT3k==@y=z`?&3&9c0BPzfc>^3A0?`6x zulu~ayS=x6&ouk{0S^Cie@=h@lmGw#24YJ`L;&~zy#O67FNdLd00009a7bBm000fw k000fw0YWI7cmMzZ2XskIMF->p0~Zz%Ml~;Lv+DtwOe(_0VgLXD diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 5e526f1c..daf9075c 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -3415,21 +3415,21 @@ const sites = [ { slug: 'paintoy', name: 'Paintoy', - url: 'https://www.paintoy.com', + url: 'http://www.paintoy.com', tags: ['bdsm'], parent: 'insex', }, { slug: 'aganmedon', name: 'Agan Medon', - url: 'https://www.aganmedon.com', + url: 'http://www.aganmedon.com', tags: ['bdsm', 'animated'], parent: 'insex', }, { slug: 'sensualpain', name: 'Sensual Pain', - url: 'https://www.sensualpain.com', + url: 'http://www.sensualpain.com', tags: ['bdsm'], parent: 'insex', }, diff --git a/src/scrapers/jesseloadsmonsterfacials.js b/src/scrapers/jesseloadsmonsterfacials.js index 19a49155..29050d2b 100644 --- a/src/scrapers/jesseloadsmonsterfacials.js +++ b/src/scrapers/jesseloadsmonsterfacials.js @@ -1,23 +1,35 @@ 'use strict'; -const { get, initAll } = require('../utils/qu'); +const { get, initAll, formatDate } = require('../utils/qu'); function scrapeLatest(scenes, dates, site) { return scenes.map(({ qu }, index) => { const release = {}; + const path = qu.url('a[href*="videos/"]'); - const path = qu.url('a'); - release.url = `${site.url}/visitors/${path}`; - release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1]; + if (path) { + release.url = `${site.url}/visitors/${path}`; + } if (dates && dates[index]) { release.date = dates[index].qu.date(null, 'MM/DD/YYYY'); } + const entryId = path?.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1] + || qu.img('img[src*="graphics/fft"]')?.match(/fft_(\w+).gif/)?.[1]; + + if (!entryId) { + return null; + } + + release.entryId = release.date ? `${formatDate(release.date, 'YYYY-MM-DD')}-${entryId}` : entryId; release.description = qu.q('tbody tr:nth-child(3) font', true); const infoLine = qu.q('font[color="#663366"]', true); - if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60; + + if (infoLine) { + release.duration = Number(infoLine.match(/(\d+) min/i)?.[1] || infoLine.match(/video: (\d+)/i)?.[1]) * 60 || null; + } const poster = qu.img('img[src*="photos/"][width="400"]'); release.poster = `${site.url}/visitors/${poster}`; diff --git a/src/updates.js b/src/updates.js index c58303e7..fa6aeab6 100644 --- a/src/updates.js +++ b/src/updates.js @@ -109,7 +109,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) { return accReleases; } - const pageReleasesWithEntity = pageReleases.map(release => ({ ...release, entity: release.entity || entity })); + const validPageReleases = pageReleases.filter(release => release?.entryId); // filter out empty and unidentified releases + const pageReleasesWithEntity = validPageReleases.map(release => ({ ...release, entity: release.entity || entity })); + + if (pageReleases.length > validPageReleases.length) { + logger.warn(`Found ${pageReleases.length - validPageReleases.length} empty or unidentified releases on page ${page} for '${entity.name}'`); + } if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) { return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming); @@ -119,6 +124,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) { } const releases = await scrapeReleasesPage(argv.page || 1, []); + const hasDates = releases.every(release => !!release.date); const limitedReleases = (argv.last && releases.slice(0, Math.max(argv.last, 0))) @@ -133,7 +139,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) { } async function scrapeLatestReleases(scraper, entity, preData) { - if ((!argv.latest && !argv.last && !argv.after) || !scraper.fetchLatest) { + if ((!argv.latest && !argv.last) || !scraper.fetchLatest) { return emptyReleases; }