From a310f9bb1dcdb8fb9d3fb1ef84804c4a01f485d8 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Thu, 12 Dec 2019 03:12:05 +0100 Subject: [PATCH] Added support for release photo fallbacks. Limited photo fetching concurrency. Modifier XEmpire scraper for AllBlackX support and improved photo scraping. Added movie property to Evil Angel scraper. --- public/img/logos/xempire/allblackx.png | Bin 0 -> 14327 bytes seeds/01_sites.js | 9 ++- seeds/03_tags.js | 4 + src/media.js | 30 +++++++- src/networks.js | 1 + src/releases.js | 18 +++-- src/scrapers/julesjordan.js | 15 ++-- src/scrapers/xempire.js | 99 +++++++++++++------------ src/sites.js | 7 +- 9 files changed, 113 insertions(+), 70 deletions(-) create mode 100644 public/img/logos/xempire/allblackx.png diff --git a/public/img/logos/xempire/allblackx.png b/public/img/logos/xempire/allblackx.png new file mode 100644 index 0000000000000000000000000000000000000000..4e542b420abbe1fd73280a7f8d7796998cffaa56 GIT binary patch literal 14327 zcmX9_2RzjO|391Tz1NYwx2$kRS&6gv-dpxK#D(}uR#s8Q$4bZwXA@Cz_BdJDBH8>u ze*YfT^|;5wU7y$cHJ-2M^Z9=9L|=>a9@9Mt1VZ{q8}<|e!6^X$9YTl?{_Q(!Knj6i z#k;GkKY66C&fyo}>*DU^41w@vL}e&x_dcO{ZeXO5ksQwmd*D=|PY2UrX}YITLE-V- zgjLt(f_fPUpa?r{jXqzt1V2SBT9QWUkX-@9Y#ksNDahDA;l^zCC>;gQG_E zM@UjQDKtJbd_r9QfI6l6y<6XApCcXXe`gsxz_@Qg3LCMaQKQ6Gm_vq{z_X8Y-PKN8 zeMWuada``BLl@QlOTVg3J*tECqkFZN%*;0))bPDB>!WhgeqyN3NGpe&Smx;0eDjYQ z>&AXuG*jokDPt-NafMpauk-F@%&FI2&sst;%_GR7Xzisq_TOFXZ*4o#?Cn0sxP&S= zrkeA%-Vn*+_tWzhyMzog_#(CQZ`uf~_95y9We)sBG-dZ`?x8Ljm5j(3zTjO1hzGl8|7r$a}sLF@-=lAdg_G#-Ve2g(0C<=z3I4 z!zuEv?tU1@&v~duh(Do2B1*yW zoPv-DSB<0W`J39c>LX$Pe5YB}}wu0Dlj> z)qqdqYhsli-!2kX;CsZPk8f>g&uGjDg&@iVZHRP)EW#9Vj#7cWpKkPE7;xw_w!8be zL~*y&5$V`ENW^B$=W*t&{t|*voHOD>+(TweI`pUGoGgi?9BHo@Nv~Myk)%Wj)&RCG z&S(Dai#qBvrNb^%4OC)zDcL@^n~})E(ozej`QlZMHg{TGU0n>7(01 zQE0K|6>A}O!Cu9g59RE9xZ2v~mIlhOsPEbWHYQmkdaUG?FDo&Kov&3L9s`Lc8>P%> zhF15wqu!+@$LPcKU#cnq*glZYLWna)Xu!zBt^)n?K_03X;kH@d)&m-hDkqyLKUa-m zb#7HIS}jdZ{#8l2?UMNQG7>5_F#ou&IfUguk@!b0-Z|kBr;8zbPFE_kHgMTzh98kv zKPNBQt5*YckuIlGIkCKm@$8p2makswN$;>{r!D~SHo-1 zQB%qSLeRXlw6xaX;9z!*J?qa+O*NU{Tw*w&RS^`Yk4;Udy>)J&lz%=ekCls2GA7wG zXa;3m^_>f2EHHJz16#PLDO$XYjSD;STWLL!y;%P0==aG4fKB7~=L@^Om)|~2zth4p zsMwwWE?$1gaD+J?|I)fIA)^3$Z|7UaK}c7*y%ie{oFbQ&mNxi0$E`$Uzh%}}q8L>v zf(CmqUheL*w6}w&3XM%o-mbl=_6*___e$}M^Vk!-#gPZYqs zLe3g{$C8gD$OQxho{n6obCZtA2W}W6haP>92=dNwVbB~sr&{UA*E&Y1lk^e{Qf+wu z)x-1qX=nHeNCt|CubNVeEn|&hyDGPvt3AWKZ8x!1iC`f}WF|!A?(kk{muj)5jYN24 z7X&?sqVfA!zNuW^*?AK`!%$Gyd9kgUfZ%=HUFPlWZClSdD3&T=r-Ctu>&QQ1@kFO)cVp89Cn+t@@b<6lOCW`YhU^hZeE4KZNl2+W_wUHg7 zsZ8uc;EN@r!B@^bK~P)v1if9So``J)3*W%0&NNOE&CQ6wBMHjNV>`t6bLgj+1crq#V85zqqc{TaT`6#-QR4UagPIy#$Jrcu)9o3Y`!Y~<1gqAv zy~L~hQ9x6@vlmH$X6`-rzZ^b}mQ+i7ij!@YKjx=76T%LnPFF@?ee>2o1_lO9V^c-L zpg*%!od4?U=!_7;uRD4DjhLC3Y*Mni&5eu#r9OmEn_sS`t0)g8a`cLQP4z|$8V9Qz zJvVgX(d_#ztmXI?s|BCI<6A16utR#_jX?5L=Re2>6@L4l;q`}w2b+34nZgOD#Ze!f zCGr$&Xbz+^w5{3&r?BN`^jjWPSTEC%++j zAeE8%l4A1b&re+d+n(y=4wwo*JEP^E#s}b*Oo&*sr zTz1R3%s7q+;u`mp7mw9^UoPs>M)}z8V3Xr8N6SyDmc4zf-jbcvf9YY zQLJKnAC+FOkbiAYYgY@*g?q2>dCG~UABuGOIvy8PFyyDWyScTf!r1g7p%SPrq}viJMk*qF*WN=~ow?_2vN6PRU!3KaO15~(+0oISxQmL!SCv30Nt zWpj*gf}w65&RneSjjPNdH3kcF|EhrOBAVWWB^a(FY#gaoM#tGWj*%$UlQrCrlbyc43R_F7C0VO-+XH$OMiSIY>*tOO98tZ~IS@stAcx|Vr)*<|V><{9zLS*) z@h+h6C@kpNobX~w6M@O;HFB)+=MTu4LcnN^7jC zd*0JzmrE2g(=o0b7X9R#+@ktKm4Um$_*AAHk&h6Bt&QXGb96zXm;*MToY!+RUkQR|h~)Tgk7?6W zngn_xd}S?-#4buO5_j=P>p(1cAJ2$i++1rVAwu)rNN&NufcG6ZVS3^|qjZ_%V?BOh2OXT|b#i-;R(R+MbhE(KqW zld>yceTob6%rq%xOaW2k{G$pZShOHC2RYP<%1+pSlA;J-5sM(!WT%tcq4Gh(3geA~ z3ki4pgE4Vb$mn}#pO6LSUXFo`j&qTe@_2HP(Ee3PAM8j^iqjtl>!eY|R>TEtrjFEa z!SRSg>Hd$qHf*lZ9j~>hd}7PO&Uf%+bR(eE2n3Xq)Ryt!fb(#mO0$IxSkh?qiYcZm z9BZs8pkz>)Y_NSt3)u_QBN#V+-{T7IIn!s`evz*}HL_xlLjKWZY$~QZr(-$Bwksv; zReGQ~q-45Y-g?^A%gWGCeE;sJbXY-PSOqsRAq(y0CyW!I%-erglp`xEt77EFd0$%2 z)59YTKe7KkX_|+(_u}FdBUn5=hun&sp{V)_J>y{SiI#dQp1z9O3|;Rsz;m5c-&P1) zpDH7qa*MxTauY|6mB4EK(OMa5IDT|SG2@Wz!#K{XoudJlTHOsxyvX$pMWxJPFQxxP zc`}EO5kJy)PF+B_r_s+>F)7kS?J$)g%v)~H@Zk+^JCQ-QO^}cW!NlkKxG+CryJiM> z7&)AkwmV!Jrz-3fnkdp5=97Yw%f=bw{IatdFG*mytVnXkA001cZIJ6KX{z7m<>7IW z4~!jh08ioNC#O0S>^AKx-KXN_VCl%p=*0NYTnV6xEB9}mr$aaHew zW&6RrT9{)YEcnGbtBgrP)yrSYzcyg1NHhJhFZ2je^$K2Jud$Txa%CuU6c<}xlsPHe8MT+ty&U<+5!1fz;w1t?ybUVFwGEmK$BC_=u6=tG6BCNaO1pdn+fsV%`sE(HL-j>Gt^TGgW8P3sf{LoCLlO)Edm(-cV zPM^n?CxVByl$4Y-g3m=i=+snM|7^9kwob=lm-nDqy&IFl&2mB!xs}sqV?ED8?1%|8 zVz5SW_5+Mhyy3xv$lp(555lQrjbOFu9uZU-F_dl7ZDlgW%#AFUYf~su#q@dE*X{MK zxnO}O!PZ>aV0+oaPWro^T?0|yKVMWDm3Oj7#5Z(wL>bwM(nr;ml>8dwAPh%bvchj$ zz+Hg*F6gVKWFxiWCrl&VXw5(I(4RyL>Y>5|c!n>!58+yxmp{KClN4+;#2)zKMIV@r z&OMyPi!w%MB%JF<;fG%m`OVphfsvBfjR(t%7wCC?+<%zkua5=wjL4+)kxjaM8JY);d60 z##d%%d)vY%HSrE|S9lEgv(mXVnDcIVQ{CU@(_eTR*t)=)a=SP?d(L6+Btw7F?A8QU zTWVzAoB=ECvOAMZcDE|3jqK~*hKkZFt5&p6>Lu=$pru0f%i@Jq)8ru|ai;MN%7JCS z;dkplDdEASYV?1LD}XU6KUQp$Dn zj$Wn2bubtF4-q`gMJ}|SEraXNgD-qBXGcs4VE^NN;Am1y>1)+GaZ`tk+~~$6{3IZM zfPHERqlZj5%yfzbTHTNVOqV-PfHnbp<&8b1OC9w947{UV|2(at$oAs>3_+uyVR;Smw(4JY8LcfSv9J#wns zH1ITKfoY*|bg^^o7C9t{B2IHc10HZ9_*QSt@lm8$QH#Im;fvlQK}4a$5_#3pb^m76 zq$Oj5PvZy_oF*Jl<$%DV`MJ3V;ls8<7LC8L?Szvg0)g<0_m46Q7$1eEC5R-wdtVsuZT~=x*grSBJ&&RLyb4 zZqPxQz@@Jqv0g_s>M!Jy!;pTS6?T0VM0&$7##!7m01*%r6vf(5pfkd=`g9g!VPT<( z<*JY!<}8pEusyYp#^~~75@3GE1wu2OTmsgY4y~z z%*K)5Dvam)F--Z$%};Hs1iepOy>B*913mIjozL@(4@{90xX-@3jgN5~AK2iBzG_-I zxUHy$7Jm@~(;3Jo)O$iH(UZ?Bg%A)7@vy;xc17$-aKW4iR$6&B6DnaBfG0v-^^ea}F`7J^EEiP&SM5Xjc69NQ!8TG%{?ecN zt@8)iR*Ha&P}v=Mq00$Q^~BKm(ZkmHRga(HO5l3xf4|sY#^1SN)?z0-(LU57w)*+} zUo7%B*)Q+U&ATmV^cA$f>QcdPX-f~)UV_iiD@2(2nlB62b;=THN65$+$T&6}1cii* z4@BLV<#<=WtF0Zg{O_SCzU$q}h!NB3U*?3im5{Fm$mb09Qy+`?fPGYiHwk`}5@g!_ zb*aa|!IMi_7xhvh2Ro$pH%{2}{w1r@GzA8h9?r7bautG5F%J~Zua9OqR9|R&Dl$C& zZDv>He4(IxRav&2n9bM2<0yP{KBe2-(9j>61{JE@{`%~ZyN^#aT}TGKR`ci2`ySTg zVkAk#|B=Qf5ph|!{Tlp?5zFp@!=}q&??qJ+U;AsOn!4qEotY0by#_A05X^rz5usI# z`8d2;kCjBBl8z>O&rw^QgQ*GSWNTjw|8;;w5%1-hc}{sjcIJc_3B99h7NF1bC8@Jq z-LNo0VLP$DWp5ALYpdc+;o_{-mO@kiS+@<)=AOq;@a(ir@nYd7fzlx~dX9?TDSN(5i#Fe!jomMF6s{cG#p;s)q z@o>f%K2C6hZ$(zUZng#&B*Hk5DD!~h(Z^Pq8){2Yw5wpQhOuujgPZn7-3+?cv<^g% zF4MLUXopM5@aEQ=%Yn@_dB|M6a)0^Nu#+c0?xoxc%fVxP+XZ)e$?(6wd1J|0lvcxi zzx)}%n%X1j{e9qI44*bdD;b|i!S|k^%R$1B%5CzL{UrwxS++CVoo9)ZmN&IP=e`u7&vD2} z*l%oo>WuNd6J{Mhj{TlL%Rk1)mvwR*CZ5y(_y@FgAQcauIq3LXUUR%sPevA#?ZX{BL`_|2j?kH08^QESF3S&S$}E(bFH_2oXyVqh-tHP4j3&~68OTs{1>lw~)$OS1BhI};NV6?A<5wh4s( z{{HT=jmb@8Z3*NkNe|n{ctj}W9X3YudTT})6DbYb5XZ2!NTcWVEIbc{F8^VT)zndy z;Rie)`Q|WPX*!CLx_C_0B7M+rCkGBdCGyc^HEZJ&NlYr=WPe_Fo;Ej+oH6EM$RkDx zr=gBm!P}GVqN1YJ>UQv>Rf`Dz(PWJ1(GCP`Of|)-v(VlGNSc7<@9X@oH9*_fsFvE* z>3?ZWR;BD-IHOY1Eg!%MqlRhXo9WWi)61B79pK=HulfY9MdHhY_O>?sT{=rs(^JaB z1i|NTEWxNyt{S~1I}1P%N0(kz_FMlvSGaf8ITBIei9?_uFn{9ybAHO}W%Q9XT4GRy*}J=6H^wtfua*fS*j-L37S#*CAN7M|=0f!^6Echs09(QV#~P%PY1CjgsnLh56Hb zEHC}Fj9(G7emhjrHLAB&nO*YaxE0}zE!G~qom zd{jQs8{5l*4zUjoK8e?u2@a(WoqA`exYU*hhQ>~6g?}?2cw>ULFQamlPbIMWK#(U0 z?Vni--ZE(yuuoW$V=S-+k!=MG*HTm3fQ9RZ^wY&fCugLDiA^)KngdbR{| za4LdN&B4rpF9u)y-Lm~u3v#?LGg>$7n=-&rk98n`MBkJ~ko&d{LH?@*2hN&2AG!Ig z$DQVJCcx(8;Wy}#@BJ(bdDWexNO>Lv(u6OpI^py<_S~ftBBeq(3LlJ(jm-{0HnL=( z4J8bOHv_fy+hRw@D>;Z7uss+gUvdJ<(){fV{{Cd(%T@!Pc@ttIfnX*zOz0M!s zwrg-(_8nRI(3xWK(zGh(2sM^@(jZO{#T7uVK=Qa}UB|N`kkjHff|rAA=gY5C7^_CN zAGmLO46WIR%cb~vPUR-|HbA)RSY37Z(VE!lRomTV$=xXcPi!Vja!be1x)N?n>Zpb8 z0rxcTe=Zyh{lCO8kw)tN;Q8f4K-#;m3}^s9mnOrkWkSSoxxG+8Hsd2kHPIfH2ak;~ zK7knZzg@42tunS@$4RFPTiXB1Tv$7!1(y|hYF-8E0K^->VnrVz@8IB*S-*`W}JgXI903?a}#|xgphK*v8|uKGgwu4P|1zGBQ$+9 zen1e;I;9}u(}bQ%ehF=mKjbn}w+BTGdtiP;@BTvP9K>L)3QK*BchC+B)bFr$%yQJIk}DXzyg>Y@Oyg=^7$tvfQ`8vXnH9 zl;$^x3{u;AY}iR6;`Q_HykVz*oFord`j)DztAku9R(eT47Tx4xzy{#2UdjLXGm*#3U8K@{#zXM zphMKpBLR2~aTzaCiqPIA)z9gUi{hNPhZ*+?&PFP+faUs37kSxUKqrN^^@(EYfvAT06+`;xxc;}2-W zll^cio-FxavB`ccvzl6qHPUdRJG37E6oPNDNBg&MY;24lHAQLdYBdM4?&e12aqoX}mSA z`T6;_!8HlS0w?^3x$2Z?r~@y4qCLsvp%n*|9%~-UAcCSb2k`NHBRT=E-qyAgO@i(p z{^hGB9kYPfq_%X+&shY`GUdCb3oIl!P*e z3n&!g13~Sf3P($>dfCcX$@0W!a6SKS0zGFcs>8X&B9EX5iF5zr#hBEtpqUaI0rDf8 zh;uTHYGM{Q{BlF=PF|>rm1e#zq#$*^@u5xw-Jub@>C4dwXP#~}{fq(+y5oQI*wNYV zSx87RgqwA0d|HGb4u|XGP#}j8H8nLm=FL(hrKtM%)N*v8OZ64%y7SH{Bc$JLlZSZO zitlgxG=a~>S|gvs3LewOy$w)kWe=^LxIxiGNZ;y3d#fmE!fkwilxR#u5(^K(`_?N9 zfzBF0`mS?D@;R*ZJO_F8^MsHOR!`mmO(>zAqC|Tdx1i{Ez$#&USfh95ZbLfFTkWPm#?3TL>?(&h@zEn zg$zwb`AD^Onj}azr$S$5h*oTSVZ5}U0)&f1zOvpqwA3s)%elAw^d>lI*c&u<6q|fc zncCK?SD!(mef1Rn>C$KDT*%#=0^7q0`DvX2^(NS|RonW|ql^v_?ed8v%JCguBFfhA zf4j4eJRm{3s$qvkcYSsh*Y{f?qVe-*AA1iml2Yx_K5vnpLcM>zSsoL`6P`Oc_F&}Vi#~t;42P`&3@SbUiX)F670MDf$EpU! zNjGEpzp6xur8;mx>#)b=XzZDt*86W_HS%{=lB%|u@FH)`2Tv4+Yz(VgsoWSaEQgNx z(HF{3(%@)Gf=itdmxhVhZw@yM^i7{XhZ8BvaOBK^l_C8x^CcyY1NMz*1aqQcr{cZc zUlqHedEp~IH{urtw0@d9hi>D!D=L1=1WcWc zG7tvThj!b~=2ro^$s-Q?mH270dL+4OaolanD^|44VeqH`>Xi;tdX;I-UGrP)0>iiU zFOZRhk;Ki@g4tzEXRW{#$!7!yVcb^`Y>@8P(cb95MNbd4lfW20=<@jC8*c>HfbEiL zb3QiU){OzPGUb#tyW`#yM*$JyG~C2ICj8N8;K7rSHI;BNe2>*yOk~kuogB@de5BHm zEA*A1rYiJ^;wJv752=;%41OF4tsh*4hRK+M=zdlCr!?@O;lqc1_xFp@%mpB;A^Pi} z!ODXNSM@(JX^^wSrIQnhfgAaUjVhGI#ZBut61b5>re2~{c|p!-G*}V8IUm|3cr5b< zswjOj><&C|bI5E2N3H;rzAc*zIvx}py!ZrUqVcSDSAILLJB&0FJ>o?B zJ`_hI_1GEM%Lp0}l z^?c9+B}%!T_zF-sWV-2)r@~bg$r(Xkx+$a5UB10pcdGrJ(|tb7Tg-xA?Hs7@PY0>B z0qhy#@96~ESCbs;3b5fQtw*x_4zg+nRgFP80XY1W;|ulg*P%FlI+OeG#IY&5@zEQOu*z;iAIVG zWTz!NQ=41F&C4s&*F2`GqM`yPTxpb%UKi9tt4dTXZB=rIolLk=16*my|EI#fOBIiw zFdJ5MRNYP<+yL6Yu7j#=Tt?AO!l;lGtITiuh|8f(HBl5q!WlpfILBfs?t z1WGED=Vb(jq9n{RyW8v6^&l^-fzc#CVXy#skKlTvlKU4wLemF(R_XIgUBA6)YP-t_ zERjh7vB;AATRdL!U#{ybX0B+C(Ql{x(@N~M^ne-6T^xPLqV(67*EZw%w4bs<|geJ z1|af;&8ks0v>7%=fr^G`*4RBgYK_hJkz+$QOppVd)3A!=8{mNqk9UwDrai6yDu|*P z9T+>UIHE{%W+Ts62d3R69`3yz@@6Sn*;@~1_B-L1!0q(UF7&5()WzSX4yJ;MGCfOM z7#YeB(TZkD6n|nRI(g$FkS6`va5wP;QAc*zJ{<0|BhAk1L>j!`u(2g)w&ahq`t!YiiVfX)qcRkys;(0}z0@e+y2 zie$X!+RE^I#V%US<|$7(=WM0)20uq}e6Yh~v;+3uvwKZ)f}zTHPrHE?l$eZNd~8#7a$tzTkYq>0m6F z5vkM>5EKv)YOb_rxR&Or?TZr29n_nzceb^2G>Vr@8(Dt0e}FAQynv}pBY@;+1(D3N zYAeuX_8R<`CFe(UNH8v;MZFZ0%8Ev#T_F)+6ep*k%Pa5t`sYvvmoKE~)IJbfOdsb;Wa8vSQuDj}xr>A4d+$ZF>ylV3>xXTUUTWIg(M`=Xp7+8-6f zOa%hju~5Z?LpFU@Hnm2aK;51Cb;3d7vKXwS{)gn$LJGJd$;{zherPF+@=XEsb z6Jgir$ON-PE4EIa1vzgdPEdryQs}6v@KIyw%)W+3au4LZs=SbW>hA5WMB5V0VXI&5^e{-t{aaO4)g>Pf z>t7JF`YwW}8UHdq(SD8I-Lqob9k`%5>|%SVM@ao4#?6;Ad(TDAuf)1`>;7@=_xLdj)vtQ_-$e+$bt*JFru`ZJ!5 z5+GI}tp20<1A$xH!Y&Hw4PZBw zKn{q;C*g#p(?wN)d(R|iO0r^zH=Y#rQee&BGp~+>X|hz><_-em6RiZ^@ufcO>8fqX z^N7(5PVl_(3sxSTZpunY(R_Ods=(pAh5F-%uLYb*;e}WQ3(KM+rwI*hZ5L1MSd+4B zDuC0z4}mh$(jD=a|NUHz=g_ue(BwZ;ILz2A4vFo;HlW(r%-6_$A2CnGl%I|q@~x5Y z%YKnIQk-wFM@hdQU=@w$_Y`aS08%89;iE~3FB)zQ7d1jh-5Z{-1a_*no8c$oJuIEe zhhARTqjZ!b>Ml_KP&Ox3`mqPA7F52Rra& z%C~@v2|>sl0Z5{j`)}t#f6diATLl|X$DL%$nv;9)*9*W0mK!s1-=CQ0L;+Kr=1d~j z;-DNm4(6g>PVxyNkjS8R*$^E84}Jqo0oy)6+XtYw9H^??Mt-qxEd zwKe=i2XFF^JO?XfRt+%=kuK+#s>=w>hpYD6)Z5e1Q02O5LBazZA5$n-;G<5*knzjK ze>Y}OuIo$S0{v7a29!Uv?Eqf?obsoYo7S|-0c%4$$JajzxEf}6U&oQ&Q5`Jr(z^uP zz^Xb(w;Zen?YoqrvmZ#{N_V~o&`Nc6B>!RfhYQU#CTQ3j`YPp}&Hg`*((m7XxuYbf z5(q|vp#jB}_759>-P%0EZZQPeto3+asu!Lt^9m_pDlxY6jG(Q}9PWMeL4nd86ciLd z+VPj%`!--nCG2Qz++j}K+Q7h4mLSh?7!N{vsI4E=dj$=MS#P0A{Wl8vWP)d#~@9s@^|j?*TSy+-^Vymnm@t zk%O4fCH-?hMh(+jC^N?I9_d9#e?!Ma}>5u!4#E%3&_4VBHqR6Yw>t1V8wl0J0 z+ZgWBU!jJ(o#vuN=Mk7nH!TM2O)aA1K0ZD->O@Pccr9I;AI}&~54N`Qeu&0{n}>5C zNZk4N|5A2;2ZTg~B38vodi@4nQ7h1+5F?FU{|>_|o0mcKpx8J& z>3_5pbWx$qTmTLDw3_qrEojL@w1fVdlRt z7I3Y0KDk1D-#Vc4X^c|)tV_PNC6y&!etY@~wy+`*0*x0#o~UBh6vm8YTt1f$a4SB(3Z-a1 z7O^WqS~*xR>DZdXep~EZ{^u5qaRnZ{9~v-KgSp_SD=LDvu&_Hz>eH@4!%|Iaf~)@0 zp<~ch_sr&9H~5g&1{TF3pyVo;GiC;duX91Vm%tU8%lo$^Nh#qK`NwAD7k7Y<+&DNG zLpbQ*BP`i;F1Ie!)(pBRh<9?veULSnCv4UI`-G&fToIugMS@O1@U5TI4h%g(^y zq>CSvXV{*Hn1K=;Z3a&|R1k!xF5`KUU93HFW5J%V= z|IYsX6#+~))liKoT0O<^3Fujb90p6=g!%gVzTdZ&m61`7z4VN|)r5yc?KMr0nU!^$ zgC-=gkF`c4l_a3Ubv4g{HWs8pR5i%$C%bAuYY+IfeZg+GIT~vQ&N9BYl%b3Dx!iu$ zr57PDhTKu6h^}n^`5T&CTo!)lfw`%ud5j^31{1Yhg2*=xzJ@Y>>s4g5?4GPNT7Eef zVT{J-*fd=G3(_M$Vi*g+=~nnJ6I}h1MrydgYYpvwF&Qy;48xw+3>f|NNJt~LW%5uf z^r*7({nZmRDdL(nCC{jAyL~#Iq&&YLdc^lp(%vtpRw6wQ9DV=6TLFOATqr-Ck)2ER z7}ZL{)(9!j*(tYVPDt*7p!FIKD-tzYR7$%<*3ri1FpxlxJe1Qf=n{PX`nOkIcl^AMSqu+9G1QCKneA=`u)_(0~3J3(#(%DnNI?Z`HQ$I5Vv&xM`p{ z7FdIo+@lsivyyh}zfLQWO~>+}3Gmc-K_(YJAmP;62EvBpLr@DDgMMjlDneHCLobtu z@SsO#$_X9f+E38lgp2&v0iLbf2C#5KC2T9nvE#rFPUt3fK2_UXC(Fw8p}e2;NA+l- z%jYM%L#O$oga$2-vsG7t+k>9 z9iBCShZ-yrf9D}g86t9Dk#R2X@cjw5ozsipP7amCWt@fk(Bl5Lg;x5Z<&${q12q%K zAtNB1d;YqwGiej|j*5MO1NYIiZoKq5zb5&zoTY(@UcN|F@aCT@%u7dwSH;?v562V(d?*F?ZWzX;yfTIKwT8XQ2&O+k8?{yKJ oB^-vM2BAOQEhI@CHLlp5C>|S><-GZEcVX<2hCZxT%`Wc$0B6I05&!@I literal 0 HcmV?d00001 diff --git a/seeds/01_sites.js b/seeds/01_sites.js index 705df1a4..3274be06 100644 --- a/seeds/01_sites.js +++ b/seeds/01_sites.js @@ -582,7 +582,7 @@ function getSites(networksMap) { }, { slug: 'pornstarslikeitbig', - name: 'Pornstars Like it Big', + name: 'Pornstars Like It Big', url: 'https://www.brazzers.com/sites/view/id/24/pornstars-like-it-big', description: "A real big dick, that's what everyone wants. Porn-stars are no exception, all the biggest stars agree; BIG COCK is for them. Check out how it stretches their tiny pussies and cums on their round tits. We've got the best chicks jocking the biggest dicks.", network_id: networksMap.brazzers, @@ -2397,6 +2397,13 @@ function getSites(networksMap) { url: 'https://www.darkx.com', network_id: networksMap.xempire, }, + { + slug: 'allblackx', + name: 'AllBlackX', + description: 'AllBlackX.com features the hottest ebony pornstar beauties in hardcore black on black gonzo porn. From director Mason, watch 4k ultra HD videos inside', + url: 'https://www.allblackx.com', + network_id: networksMap.xempire, + }, { slug: 'lesbianx', name: 'LesbianX', diff --git a/seeds/03_tags.js b/seeds/03_tags.js index 67fc8fe9..ffff0f83 100644 --- a/seeds/03_tags.js +++ b/seeds/03_tags.js @@ -1166,6 +1166,10 @@ function getTagAliases(tagsMap) { name: 'dp', alias_for: tagsMap['double-penetration'], }, + { + name: 'first dp', + alias_for: tagsMap['double-penetration'], + }, { name: 'double penetration (dp)', alias_for: tagsMap['double-penetration'], diff --git a/src/media.js b/src/media.js index 010e1a27..f555711a 100644 --- a/src/media.js +++ b/src/media.js @@ -10,7 +10,6 @@ const sharp = require('sharp'); const blake2 = require('blake2'); const knex = require('./knex'); -const pluckPhotos = require('./utils/pluck-photos'); function getHash(buffer) { const hash = blake2.createHash('blake2b', { digestLength: 24 }); @@ -20,6 +19,21 @@ function getHash(buffer) { return hash.digest('hex'); } +function pluckPhotos(photos, release, specifiedLimit) { + const limit = specifiedLimit || config.media.limit; + + if (photos.length <= limit) { + return photos; + } + + const plucked = [1] + .concat( + Array.from({ length: limit }, (value, index) => Math.round((index + 1) * (photos.length / (limit)))), + ); + + return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close +} + async function getThumbnail(buffer) { return sharp(buffer) .resize({ @@ -94,7 +108,12 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho return files.filter(file => file && !photoHashes.has(file.hash)); } -async function fetchPhoto(photoUrl, index, identifier) { +async function fetchPhoto(photoUrl, index, identifier, attempt = 1) { + if (Array.isArray(photoUrl)) { + return fetchPhoto(photoUrl[0], index, identifier); + // return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => fetchPhoto(url, index, identifier)), Promise.reject()); + } + try { const { pathname } = new URL(photoUrl); const mimetype = mime.getType(pathname); @@ -116,7 +135,12 @@ async function fetchPhoto(photoUrl, index, identifier) { throw new Error(`Response ${res.statusCode} not OK`); } catch (error) { - console.warn(`Failed to store photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`); + console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`); + + if (attempt < 3) { + await Promise.delay(1000); + return fetchPhoto(photoUrl, index, identifier, attempt + 1); + } return null; } diff --git a/src/networks.js b/src/networks.js index 37ce81f2..ab1da0d5 100644 --- a/src/networks.js +++ b/src/networks.js @@ -39,6 +39,7 @@ async function findNetworkByUrl(url) { const network = await knex('networks') .where('networks.url', 'like', `%${domain}`) + .orWhere('networks.url', url) .first(); if (network) { diff --git a/src/releases.js b/src/releases.js index 17f2a120..25d108ed 100644 --- a/src/releases.js +++ b/src/releases.js @@ -248,7 +248,6 @@ async function storeReleaseAssets(release, releaseId) { try { await Promise.all([ - associateTags(release, releaseId), storePhotos(release, releaseId), storePoster(release, releaseId), storeTrailer(release, releaseId), @@ -275,17 +274,22 @@ async function storeRelease(release) { }) .returning('*'); - // await storeReleaseAssets(release, existingRelease.id); - console.log(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`); + if (updatedRelease) { + await associateTags(release, updatedRelease.id); + console.log(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`); + } - return updatedRelease ? updatedRelease.id : existingRelease.id; + await associateTags(release, existingRelease.id); + + return existingRelease.id; } const [releaseEntry] = await knex('releases') .insert(curatedRelease) .returning('*'); - // await storeReleaseAssets(release, releaseEntry.id); + await associateTags(release, releaseEntry.id); + console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`); return releaseEntry.id; @@ -334,7 +338,9 @@ async function storeReleases(releases) { await Promise.all([ associateActors(actors, storedReleases), - Promise.all(storedReleases.map(async release => storeReleaseAssets(release, release.id))), + Promise.map(storedReleases, async release => storeReleaseAssets(release, release.id), { + concurrency: 10, + }), ]); return storedReleases; diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 10147447..a7fd5a85 100644 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -7,7 +7,6 @@ const { JSDOM } = require('jsdom'); const moment = require('moment'); const { heightToCm } = require('../utils/convert'); -const { matchTags } = require('../tags'); async function fetchPhotos(url) { const res = await bhttp.get(url); @@ -22,13 +21,8 @@ function scrapePhotos(html) { .map((photoIndex, photoElement) => { const src = $(photoElement).attr('src'); - if (src.match(/dl\d+/)) { - // thumbnail URLs containing dl02/ or dl03/ don't appear to have - // a full photo available, fall back to thumbnail - return src; - } - - return src.replace('thumbs/', 'photos/'); + // high res often available in photos/ directory, but not always, provide original as fallback + return [src.replace('thumbs/', 'photos/'), src]; }) .toArray(); @@ -172,8 +166,8 @@ async function scrapeScene(html, url, site) { const photos = await getPhotos(entryId, site); - const rawTags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); - const tags = await matchTags(rawTags); + const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); + const movie = $('.update_dvds a').href; return { url, @@ -184,6 +178,7 @@ async function scrapeScene(html, url, site) { description, poster, photos, + movie, trailer: { src: trailer, quality: 720, diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js index 61f663b6..34d46cf7 100644 --- a/src/scrapers/xempire.js +++ b/src/scrapers/xempire.js @@ -6,14 +6,12 @@ const cheerio = require('cheerio'); const { JSDOM } = require('jsdom'); const moment = require('moment'); -const { fetchSites } = require('../sites'); -const { matchTags } = require('../tags'); - const defaultTags = { hardx: [], darkx: ['interracial'], eroticax: [], lesbianx: ['lesbian'], + allblackx: ['ebony', 'bbc'], }; async function fetchPhotos(url) { @@ -25,37 +23,56 @@ async function fetchPhotos(url) { function scrapePhotos(html) { const $ = cheerio.load(html, { normalizeWhitespace: true }); - const unlockedPhotos = $('.preview .imgLink.pgUnlocked') - .map((photoIndex, photoElement) => $(photoElement).attr('href')).toArray(); + return $('.preview .imgLink').toArray().map((linkEl) => { + const url = $(linkEl).attr('href'); - const lockedThumbnails = $('.preview .imgLink.lockedPicture img') - .map((photoIndex, photoElement) => $(photoElement) - .attr('src')) - // .replace('_tb.jpg', '.jpg')) does not always work - .toArray(); + if (url.match('/join')) { + // URL links to join page instead of full photo, extract thumbnail + const src = $(linkEl).find('img').attr('src'); - return unlockedPhotos.concat(lockedThumbnails); + if (src.match('previews/')) { + // resource often serves full photo at a modifier URL anyway, add as primary source + const highRes = src + .replace('previews/', '') + .replace('_tb.jpg', '.jpg'); + + // keep original thumbnail as fallback in case full photo is not available + return [highRes, src]; + } + + return src; + } + + // URL links to full photo + return url; + }); } async function getPhotos(albumPath, siteDomain) { const albumUrl = `https://${siteDomain}${albumPath}`; - const html = await fetchPhotos(albumUrl); - const $ = cheerio.load(html, { normalizeWhitespace: true }); - const photos = scrapePhotos(html); + try { + const html = await fetchPhotos(albumUrl); + const $ = cheerio.load(html, { normalizeWhitespace: true }); + const photos = scrapePhotos(html); - const pages = $('.paginatorPages a').map((pageIndex, pageElement) => $(pageElement).attr('href')).toArray(); + const pages = $('.paginatorPages a').map((pageIndex, pageElement) => $(pageElement).attr('href')).toArray(); - const otherPhotos = await Promise.map(pages, async (page) => { - const pageUrl = `https://${siteDomain}${page}`; - const pageHtml = await fetchPhotos(pageUrl); + const otherPhotos = await Promise.map(pages, async (page) => { + const pageUrl = `https://${siteDomain}${page}`; + const pageHtml = await fetchPhotos(pageUrl); - return scrapePhotos(pageHtml); - }, { - concurrency: 2, - }); + return scrapePhotos(pageHtml); + }, { + concurrency: 2, + }); - return photos.concat(otherPhotos.flat()); + return photos.concat(otherPhotos.flat()); + } catch (error) { + console.error(`Failed to fetch XEmpire photos from ${albumPath}: ${error.message}`); + + return []; + } } function scrape(html, site) { @@ -109,32 +126,26 @@ function scrape(html, site) { async function scrapeScene(html, url, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); const json = $('script[type="application/ld+json"]').html(); + const json2 = $('script:contains("dataLayer = ")').html(); const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); const data = JSON.parse(json)[0]; + const data2 = JSON.parse(json2.slice(json2.indexOf('[{'), -1))[0]; const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"id":'), videoJson.indexOf('};') + 1)); - const entryId = new URL(url).pathname.split('/').slice(-1)[0]; + const entryId = data2.sceneDetails.sceneId || new URL(url).pathname.split('/').slice(-1)[0]; - const title = $('meta[name="twitter:title"]').attr('content'); - const description = data.description || $('meta[name="twitter:description"]').attr('content'); + const title = data2.sceneDetails.sceneTitle || $('meta[name="twitter:title"]').attr('content'); + const description = data2.sceneDetails.sceneDescription || data.description || $('meta[name="twitter:description"]').attr('content'); // date in data object is not the release date of the scene, but the date the entry was added const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate(); - const actors = data.actor - .sort(({ gender: genderA }, { gender: genderB }) => { - if (genderA === 'female' && genderB === 'male') return -1; - if (genderA === 'male' && genderB === 'female') return 1; - - return 0; - }) - .map(actor => actor.name); - + const actors = (data2.sceneDetails.sceneActors || data.actor).map(actor => actor.actorName || actor.name); const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds(); - const siteDomain = $('meta[name="twitter:domain"]').attr('content'); + const siteDomain = $('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase(); const siteUrl = siteDomain && `https://www.${siteDomain}`; @@ -144,19 +155,10 @@ async function scrapeScene(html, url, site) { const photos = await getPhotos($('.picturesItem a').attr('href'), siteDomain, site); const rawTags = data.keywords.split(', '); - - const [[channelSite], tags] = await Promise.all([ - site.isFallback - ? fetchSites({ - url: siteUrl, - slug: siteSlug, - }) - : [site], - matchTags([...defaultTags[siteSlug], ...rawTags]), - ]); + const tags = [...defaultTags[siteSlug], ...rawTags]; return { - url: channelSite ? `${channelSite.url}/en/video/${new URL(url).pathname.split('/').slice(-2).join('/')}` : url, + url: `${siteUrl}/en/video/${new URL(url).pathname.split('/').slice(-2).join('/')}`, entryId, title, date, @@ -174,7 +176,8 @@ async function scrapeScene(html, url, site) { rating: { stars, }, - site: channelSite || site, + site, + channel: siteSlug, }; } diff --git a/src/sites.js b/src/sites.js index 5894bf33..d64be7d1 100644 --- a/src/sites.js +++ b/src/sites.js @@ -62,11 +62,14 @@ async function findSiteByUrl(url) { 'sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', ) - .where('sites.url', 'like', `%${domain}%`) + .where('sites.url', 'like', `%${domain}`) + .orWhere('sites.url', url) .first(); if (site) { - return curateSite(site, true); + const curatedSite = curateSite(site, true); + + return curatedSite; } return null;