From 14077005117d5e776186c9cf3f7d538166cbeb6c Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Thu, 5 Mar 2020 17:07:07 +0100 Subject: [PATCH] Added Interracial Pass sites. Fixed Hush removing poster from base release. --- public/img/logos/hushpass/favicon.png | Bin 0 -> 1474 bytes public/img/logos/interracialpass/favicon.png | Bin 0 -> 931 bytes seeds/02_sites.js | 86 ++++++++++++++++++- src/scrape-sites.js | 22 ++--- src/scrapers/assylum.js | 6 +- src/scrapers/hush.js | 32 ++++--- 6 files changed, 120 insertions(+), 26 deletions(-) create mode 100644 public/img/logos/hushpass/favicon.png create mode 100644 public/img/logos/interracialpass/favicon.png diff --git a/public/img/logos/hushpass/favicon.png b/public/img/logos/hushpass/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..bb2ebfd5bbf1de7b722ac288588e77b9f8d545cf GIT binary patch literal 1474 zcmV;z1wHzSP)EX>4Tx04R}tkv&MmKpe$iQ%hB<4(%Y~kfAzR5EXIMDionYs1;guFuC*#nlvOS zE{=k0!NHHks)LKOt`4q(Aou~|DYS_3;J6>}?mh0_0YbgZG^=YI&~)2O zCE{WxyDA1>;YR@d2%|@4mN6$uNqCO0d-(Wz7vovp=l&dhYR+PSPb8jYhG`RT5KnK~ z2Iqa^2rJ4e@j3CBNf#u3?RNSu0mr>z@3Dp`5<5%ypW>NMI35kRU=q6(y8mBSx!EiiH&I$36T*O}|7gg=bb;{@eSad^gZEa<4bO1wgWnpw> zWFU8GbZ8()Nlj2!fese{00WvyL_t(o!|hg0Xk1koJ>PfloA+jhdDEHLOkzZ#P^m_t z;L5b3MG-63jo`+GE?l^(OR3;i$V%O~a4o2~ECeDd{-gz~C00{Hf1*)oW12WWnPleu z+1@^Iz|2t@6VxlA>SpXgY0U*vf!_2i(sTBFX z&kG9+LqxPW4*0Iq`;7Z*RY*1n*WB9Zm|nOPc*#=A>POJ}Op>hRdu*ySWi za+|E3bK-ekV{&rxP+{(!@Z^bWS9rAGY>r$;X+U>Rv07OJl6a~#@)8D!#o6V-hV$riqq++8vkv^9#P%)5~)?kqP^`lXG z|Gux^3&y#xMnsyKY5RAj6cdpTA^~U@q5~=s+>={)YyP+4ySfsv)}mA@mH*9tDyn)y zhIeZ|ZIb-SN3T4_dv-4o0bn*w25iR_2otJyaI)?Wd?~t184;}6_OU$^*pNl{ns?c3 zbbfZ^)%nHxlYhDVg${2DFc1t6cO@VqFvg5%tq&Cnh0!!k^{yF;Bc@cfGsh5Q2d;M5 zM1YKH1wA*m);?9KROUnktyXK#L1R`DI9bg?(ABFtP1ABOBU)?!OU9vXlnS9A^5WJr z9mN@_wNzDGYx8#^V!0Lh&mWyRN1iV0B+hhI$y=G>AP9cHBN-2YCw8b;ahFkc`R33U z8a+}(DP^_RH4*WWBq`ojb_%G)#j~FG^Gwe+%w#g3PESwYJ3Bi&7KUMb+luE1%P4&| zJ$vva1U>qgh(JVfrBZ3v>-AM4lD*Vw14tZ%b-wKrU#&^u@{M2epByY-JDV>a#se?! z5F~xueErvHg?$D(2^t@A{^IfDo}2nXg0ifsvVHdGC3h}<1CP8o4&cIi8}=#C6V5pS zU`=jFNU9DGV zX=990N@44PnHfqc-i&2tCgy)~HO5$D%o>0K7_|K;#2}_31C5;t5YY!j^bLS;LlPpQ zhEnRMjjOJvX?j{ImEBka01m*i=XnX4wDxCm;7u|GZA6C<=MhmtV%Lhp_t$}M_aDCd c*vF3e8y?V=bO~qWz5oCK07*qoM6N<$f}ua4>i_@% literal 0 HcmV?d00001 diff --git a/public/img/logos/interracialpass/favicon.png b/public/img/logos/interracialpass/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..44cdb825d12fe3467dde49ceabe9547ae8df9f11 GIT binary patch literal 931 zcmV;U16=%xP)EX>4Tx04R}tkv&MmKpe$iQ%hB<4(%Y~kfAzR5EXIMDionYs1;guFuC*#nlvOS zE{=k0!NHHks)LKOt`4q(Aou~|DYS_3;J6>}?mh0_0YbgZG^=YI&~)2O zCE{WxyDA1>;YR@d2%|@4mN6$uNqCO0d-(Wz7vovp=l&dhYR+PSPb8jYhG`RT5KnK~ z2Iqa^2rJ4e@j3CBNf#u3?RNSu0mr>z@3Dp`5<5%ypW>NMI35kRU=q6(y8mBSx!EiiH&I$36T*O}|7gg=bb;{@eSad^gZEa<4bO1wgWnpw> zWFU8GbZ8()Nlj2!fese{00DbRL_t(o!|j)`OT$1I#eZod4kASnH&GBBx(R~+f+#rY z-{2x{f(}AK5GUQmLC`@QEC_-))J<_wT*O7)vPd0*I#}s*i4fu?w#Px2JP09=d&f(@ z``%q3N2ej_UVkw!Xfe#7mKjB#SSpGw2gr?pdNc31a^U`Hs`4VHJcJ_0!`pL!7lxp|002ovPDHLk FV1o8;hu;7I literal 0 HcmV?d00001 diff --git a/seeds/02_sites.js b/seeds/02_sites.js index ea6e6140..de190946 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -2189,13 +2189,14 @@ const sites = [ }, }, { - slug: 'buttnakedinstreets', + slug: 'buttnakedinthestreets', name: 'Butt Naked In The Streets', url: 'https://buttnakedinthestreets.com', network: 'hushpass', parameters: { latest: 'https://hushpass.com/t1/categories/ButtNakedInStreets_%d_d.html', media: 'https://hushpass.com', + match: 'Butt Naked In Streets', t1: true, }, }, @@ -2264,6 +2265,89 @@ const sites = [ t1: true, }, }, + { + slug: '2bigtobetrue', + name: '2 Big To Be True', + url: 'https://www.2bigtobetrue.com/', + tags: ['interracial'], + network: 'interracialpass', + parameters: { + latest: 'https://www.interracialpass.com/t1/categories/2-big-to-be-true_%d_d.html', + media: 'https://www.interracialpass.com', + t1: true, + }, + }, + { + slug: 'abominableblackman', + name: 'Abominable Black Man', + url: 'https://www.abominableblackman.com/', + tags: ['interracial'], + network: 'interracialpass', + parameters: { + latest: 'https://www.interracialpass.com/t1/categories/abominable-black-man_%d_d.html', + media: 'https://www.interracialpass.com', + t1: true, + }, + }, + { + slug: 'bootyannihilation', + name: 'Booty Annihilation', + tags: ['interracial'], + network: 'interracialpass', + parameters: { + latest: 'https://www.interracialpass.com/t1/categories/BootyAnnihilation_%d_d.html', + media: 'https://www.interracialpass.com', + t1: true, + }, + }, + { + slug: 'daddysworstnightmare', + name: 'Daddy\'s Worst Nightmare', + url: 'https://www.daddysworstnightmare.com/', + tags: ['interracial'], + network: 'interracialpass', + parameters: { + latest: 'https://www.interracialpass.com/t1/categories/daddys-worst-nightmare_%d_d.html', + media: 'https://www.interracialpass.com', + t1: true, + }, + }, + { + slug: 'monstercockfuckfest', + name: 'Monster Cock Fuck Fest', + url: 'https://www.monstercockfuckfest.com/', + tags: ['interracial'], + network: 'interracialpass', + parameters: { + latest: 'https://www.interracialpass.com/t1/categories/monster-cock-fuck-fest_%d_d.html', + media: 'https://www.interracialpass.com', + t1: true, + }, + }, + { + slug: 'mymomsfuckingblackzilla', + name: 'My Mom\'s Fucking Blackzilla', + url: 'https://www.mymomsfuckingblackzilla.com/', + tags: ['interracial'], + network: 'interracialpass', + parameters: { + latest: 'https://www.interracialpass.com/t1/categories/my-moms-fucking-blackzilla_%d_d.html', + media: 'https://www.interracialpass.com', + t1: true, + }, + }, + { + slug: 'mywifesfirstmonstercock', + name: 'My Wife\'s First Monster Cock', + url: 'https://www.mywifesfirstmonstercock.com/', + tags: ['interracial'], + network: 'interracialpass', + parameters: { + latest: 'https://www.interracialpass.com/t1/categories/my-wifes-first-monster-cock_%d_d.html', + media: 'https://www.interracialpass.com', + t1: true, + }, + }, // INSEX { slug: 'sexuallybroken', diff --git a/src/scrape-sites.js b/src/scrape-sites.js index 18440ca1..91be1fab 100644 --- a/src/scrape-sites.js +++ b/src/scrape-sites.js @@ -37,12 +37,12 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) { .concat(accReleases.map(release => String(release.entryId)))); } -async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAfterDate(), accReleases = [], page = argv.page) { +async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, afterDate = getAfterDate(), accReleases = [], page = argv.page) { if (!argv.latest || !scraper.fetchLatest) { return []; } - const latestReleases = await scraper.fetchLatest(site, page, preflight); + const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest); if (!Array.isArray(latestReleases)) { logger.warn(`Scraper returned ${latestReleases || 'null'} when fetching latest from '${site.name}' on '${site.network.name}'`); @@ -73,7 +73,7 @@ async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAft || (argv.last && accReleases.length + uniqueReleases.length < argv.last)) ) { // oldest release on page is newer that specified date range, or latest count has not yet been met, fetch next page - return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleases), page + 1); + return scrapeUniqueReleases(scraper, site, beforeFetchLatest, afterDate, accReleases.concat(uniqueReleases), page + 1); } if (argv.last && uniqueReleases.length >= argv.last) { @@ -87,9 +87,9 @@ async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAft return accReleases.concat(uniqueReleases).slice(0, argv.nullDateLimit); } -async function scrapeUpcomingReleases(scraper, site, preflight) { +async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) { if (argv.upcoming && scraper.fetchUpcoming) { - const upcomingReleases = await scraper.fetchUpcoming(site, 1, preflight); + const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest); return upcomingReleases ? upcomingReleases.map(release => ({ ...release, site, upcoming: true })) @@ -99,11 +99,11 @@ async function scrapeUpcomingReleases(scraper, site, preflight) { return []; } -async function deepFetchReleases(baseReleases, preflight) { +async function deepFetchReleases(baseReleases, beforeFetchLatest) { return Promise.map(baseReleases, async (release) => { if (release.url || (release.path && release.site)) { try { - const fullRelease = await scrapeRelease(release.url, release, 'scene', preflight); + const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest); if (fullRelease) { return { @@ -133,11 +133,11 @@ async function deepFetchReleases(baseReleases, preflight) { } async function scrapeSiteReleases(scraper, site) { - const preflight = await scraper.preflight?.(site); + const beforeFetchLatest = await scraper.beforeFetchLatest?.(site); const [newReleases, upcomingReleases] = await Promise.all([ - scrapeUniqueReleases(scraper, site, preflight), // fetch basic release info from scene overview - scrapeUpcomingReleases(scraper, site, preflight), // fetch basic release info from upcoming overview + scrapeUniqueReleases(scraper, site, beforeFetchLatest), // fetch basic release info from scene overview + scrapeUpcomingReleases(scraper, site, beforeFetchLatest), // fetch basic release info from upcoming overview ]); if (argv.upcoming) { @@ -148,7 +148,7 @@ async function scrapeSiteReleases(scraper, site) { if (argv.deep) { // follow URL for every release - return deepFetchReleases(baseReleases, preflight); + return deepFetchReleases(baseReleases, beforeFetchLatest); } return baseReleases; diff --git a/src/scrapers/assylum.js b/src/scrapers/assylum.js index fc4ac03e..5ed37d30 100644 --- a/src/scrapers/assylum.js +++ b/src/scrapers/assylum.js @@ -117,8 +117,8 @@ async function fetchLatest(site, page = 1, models) { return qLatest && scrapeLatest(qLatest, site, models); } -async function fetchScene(url, site, release, preflight) { - const models = preflight || await fetchModels(site); +async function fetchScene(url, site, release, beforeFetchLatest) { + const models = beforeFetchLatest || await fetchModels(site); const qScene = await get(url); return qScene && scrapeScene(qScene, url, site, models); @@ -127,5 +127,5 @@ async function fetchScene(url, site, release, preflight) { module.exports = { fetchLatest, fetchScene, - preflight: fetchModels, + beforeFetchLatest: fetchModels, }; diff --git a/src/scrapers/hush.js b/src/scrapers/hush.js index cbbd6a17..fd99206b 100644 --- a/src/scrapers/hush.js +++ b/src/scrapers/hush.js @@ -2,6 +2,7 @@ const util = require('util'); +const knex = require('../knex'); const { get, geta, fd } = require('../utils/q'); const slugify = require('../utils/slugify'); @@ -22,7 +23,7 @@ function extractPoster(posterPath, site, baseRelease) { return [posterSources, []]; } - return [null, []]; + return [baseRelease?.poster || null, []]; } function scrapeLatest(scenes, site) { @@ -59,8 +60,8 @@ function scrapeLatestT1(scenes, site) { release.date = qd('.more-info-div', 'MMM D, YYYY'); release.duration = ql('.more-info-div'); - release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1] - || `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`; + // release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1]; + release.entryId = `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`; const posterPath = q('.img-div img', 'src0_1x') || qi('img.video_placeholder'); @@ -102,7 +103,7 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { return release; } -function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease) { +function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, channelRegExp) { const release = { url }; release.title = q('.trailer-section-head .section-title', true); @@ -111,8 +112,8 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease) { release.date = qd('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/); release.duration = ql('.update-info-row:nth-child(2)'); - release.entryId = q('.player-thumb img', 'id').match(/set-target-(\d+)/)[1] - || `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`; + // release.entryId = q('.player-thumb img', 'id')?.match(/set-target-(\d+)/)[1]; + release.entryId = `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`; release.actors = qa('.models-list-thumbs a').map(el => ({ name: q(el, 'span', true), @@ -136,8 +137,8 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease) { const stars = q('.update-rating', true).match(/\d.\d/)?.[0]; if (stars) release.stars = Number(stars); - if (site.slug === 'hushpass') { - const channel = release.tags.find(tag => /Shot Her First|WhiteZilla|Frat House Fuck Fest|Freaky First Timers|MILF Invaders|Housewives Need Cash|Bubble Butt Bonanza|Suburban Sex Party|Butt Naked In Streets/i.test(tag)); + if (channelRegExp) { + const channel = release.tags.find(tag => channelRegExp.test(tag)); if (channel) { release.channel = { @@ -150,6 +151,14 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease) { return release; } +async function getChannelRegExp(site) { + if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null; + + const sites = await knex('sites').where('network_id', site.network.id); + + return new RegExp(sites.map(channel => channel.parameters?.match || channel.name).join('|'), 'i'); +} + async function fetchLatest(site, page = 1) { const url = (site.parameters?.latest && util.format(site.parameters.latest, page)) || (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`) @@ -162,16 +171,17 @@ async function fetchLatest(site, page = 1) { return site.parameters?.t1 ? scrapeLatestT1(qLatest, site) : scrapeLatest(qLatest, site); } -async function fetchScene(url, site, baseRelease) { +async function fetchScene(url, site, baseRelease, beforeFetchLatest) { + const channelRegExp = beforeFetchLatest || await getChannelRegExp(site); const qScene = await get(url); if (!qScene) return null; - return site.parameters?.t1 ? scrapeSceneT1(qScene, site, url, baseRelease) : scrapeScene(qScene, site, url, baseRelease); + return site.parameters?.t1 ? scrapeSceneT1(qScene, site, url, baseRelease, channelRegExp) : scrapeScene(qScene, site, url, baseRelease); } module.exports = { - // preflight, + beforeFetchLatest: getChannelRegExp, fetchLatest, fetchScene, };