Added site aliases. Migrated various scrapers to qu. Added BAM Visions base.

This commit is contained in:
ThePendulum 2020-03-12 00:15:25 +01:00
parent c020d5659e
commit 37e188a0df
11 changed files with 400 additions and 69 deletions

View File

@ -151,6 +151,11 @@ exports.up = knex => Promise.resolve()
.inTable('networks'); .inTable('networks');
table.string('name'); table.string('name');
table.string('slug', 32)
.unique();
table.string('alias');
table.string('url'); table.string('url');
table.text('description'); table.text('description');
table.json('parameters'); table.json('parameters');
@ -162,9 +167,6 @@ exports.up = knex => Promise.resolve()
table.boolean('scrape') table.boolean('scrape')
.defaultTo(true); .defaultTo(true);
table.string('slug', 32)
.unique();
table.datetime('created_at') table.datetime('created_at')
.defaultTo(knex.fn.now()); .defaultTo(knex.fn.now());
})) }))

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

View File

@ -78,6 +78,11 @@ const networks = [
url: 'https://www.babes.com', url: 'https://www.babes.com',
parent: 'mindgeek', parent: 'mindgeek',
}, },
{
slug: 'bamvisions',
name: 'BAM Visions',
url: 'https://www.bamvisions.com',
},
{ {
slug: 'bang', slug: 'bang',
name: 'Bang!', name: 'Bang!',

File diff suppressed because it is too large Load Diff

View File

@ -375,10 +375,11 @@ async function updateReleasesSearch(releaseIds) {
TO_TSVECTOR( TO_TSVECTOR(
'traxxx', 'traxxx',
releases.title || ' ' || releases.title || ' ' ||
sites.name || ' ' ||
sites.slug || ' ' ||
networks.name || ' ' || networks.name || ' ' ||
networks.slug || ' ' || networks.slug || ' ' ||
sites.name || ' ' ||
sites.slug || ' ' ||
COALESCE(sites.alias, '') || ' ' ||
COALESCE(releases.shoot_id, '') || ' ' || COALESCE(releases.shoot_id, '') || ' ' ||
COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' || COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' || STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
@ -394,7 +395,7 @@ async function updateReleasesSearch(releaseIds) {
LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags ON local_tags.tag_id = tags.id
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for
${releaseIds ? 'WHERE releases.id = ANY(?)' : ''} ${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug; GROUP BY releases.id, sites.name, sites.slug, sites.alias, networks.name, networks.slug;
`, releaseIds && [releaseIds]); `, releaseIds && [releaseIds]);
if (documents.rows?.length > 0) { if (documents.rows?.length > 0) {

View File

@ -41,22 +41,22 @@ function scrapeLatest(scenes, site, models) {
}); });
} }
function scrapeScene({ html, q, qa, qd, qis }, url, site, models) { function scrapeScene({ html, qu }, url, site, models) {
const release = { url }; const release = { url };
[release.entryId] = url.split('/').slice(-1); [release.entryId] = url.split('/').slice(-1);
release.title = q('.mas_title', true); release.title = qu.q('.mas_title', true);
release.description = q('.mas_longdescription', true); release.description = qu.q('.mas_longdescription', true);
release.date = qd('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
const actorString = q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, ''); const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, '');
const actors = matchActors(actorString, models); const actors = matchActors(actorString, models);
if (actors.length > 0) release.actors = actors; if (actors.length > 0) release.actors = actors;
else release.actors = extractActors(actorString); else release.actors = extractActors(actorString);
release.tags = qa('.tags a', true); release.tags = qu.all('.tags a', true);
release.photos = qis('.stills img').map(photoPath => `${site.url}/${photoPath}`); release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`);
const posterIndex = 'splash:'; const posterIndex = 'splash:';
const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4); const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4);

View File

@ -0,0 +1,26 @@
'use strict';
const { geta } = require('../utils/qu');
/**
 * Turn the scene items of a latest-updates page into release objects.
 *
 * @param {Array<Object>} scenes - One query context per scene item; each must
 *   expose `q(selector, true)` and `url(selector)`.
 * @param {Object} _site - Site the scenes belong to; currently unused.
 * @returns {Array<{title: *, url: *}>} One release per scene, in input order.
 */
function scrapeLatest(scenes, _site) {
	// NOTE(review): other scrapers in this project receive contexts shaped
	// `{ html, qu }`; confirm whether the items passed in here should be
	// destructured (`({ qu }) =>`) instead of being used as the query helper directly.
	return scenes.map(qu => ({
		// Title text and link are both read from the heading anchor.
		title: qu.q('h3 a', true),
		url: qu.url('h3 a'),
	}));
}
/**
 * Fetch one page of the latest-movies listing and scrape its scene items.
 *
 * @param {Object} site - Site being scraped; passed through to scrapeLatest.
 * @param {number} [page=1] - Listing page number interpolated into the URL.
 * @returns {Promise<Array<Object>|*>} The scraped releases when the response is
 *   flagged `ok`, otherwise the response's `status`.
 */
async function fetchLatest(site, page = 1) {
	const url = `https://tour.bamvisions.com/categories/movies/${page}/latest/`;

	// geta() is expected to return a promise (TODO confirm against utils/qu).
	// Without the await, `res` would be the pending promise itself, `res.ok`
	// would be undefined and this function would always resolve to undefined.
	// Awaiting a plain value is harmless, so this is safe either way.
	const res = await geta(url, '.item-episode');

	return res.ok ? scrapeLatest(res.items, site) : res.status;
}
// Scraper interface: only the latest-releases fetcher is exposed.
module.exports = { fetchLatest };

View File

@ -134,16 +134,16 @@ function scrapeAllTour(scenes) {
}); });
} }
function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { function scrapeScene({ html, qu }, site, url, baseRelease) {
const release = { url }; const release = { url };
release.title = q('.centerwrap h2', true); release.title = qu.q('.centerwrap h2', true);
release.description = q('.videocontent p', true); release.description = qu.q('.videocontent p', true);
release.date = qd('.videodetails .date', 'MM/DD/YYYY'); release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
release.duration = ql('.videodetails .date'); release.duration = qu.dur('.videodetails .date');
release.actors = qa('.modelname a', true); release.actors = qu.all('.modelname a', true);
const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1]; const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease); [release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
@ -151,7 +151,7 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) {
const trailerPath = html.match(/\/trailers\/.*.mp4/); const trailerPath = html.match(/\/trailers\/.*.mp4/);
if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` }; if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
const stars = q('.modelrates + p', true).match(/\d.\d/)?.[0]; const stars = qu.q('.modelrates + p', true).match(/\d.\d/)?.[0];
if (stars) release.stars = Number(stars); if (stars) release.stars = Number(stars);
// release.entryId = html.match(/set-target-(\d+)/)[1]; // release.entryId = html.match(/set-target-(\d+)/)[1];
@ -160,31 +160,31 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) {
return release; return release;
} }
function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, channelRegExp) { function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
const release = { url }; const release = { url };
release.title = q('.trailer-section-head .section-title', true); release.title = qu.q('.trailer-section-head .section-title', true);
release.description = qtx('.row .update-info-block'); release.description = qu.text('.row .update-info-block');
release.date = qd('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/); release.date = qu.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
release.duration = ql('.update-info-row:nth-child(2)'); release.duration = qu.dur('.update-info-row:nth-child(2)');
release.actors = qa('.models-list-thumbs a').map(el => ({ release.actors = qu.all('.models-list-thumbs a').map(el => ({
name: q(el, 'span', true), name: qu.q(el, 'span', true),
avatar: getImageWithFallbacks(q, 'img', site, el), avatar: getImageWithFallbacks(qu.q, 'img', site, el),
})); }));
release.tags = qa('.tags a', true); release.tags = qu.all('.tags a', true);
// const posterPath = html.match(/poster="(.*\.jpg)/)?.[1]; // const posterPath = html.match(/poster="(.*\.jpg)/)?.[1];
const posterPath = q('.player-thumb img', 'src0_1x'); const posterPath = qu.q('.player-thumb img', 'src0_1x');
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease); [release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1]; const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];
if (trailer && /^http/.test(trailer)) release.trailer = { src: trailer, referer: url }; if (trailer && /^http/.test(trailer)) release.trailer = { src: trailer, referer: url };
else if (trailer) release.trailer = { src: `${site.parameters?.media || site.url}${trailer}`, referer: url }; else if (trailer) release.trailer = { src: `${site.parameters?.media || site.url}${trailer}`, referer: url };
const stars = q('.update-rating', true).match(/\d.\d/)?.[0]; const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0];
if (stars) release.stars = Number(stars); if (stars) release.stars = Number(stars);
if (channelRegExp) { if (channelRegExp) {
@ -204,26 +204,26 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
return release; return release;
} }
function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) { function scrapeSceneTour({ html, qu }, site, url) {
const release = {}; const release = {};
if (url) release.url = url; if (url) release.url = url;
release.title = q('.update_title, .video-title', true); release.title = qu.q('.update_title, .video-title', true);
release.description = q('.latest_update_description, .video-summary', true); release.description = qu.q('.latest_update_description, .video-summary', true);
const date = qd('.availdate, .update_date', 'YYYY-MM-DD'); const date = qu.date('.availdate, .update_date', 'YYYY-MM-DD');
if (date) release.date = date; if (date) release.date = date;
release.actors = qa('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true); release.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
release.tags = qa('.update_tags a, .tour_update_tags a', true); release.tags = qu.all('.update_tags a, .tour_update_tags a', true);
const [photo, poster, ...photos] = qis('.update_image img:not(.play_icon_overlay)'); const [photo, poster, ...photos] = qu.imgs('.update_image img:not(.play_icon_overlay)');
if (poster || photo) release.poster = poster || photo; if (poster || photo) release.poster = poster || photo;
if ((photo && poster) || photos) release.photos = poster ? [photo, ...photos] : photos; // don't use first photo when already used as fallback poster if ((photo && poster) || photos) release.photos = poster ? [photo, ...photos] : photos; // don't use first photo when already used as fallback poster
if (release.date) release.entryId = deriveEntryId(release); if (release.date) release.entryId = deriveEntryId(release);
const trailerCode = q('.update_image a', 'onclick'); const trailerCode = qu.q('.update_image a', 'onclick');
const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0]; const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];
if (trailerPath && /^http/.test(trailerPath)) release.trailer = { src: trailerPath }; if (trailerPath && /^http/.test(trailerPath)) release.trailer = { src: trailerPath };
else if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` }; else if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
@ -231,10 +231,10 @@ function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
return release; return release;
} }
function scrapeProfile({ el, q, qtxs }, site) { function scrapeProfile({ el, qu }, site) {
const profile = {}; const profile = {};
const bio = qtxs('.stats p').reduce((acc, info) => { const bio = qu.texts('.stats p').reduce((acc, info) => {
const [key, value] = info.split(':'); const [key, value] = info.split(':');
return { return {
@ -254,7 +254,7 @@ function scrapeProfile({ el, q, qtxs }, site) {
if (bio.age) profile.age = Number(bio.age); if (bio.age) profile.age = Number(bio.age);
if (bio.height) profile.height = feetInchesToCm(bio.height); if (bio.height) profile.height = feetInchesToCm(bio.height);
profile.avatar = getImageWithFallbacks(q, '.profileimg img', site); profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);
const qReleases = ctxa(el, '.modelFeatures .modelfeature'); const qReleases = ctxa(el, '.modelFeatures .modelfeature');
profile.releases = scrapeAll(qReleases, site); profile.releases = scrapeAll(qReleases, site);
@ -262,10 +262,10 @@ function scrapeProfile({ el, q, qtxs }, site) {
return profile; return profile;
} }
function scrapeProfileT1({ el, q, qa }, site) { function scrapeProfileT1({ el, qu }, site) {
const profile = {}; const profile = {};
const bio = qa('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => { const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
const [key, value] = info.split(':'); const [key, value] = info.split(':');
if (!value) return acc; if (!value) return acc;
@ -292,7 +292,7 @@ function scrapeProfileT1({ el, q, qa }, site) {
if (heightMetric) profile.height = Number(heightMetric[1]); if (heightMetric) profile.height = Number(heightMetric[1]);
if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1])); if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
profile.avatar = getImageWithFallbacks(q, '.img-div img', site); profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site);
const qReleases = ctxa(el, '.item-video'); const qReleases = ctxa(el, '.item-video');
profile.releases = scrapeAllT1(qReleases, site); profile.releases = scrapeAllT1(qReleases, site);
@ -300,10 +300,10 @@ function scrapeProfileT1({ el, q, qa }, site) {
return profile; return profile;
} }
function scrapeProfileTour({ el, q, qtxs }, site) { function scrapeProfileTour({ el, qu }, site) {
const profile = {}; const profile = {};
const bio = qtxs('.model_bio').reduce((acc, info) => { const bio = qu.texts('.model_bio').reduce((acc, info) => {
const [key, value] = info.split(':'); const [key, value] = info.split(':');
return { return {
@ -339,7 +339,7 @@ function scrapeProfileTour({ el, q, qtxs }, site) {
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim()); if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
profile.avatar = getImageWithFallbacks(q, '.model_picture img', site); profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);
const qReleases = ctxa(el, '.update_block'); const qReleases = ctxa(el, '.update_block');
profile.releases = qReleases.map((qRelease) => { profile.releases = qReleases.map((qRelease) => {

View File

@ -4,6 +4,7 @@ const adulttime = require('./adulttime');
const assylum = require('./assylum'); const assylum = require('./assylum');
const amateurallure = require('./amateurallure'); const amateurallure = require('./amateurallure');
const babes = require('./babes'); const babes = require('./babes');
const bamvisions = require('./bamvisions');
const bang = require('./bang'); const bang = require('./bang');
const bangbros = require('./bangbros'); const bangbros = require('./bangbros');
const blowpass = require('./blowpass'); const blowpass = require('./blowpass');
@ -70,6 +71,7 @@ module.exports = {
amateurallure, amateurallure,
assylum, assylum,
babes, babes,
bamvisions,
bang, bang,
bangbros, bangbros,
blowpass, blowpass,
@ -108,7 +110,7 @@ module.exports = {
perfectgonzo, perfectgonzo,
pervcity, pervcity,
pimpxxx: cherrypimps, pimpxxx: cherrypimps,
pornpros: whalemember, ornpros: whalemember,
private: privateNetwork, private: privateNetwork,
puretaboo, puretaboo,
realitykings, realitykings,