forked from DebaucheryLibrarian/traxxx
Added site aliases. Migrated various scrapers to qu. Added BAM Visions base.
This commit is contained in:
parent
c020d5659e
commit
37e188a0df
|
@ -151,6 +151,11 @@ exports.up = knex => Promise.resolve()
|
||||||
.inTable('networks');
|
.inTable('networks');
|
||||||
|
|
||||||
table.string('name');
|
table.string('name');
|
||||||
|
table.string('slug', 32)
|
||||||
|
.unique();
|
||||||
|
|
||||||
|
table.string('alias');
|
||||||
|
|
||||||
table.string('url');
|
table.string('url');
|
||||||
table.text('description');
|
table.text('description');
|
||||||
table.json('parameters');
|
table.json('parameters');
|
||||||
|
@ -162,9 +167,6 @@ exports.up = knex => Promise.resolve()
|
||||||
table.boolean('scrape')
|
table.boolean('scrape')
|
||||||
.defaultTo(true);
|
.defaultTo(true);
|
||||||
|
|
||||||
table.string('slug', 32)
|
|
||||||
.unique();
|
|
||||||
|
|
||||||
table.datetime('created_at')
|
table.datetime('created_at')
|
||||||
.defaultTo(knex.fn.now());
|
.defaultTo(knex.fn.now());
|
||||||
}))
|
}))
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 8.7 KiB |
Binary file not shown.
After Width: | Height: | Size: 2.5 KiB |
Binary file not shown.
After Width: | Height: | Size: 7.6 KiB |
|
@ -78,6 +78,11 @@ const networks = [
|
||||||
url: 'https://www.babes.com',
|
url: 'https://www.babes.com',
|
||||||
parent: 'mindgeek',
|
parent: 'mindgeek',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
slug: 'bamvisions',
|
||||||
|
name: 'BAM Visions',
|
||||||
|
url: 'https://www.bamvisions.com',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
slug: 'bang',
|
slug: 'bang',
|
||||||
name: 'Bang!',
|
name: 'Bang!',
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -375,10 +375,11 @@ async function updateReleasesSearch(releaseIds) {
|
||||||
TO_TSVECTOR(
|
TO_TSVECTOR(
|
||||||
'traxxx',
|
'traxxx',
|
||||||
releases.title || ' ' ||
|
releases.title || ' ' ||
|
||||||
sites.name || ' ' ||
|
|
||||||
sites.slug || ' ' ||
|
|
||||||
networks.name || ' ' ||
|
networks.name || ' ' ||
|
||||||
networks.slug || ' ' ||
|
networks.slug || ' ' ||
|
||||||
|
sites.name || ' ' ||
|
||||||
|
sites.slug || ' ' ||
|
||||||
|
COALESCE(sites.alias, '') || ' ' ||
|
||||||
COALESCE(releases.shoot_id, '') || ' ' ||
|
COALESCE(releases.shoot_id, '') || ' ' ||
|
||||||
COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
|
COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
|
||||||
STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
|
STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
|
||||||
|
@ -394,7 +395,7 @@ async function updateReleasesSearch(releaseIds) {
|
||||||
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
||||||
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for
|
LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for
|
||||||
${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
|
${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
|
||||||
GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
|
GROUP BY releases.id, sites.name, sites.slug, sites.alias, networks.name, networks.slug;
|
||||||
`, releaseIds && [releaseIds]);
|
`, releaseIds && [releaseIds]);
|
||||||
|
|
||||||
if (documents.rows?.length > 0) {
|
if (documents.rows?.length > 0) {
|
||||||
|
|
|
@ -41,22 +41,22 @@ function scrapeLatest(scenes, site, models) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ html, q, qa, qd, qis }, url, site, models) {
|
function scrapeScene({ html, qu }, url, site, models) {
|
||||||
const release = { url };
|
const release = { url };
|
||||||
|
|
||||||
[release.entryId] = url.split('/').slice(-1);
|
[release.entryId] = url.split('/').slice(-1);
|
||||||
release.title = q('.mas_title', true);
|
release.title = qu.q('.mas_title', true);
|
||||||
release.description = q('.mas_longdescription', true);
|
release.description = qu.q('.mas_longdescription', true);
|
||||||
release.date = qd('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||||
|
|
||||||
const actorString = q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, '');
|
const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, '');
|
||||||
const actors = matchActors(actorString, models);
|
const actors = matchActors(actorString, models);
|
||||||
if (actors.length > 0) release.actors = actors;
|
if (actors.length > 0) release.actors = actors;
|
||||||
else release.actors = extractActors(actorString);
|
else release.actors = extractActors(actorString);
|
||||||
|
|
||||||
release.tags = qa('.tags a', true);
|
release.tags = qu.all('.tags a', true);
|
||||||
|
|
||||||
release.photos = qis('.stills img').map(photoPath => `${site.url}/${photoPath}`);
|
release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`);
|
||||||
|
|
||||||
const posterIndex = 'splash:';
|
const posterIndex = 'splash:';
|
||||||
const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4);
|
const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4);
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const { geta } = require('../utils/qu');
|
||||||
|
|
||||||
|
/**
 * Build a minimal release (title and URL) for every scene item of a listing page.
 *
 * @param {Array<Object>} scenes - One query helper per scene item; each must
 *   provide `q(selector, flag)` and `url(selector)`.
 * @param {Object} _site - Site the scenes belong to; currently unused.
 * @returns {Array<Object>} One `{ title, url }` release per scene, in input order.
 */
function scrapeLatest(scenes, _site) {
	// NOTE(review): other scrapers in this project destructure `{ qu }` from their
	// context object; confirm whether the items passed in here are the query
	// helpers themselves or wrap them in a `qu` property.
	return scenes.map((qu) => {
		const release = {};

		// Both values are read from the heading link of the scene item.
		release.title = qu.q('h3 a', true);
		release.url = qu.url('h3 a');

		return release;
	});
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of the latest-movies listing and scrape its scene items.
 *
 * @param {Object} site - Site being scraped; passed through to `scrapeLatest`.
 * @param {number} [page=1] - Listing page number inserted into the URL.
 * @returns {Promise<Array<Object>|*>} The scraped releases when the request
 *   result is `ok`, otherwise the result's `status`.
 */
async function fetchLatest(site, page = 1) {
	const url = `https://tour.bamvisions.com/categories/movies/${page}/latest/`;

	// Await the request before inspecting it: if `geta` returns a promise
	// (presumably it does, since it fetches a URL; verify in utils/qu), reading
	// `ok`/`status` from the un-awaited promise always yields `undefined`.
	// Awaiting a plain value is a no-op, so this is safe either way.
	const res = await geta(url, '.item-episode');

	return res.ok ? scrapeLatest(res.items, site) : res.status;
}
|
||||||
|
|
||||||
|
// Public interface of this scraper module: only the latest-releases fetcher.
module.exports = { fetchLatest };
|
|
@ -134,16 +134,16 @@ function scrapeAllTour(scenes) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) {
|
function scrapeScene({ html, qu }, site, url, baseRelease) {
|
||||||
const release = { url };
|
const release = { url };
|
||||||
|
|
||||||
release.title = q('.centerwrap h2', true);
|
release.title = qu.q('.centerwrap h2', true);
|
||||||
release.description = q('.videocontent p', true);
|
release.description = qu.q('.videocontent p', true);
|
||||||
|
|
||||||
release.date = qd('.videodetails .date', 'MM/DD/YYYY');
|
release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
|
||||||
release.duration = ql('.videodetails .date');
|
release.duration = qu.dur('.videodetails .date');
|
||||||
|
|
||||||
release.actors = qa('.modelname a', true);
|
release.actors = qu.all('.modelname a', true);
|
||||||
|
|
||||||
const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
|
const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
|
||||||
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
|
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
|
||||||
|
@ -151,7 +151,7 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) {
|
||||||
const trailerPath = html.match(/\/trailers\/.*.mp4/);
|
const trailerPath = html.match(/\/trailers\/.*.mp4/);
|
||||||
if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
|
if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
|
||||||
|
|
||||||
const stars = q('.modelrates + p', true).match(/\d.\d/)?.[0];
|
const stars = qu.q('.modelrates + p', true).match(/\d.\d/)?.[0];
|
||||||
if (stars) release.stars = Number(stars);
|
if (stars) release.stars = Number(stars);
|
||||||
|
|
||||||
// release.entryId = html.match(/set-target-(\d+)/)[1];
|
// release.entryId = html.match(/set-target-(\d+)/)[1];
|
||||||
|
@ -160,31 +160,31 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, channelRegExp) {
|
function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
|
||||||
const release = { url };
|
const release = { url };
|
||||||
|
|
||||||
release.title = q('.trailer-section-head .section-title', true);
|
release.title = qu.q('.trailer-section-head .section-title', true);
|
||||||
release.description = qtx('.row .update-info-block');
|
release.description = qu.text('.row .update-info-block');
|
||||||
|
|
||||||
release.date = qd('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
release.date = qu.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||||
release.duration = ql('.update-info-row:nth-child(2)');
|
release.duration = qu.dur('.update-info-row:nth-child(2)');
|
||||||
|
|
||||||
release.actors = qa('.models-list-thumbs a').map(el => ({
|
release.actors = qu.all('.models-list-thumbs a').map(el => ({
|
||||||
name: q(el, 'span', true),
|
name: qu.q(el, 'span', true),
|
||||||
avatar: getImageWithFallbacks(q, 'img', site, el),
|
avatar: getImageWithFallbacks(qu.q, 'img', site, el),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.tags = qa('.tags a', true);
|
release.tags = qu.all('.tags a', true);
|
||||||
|
|
||||||
// const posterPath = html.match(/poster="(.*\.jpg)/)?.[1];
|
// const posterPath = html.match(/poster="(.*\.jpg)/)?.[1];
|
||||||
const posterPath = q('.player-thumb img', 'src0_1x');
|
const posterPath = qu.q('.player-thumb img', 'src0_1x');
|
||||||
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
|
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
|
||||||
|
|
||||||
const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];
|
const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];
|
||||||
if (trailer && /^http/.test(trailer)) release.trailer = { src: trailer, referer: url };
|
if (trailer && /^http/.test(trailer)) release.trailer = { src: trailer, referer: url };
|
||||||
else if (trailer) release.trailer = { src: `${site.parameters?.media || site.url}${trailer}`, referer: url };
|
else if (trailer) release.trailer = { src: `${site.parameters?.media || site.url}${trailer}`, referer: url };
|
||||||
|
|
||||||
const stars = q('.update-rating', true).match(/\d.\d/)?.[0];
|
const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0];
|
||||||
if (stars) release.stars = Number(stars);
|
if (stars) release.stars = Number(stars);
|
||||||
|
|
||||||
if (channelRegExp) {
|
if (channelRegExp) {
|
||||||
|
@ -204,26 +204,26 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
|
function scrapeSceneTour({ html, qu }, site, url) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
if (url) release.url = url;
|
if (url) release.url = url;
|
||||||
release.title = q('.update_title, .video-title', true);
|
release.title = qu.q('.update_title, .video-title', true);
|
||||||
release.description = q('.latest_update_description, .video-summary', true);
|
release.description = qu.q('.latest_update_description, .video-summary', true);
|
||||||
|
|
||||||
const date = qd('.availdate, .update_date', 'YYYY-MM-DD');
|
const date = qu.date('.availdate, .update_date', 'YYYY-MM-DD');
|
||||||
if (date) release.date = date;
|
if (date) release.date = date;
|
||||||
|
|
||||||
release.actors = qa('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
|
release.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
|
||||||
release.tags = qa('.update_tags a, .tour_update_tags a', true);
|
release.tags = qu.all('.update_tags a, .tour_update_tags a', true);
|
||||||
|
|
||||||
const [photo, poster, ...photos] = qis('.update_image img:not(.play_icon_overlay)');
|
const [photo, poster, ...photos] = qu.imgs('.update_image img:not(.play_icon_overlay)');
|
||||||
if (poster || photo) release.poster = poster || photo;
|
if (poster || photo) release.poster = poster || photo;
|
||||||
if ((photo && poster) || photos) release.photos = poster ? [photo, ...photos] : photos; // don't use first photo when already used as fallback poster
|
if ((photo && poster) || photos) release.photos = poster ? [photo, ...photos] : photos; // don't use first photo when already used as fallback poster
|
||||||
|
|
||||||
if (release.date) release.entryId = deriveEntryId(release);
|
if (release.date) release.entryId = deriveEntryId(release);
|
||||||
|
|
||||||
const trailerCode = q('.update_image a', 'onclick');
|
const trailerCode = qu.q('.update_image a', 'onclick');
|
||||||
const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];
|
const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];
|
||||||
if (trailerPath && /^http/.test(trailerPath)) release.trailer = { src: trailerPath };
|
if (trailerPath && /^http/.test(trailerPath)) release.trailer = { src: trailerPath };
|
||||||
else if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
|
else if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
|
||||||
|
@ -231,10 +231,10 @@ function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile({ el, q, qtxs }, site) {
|
function scrapeProfile({ el, qu }, site) {
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
const bio = qtxs('.stats p').reduce((acc, info) => {
|
const bio = qu.texts('.stats p').reduce((acc, info) => {
|
||||||
const [key, value] = info.split(':');
|
const [key, value] = info.split(':');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -254,7 +254,7 @@ function scrapeProfile({ el, q, qtxs }, site) {
|
||||||
if (bio.age) profile.age = Number(bio.age);
|
if (bio.age) profile.age = Number(bio.age);
|
||||||
if (bio.height) profile.height = feetInchesToCm(bio.height);
|
if (bio.height) profile.height = feetInchesToCm(bio.height);
|
||||||
|
|
||||||
profile.avatar = getImageWithFallbacks(q, '.profileimg img', site);
|
profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);
|
||||||
|
|
||||||
const qReleases = ctxa(el, '.modelFeatures .modelfeature');
|
const qReleases = ctxa(el, '.modelFeatures .modelfeature');
|
||||||
profile.releases = scrapeAll(qReleases, site);
|
profile.releases = scrapeAll(qReleases, site);
|
||||||
|
@ -262,10 +262,10 @@ function scrapeProfile({ el, q, qtxs }, site) {
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfileT1({ el, q, qa }, site) {
|
function scrapeProfileT1({ el, qu }, site) {
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
const bio = qa('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
|
const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
|
||||||
const [key, value] = info.split(':');
|
const [key, value] = info.split(':');
|
||||||
|
|
||||||
if (!value) return acc;
|
if (!value) return acc;
|
||||||
|
@ -292,7 +292,7 @@ function scrapeProfileT1({ el, q, qa }, site) {
|
||||||
if (heightMetric) profile.height = Number(heightMetric[1]);
|
if (heightMetric) profile.height = Number(heightMetric[1]);
|
||||||
if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
|
if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
|
||||||
|
|
||||||
profile.avatar = getImageWithFallbacks(q, '.img-div img', site);
|
profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site);
|
||||||
|
|
||||||
const qReleases = ctxa(el, '.item-video');
|
const qReleases = ctxa(el, '.item-video');
|
||||||
profile.releases = scrapeAllT1(qReleases, site);
|
profile.releases = scrapeAllT1(qReleases, site);
|
||||||
|
@ -300,10 +300,10 @@ function scrapeProfileT1({ el, q, qa }, site) {
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfileTour({ el, q, qtxs }, site) {
|
function scrapeProfileTour({ el, qu }, site) {
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
const bio = qtxs('.model_bio').reduce((acc, info) => {
|
const bio = qu.texts('.model_bio').reduce((acc, info) => {
|
||||||
const [key, value] = info.split(':');
|
const [key, value] = info.split(':');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -339,7 +339,7 @@ function scrapeProfileTour({ el, q, qtxs }, site) {
|
||||||
|
|
||||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
|
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
|
||||||
|
|
||||||
profile.avatar = getImageWithFallbacks(q, '.model_picture img', site);
|
profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);
|
||||||
|
|
||||||
const qReleases = ctxa(el, '.update_block');
|
const qReleases = ctxa(el, '.update_block');
|
||||||
profile.releases = qReleases.map((qRelease) => {
|
profile.releases = qReleases.map((qRelease) => {
|
||||||
|
|
|
@ -4,6 +4,7 @@ const adulttime = require('./adulttime');
|
||||||
const assylum = require('./assylum');
|
const assylum = require('./assylum');
|
||||||
const amateurallure = require('./amateurallure');
|
const amateurallure = require('./amateurallure');
|
||||||
const babes = require('./babes');
|
const babes = require('./babes');
|
||||||
|
const bamvisions = require('./bamvisions');
|
||||||
const bang = require('./bang');
|
const bang = require('./bang');
|
||||||
const bangbros = require('./bangbros');
|
const bangbros = require('./bangbros');
|
||||||
const blowpass = require('./blowpass');
|
const blowpass = require('./blowpass');
|
||||||
|
@ -70,6 +71,7 @@ module.exports = {
|
||||||
amateurallure,
|
amateurallure,
|
||||||
assylum,
|
assylum,
|
||||||
babes,
|
babes,
|
||||||
|
bamvisions,
|
||||||
bang,
|
bang,
|
||||||
bangbros,
|
bangbros,
|
||||||
blowpass,
|
blowpass,
|
||||||
|
@ -108,7 +110,7 @@ module.exports = {
|
||||||
perfectgonzo,
|
perfectgonzo,
|
||||||
pervcity,
|
pervcity,
|
||||||
pimpxxx: cherrypimps,
|
pimpxxx: cherrypimps,
|
||||||
pornpros: whalemember,
|
pornpros: whalemember,
|
||||||
private: privateNetwork,
|
private: privateNetwork,
|
||||||
puretaboo,
|
puretaboo,
|
||||||
realitykings,
|
realitykings,
|
||||||
|
|
Loading…
Reference in New Issue