Added Arch Angel, updated BAM Visions scraper to accommodate Arch Angel (different network, same unidentified CMS).

This commit is contained in:
DebaucheryLibrarian 2021-07-06 00:01:44 +02:00
parent 96a2125248
commit 6847ef690c
19 changed files with 129 additions and 13 deletions

View File

@ -1,12 +1,12 @@
<template> <template>
<a <a
v-if="campaign" v-if="campaign"
:href="campaign.url" :href="campaign.url || campaign.affiliate?.url"
target="_blank" target="_blank"
class="campaign" class="campaign"
> >
<img <img
v-if="campaign.banner.entity.type === 'network'" v-if="campaign.banner.entity.type === 'network' || !campaign.banner.entity.parent"
:src="`/img/banners/${campaign.banner.entity.slug}/${campaign.banner.id}.${campaign.banner.type || 'jpg'}`" :src="`/img/banners/${campaign.banner.entity.slug}/${campaign.banner.id}.${campaign.banner.type || 'jpg'}`"
:width="campaign.banner.width" :width="campaign.banner.width"
:height="campaign.banner.height" :height="campaign.banner.height"
@ -106,6 +106,14 @@ export default {
type: Object, type: Object,
default: null, default: null,
}, },
minHeight: {
type: Number,
default: null,
},
maxHeight: {
type: Number,
default: null,
},
minRatio: { minRatio: {
type: Number, type: Number,
default: null, default: null,

View File

@ -148,7 +148,9 @@ async function fetchEntity(scroll = true) {
this.totalCount = totalCount; this.totalCount = totalCount;
this.pageTitle = entity.name; this.pageTitle = entity.name;
this.entityUrl = entity.campaigns.find(campaign => !campaign.banner)?.url || entity.url;
const campaign = entity.campaigns.find(campaignX => !campaignX.banner);
this.entityUrl = campaign?.url || campaign?.affiliate?.url || entity.url;
if (scroll && this.$refs.filter?.$el) { if (scroll && this.$refs.filter?.$el) {
this.$refs.filter.$el.scrollIntoView(); this.$refs.filter.$el.scrollIntoView();

View File

@ -119,6 +119,10 @@ const campaignsFragment = `
}) { }) {
id id
url url
affiliate {
id
url
}
banner { banner {
id id
type type

View File

@ -1274,6 +1274,26 @@ exports.up = knex => Promise.resolve()
.notNullable() .notNullable()
.defaultTo(knex.fn.now()); .defaultTo(knex.fn.now());
})) }))
.then(() => knex.schema.createTable('affiliates', (table) => {
table.string('id')
.primary()
.unique()
.notNullable();
table.integer('entity_id', 12)
.references('id')
.inTable('entities');
table.text('url');
table.unique(['entity_id', 'url']);
table.text('comment');
table.datetime('created_at')
.notNullable()
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('banners', (table) => { .then(() => knex.schema.createTable('banners', (table) => {
table.string('id') table.string('id')
.primary() .primary()
@ -1326,13 +1346,16 @@ exports.up = knex => Promise.resolve()
.references('id') .references('id')
.inTable('entities'); .inTable('entities');
table.text('affiliate_id')
.references('id')
.inTable('affiliates');
table.text('url');
table.string('banner_id') table.string('banner_id')
.references('id') .references('id')
.inTable('banners'); .inTable('banners');
table.text('url')
.notNullable();
table.text('comment'); table.text('comment');
table.datetime('created_at') table.datetime('created_at')
@ -1358,8 +1381,10 @@ exports.up = knex => Promise.resolve()
CREATE UNIQUE INDEX unique_actor_slugs_network ON actors (slug, entity_id, entry_id); CREATE UNIQUE INDEX unique_actor_slugs_network ON actors (slug, entity_id, entry_id);
CREATE UNIQUE INDEX unique_actor_slugs ON actors (slug) WHERE entity_id IS NULL; CREATE UNIQUE INDEX unique_actor_slugs ON actors (slug) WHERE entity_id IS NULL;
CREATE UNIQUE INDEX unique_entity_campaigns_banner ON campaigns (entity_id, url, banner_id); CREATE UNIQUE INDEX unique_entity_campaigns_banner_url ON campaigns (entity_id, url, banner_id) WHERE affiliate_id IS NULL;
CREATE UNIQUE INDEX unique_entity_campaigns ON campaigns (entity_id, url) WHERE banner_id IS NULL; CREATE UNIQUE INDEX unique_entity_campaigns_url ON campaigns (entity_id, url) WHERE banner_id IS NULL AND affiliate_id IS NULL;
CREATE UNIQUE INDEX unique_entity_campaigns_banner_affiliate ON campaigns (entity_id, affiliate_id, banner_id) WHERE url IS NULL;
CREATE UNIQUE INDEX unique_entity_campaigns_affiliate ON campaigns (entity_id, affiliate_id) WHERE banner_id IS NULL AND url IS NULL;
CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id); CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id);
CREATE INDEX releases_search_index ON releases_search USING GIN (document); CREATE INDEX releases_search_index ON releases_search USING GIN (document);
@ -1722,6 +1747,8 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style
DROP TABLE IF EXISTS banners_tags CASCADE; DROP TABLE IF EXISTS banners_tags CASCADE;
DROP TABLE IF EXISTS banners CASCADE; DROP TABLE IF EXISTS banners CASCADE;
DROP TABLE IF EXISTS campaigns CASCADE; DROP TABLE IF EXISTS campaigns CASCADE;
DROP TABLE IF EXISTS affiliates CASCADE;
DROP TABLE IF EXISTS batches CASCADE; DROP TABLE IF EXISTS batches CASCADE;
DROP TABLE IF EXISTS actors_avatars CASCADE; DROP TABLE IF EXISTS actors_avatars CASCADE;

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 953 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 953 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 953 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.1 KiB

View File

@ -504,6 +504,17 @@ const sites = [
profile: 'http://www.american-pornstar.com/models', profile: 'http://www.american-pornstar.com/models',
}, },
}, },
// ARCH ANGEL
{
slug: 'archangel',
name: 'ArchAngel',
url: 'https://www.archangelvideo.com',
parameters: {
latest: 'https://www.archangelvideo.com/tour/categories/movies/{page}/latest/',
profile: 'https://www.archangelvideo.com/tour/models/{slug}.html',
sets: 'https://www.archangelvideo.com/tour/sets.php',
},
},
// ASSYLUM // ASSYLUM
{ {
slug: 'assylum', slug: 'assylum',

View File

@ -1,5 +1,14 @@
const bulkInsert = require('../src/utils/bulk-insert'); const bulkInsert = require('../src/utils/bulk-insert');
// Affiliate links to seed into the `affiliates` table. Each entry names its
// owning entity by slug (`channel` here; the seed's entity lookup also accepts
// a `network` slug), and campaigns refer to an entry through its `id`.
const affiliates = [
{
id: 'archangel_share',
channel: 'archangel',
url: 'https://join.archangelvideo.com/track/MzQ3LjEuMS4xLjAuMC4wLjAuMA',
comment: 'revshare',
},
];
const banners = [ const banners = [
{ {
id: '21sextury_300_250_anal', id: '21sextury_300_250_anal',
@ -78,6 +87,13 @@ const banners = [
network: '21naturals', network: '21naturals',
tags: ['sex', 'brunette'], tags: ['sex', 'brunette'],
}, },
{
id: 'archangel_970_90_kendra_lust',
width: 970,
height: 90,
channel: 'archangel',
tags: ['dp', 'anal', 'sex', 'interracial', 'black'],
},
{ {
id: 'evilangel_728_90_adriana_chechik_gangbang', id: 'evilangel_728_90_adriana_chechik_gangbang',
width: 728, width: 728,
@ -352,6 +368,15 @@ const campaigns = [
url: 'https://www.iyalc.com/21naturals/go.php?pr=8&su=1&si=209&ad=277470&pa=index&ar=&buffer=', url: 'https://www.iyalc.com/21naturals/go.php?pr=8&su=1&si=209&ad=277470&pa=index&ar=&buffer=',
comment: 'per signup', comment: 'per signup',
}, },
{
channel: 'archangel',
affiliate: 'archangel_share',
},
{
channel: 'archangel',
affiliate: 'archangel_share',
banner: 'archangel_970_90_kendra_lust',
},
{ {
network: 'brazzers', network: 'brazzers',
url: 'https://landing.brazzersnetwork.com/?ats=eyJhIjozMTYwOTcsImMiOjU5MzI1Mzk5LCJuIjoxNCwicyI6OTAsImUiOjg4MDMsInAiOjExfQ==', url: 'https://landing.brazzersnetwork.com/?ats=eyJhIjozMTYwOTcsImMiOjU5MzI1Mzk5LCJuIjoxNCwicyI6OTAsImUiOjg4MDMsInAiOjExfQ==',
@ -624,6 +649,7 @@ exports.seed = async knex => Promise.resolve()
await knex('banners_tags').delete(); await knex('banners_tags').delete();
await Promise.all([ await Promise.all([
knex('affiliates').delete(),
knex('campaigns').delete(), knex('campaigns').delete(),
knex('banners').delete(), knex('banners').delete(),
]); ]);
@ -643,6 +669,13 @@ exports.seed = async knex => Promise.resolve()
const channelsBySlug = channels.reduce((acc, channel) => ({ ...acc, [channel.slug]: channel }), {}); const channelsBySlug = channels.reduce((acc, channel) => ({ ...acc, [channel.slug]: channel }), {});
const tagsBySlug = tags.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag }), {}); const tagsBySlug = tags.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag }), {});
const affiliatesWithEntityId = affiliates.map(affiliate => ({
id: affiliate.id,
entity_id: networksBySlug[affiliate.network]?.id || channelsBySlug[affiliate.channel]?.id || null,
url: affiliate.url,
comment: affiliate.comment,
}));
const bannersWithEntityId = banners.map(banner => ({ const bannersWithEntityId = banners.map(banner => ({
id: banner.id, id: banner.id,
width: banner.width, width: banner.width,
@ -659,9 +692,11 @@ exports.seed = async knex => Promise.resolve()
const campaignsWithEntityIdAndAffiliateId = campaigns.map(campaign => ({ const campaignsWithEntityIdAndAffiliateId = campaigns.map(campaign => ({
entity_id: networksBySlug[campaign.network]?.id || channelsBySlug[campaign.channel]?.id, entity_id: networksBySlug[campaign.network]?.id || channelsBySlug[campaign.channel]?.id,
url: campaign.url, url: campaign.url,
affiliate_id: campaign.affiliate,
banner_id: campaign.banner, banner_id: campaign.banner,
})).filter(link => link.entity_id && link.url); })).filter(link => link.entity_id && (link.url || link.affiliate_id));
await knex('affiliates').insert(affiliatesWithEntityId);
await bulkInsert('banners', bannersWithEntityId, false); await bulkInsert('banners', bannersWithEntityId, false);
await bulkInsert('banners_tags', bannerTags, false); await bulkInsert('banners_tags', bannerTags, false);
await bulkInsert('campaigns', campaignsWithEntityIdAndAffiliateId, false); await bulkInsert('campaigns', campaignsWithEntityIdAndAffiliateId, false);

View File

@ -187,6 +187,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
array['parent'] as parent_path array['parent'] as parent_path
FROM entities FROM entities
WHERE slug = ANY(:entitySlugs) WHERE slug = ANY(:entitySlugs)
OR substring(url from 'https%://%#"[a-z0-9-]+#".(com|net)%' for '#') = ANY(:entitySlugs)
UNION ALL UNION ALL
@ -217,6 +218,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({ const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({
...accEntities, ...accEntities,
[entity.slug]: accEntities[entity.slug] || curateEntity(entity, true), [entity.slug]: accEntities[entity.slug] || curateEntity(entity, true),
[urlToSiteSlug(entity.url)]: accEntities[entity.slug] || curateEntity(entity, true),
}), {}); }), {});
return entitiesBySlug; return entitiesBySlug;

View File

@ -1,5 +1,7 @@
'use strict'; 'use strict';
const format = require('template-format');
const { get, geta, initAll, formatDate } = require('../utils/qu'); const { get, geta, initAll, formatDate } = require('../utils/qu');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
@ -68,7 +70,10 @@ function scrapeScene({ html, qu }, url, site) {
} }
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) { async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
const url = `${site.url}/sets.php?id=${actorId}&page=${page}`; const url = site.parameters?.sets
? `${site.parameters.sets}?id=${actorId}&page=${page}`
: `${site.url}/sets.php?id=${actorId}&page=${page}`;
const res = await get(url); const res = await get(url);
if (!res.ok) return []; if (!res.ok) return [];
@ -86,6 +91,11 @@ async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
} }
async function scrapeProfile({ qu }, site, withScenes) { async function scrapeProfile({ qu }, site, withScenes) {
if (!qu.exists('.content')) {
// page probably returned a 404 with a 200 HTTP code
return null;
}
const profile = {}; const profile = {};
const bio = qu.all('.stats li', true).reduce((acc, row) => { const bio = qu.all('.stats li', true).reduce((acc, row) => {
@ -120,7 +130,9 @@ async function scrapeProfile({ qu }, site, withScenes) {
} }
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
const url = `${site.url}/categories/movies/${page}/latest/`; const url = site.parameters?.latest
? format(site.parameters.latest, { page })
: `${site.url}/categories/movies/${page}/latest/`;
const res = await geta(url, '.item-episode'); const res = await geta(url, '.item-episode');
return res.ok ? scrapeAll(res.items, site) : res.status; return res.ok ? scrapeAll(res.items, site) : res.status;
@ -132,18 +144,31 @@ async function fetchScene(url, site) {
return res.ok ? scrapeScene(res.item, url, site) : res.status; return res.ok ? scrapeScene(res.item, url, site) : res.status;
} }
// Placeholder: currently only logs its arguments and resolves to undefined;
// no movies are fetched or scraped here yet.
// NOTE(review): this is exported from the module below — confirm whether the
// debug logging is intentional before relying on it.
async function fetchMovies(channel, page) {
console.log(channel, page);
}
async function fetchProfile({ name: actorName }, { site }, include) { async function fetchProfile({ name: actorName }, { site }, include) {
const actorSlugA = slugify(actorName, ''); const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName); const actorSlugB = slugify(actorName);
const resA = await get(`${site.url}/models/${actorSlugA}.html`); const urlA = site.parameters?.profile
const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`); ? format(site.parameters.profile, { slug: actorSlugA })
: `${site.url}/models/${actorSlugA}.html`;
const urlB = site.parameters?.profile
? format(site.parameters.profile, { slug: actorSlugB })
: `${site.url}/models/${actorSlugB}.html`;
const resA = await get(urlA);
const res = resA.ok ? resA : await get(urlB);
return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status; return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
} }
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchMovies,
fetchScene, fetchScene,
fetchProfile, fetchProfile,
}; };

View File

@ -71,6 +71,7 @@ const scrapers = {
amateurallure, amateurallure,
americanpornstar, americanpornstar,
amateureuro: porndoe, amateureuro: porndoe,
archangel: bamvisions,
assylum, assylum,
aziani, aziani,
badoink, badoink,
@ -155,6 +156,7 @@ const scrapers = {
analized: fullpornnetwork, analized: fullpornnetwork,
analviolation: fullpornnetwork, analviolation: fullpornnetwork,
anilos: nubiles, anilos: nubiles,
archangel: bamvisions,
aziani, aziani,
babes: mindgeek, babes: mindgeek,
babevr: badoink, babevr: badoink,