Added Arch Angel, updated BAM Visions scraper to accommodate Arch Angel (different network, same unidentified CMS).
|  | @ -1,12 +1,12 @@ | |||
| <template> | ||||
| 	<a | ||||
| 		v-if="campaign" | ||||
| 		:href="campaign.url" | ||||
| 		:href="campaign.url || campaign.affiliate?.url" | ||||
| 		target="_blank" | ||||
| 		class="campaign" | ||||
| 	> | ||||
| 		<img | ||||
| 			v-if="campaign.banner.entity.type === 'network'" | ||||
| 			v-if="campaign.banner.entity.type === 'network' || !campaign.banner.entity.parent" | ||||
| 			:src="`/img/banners/${campaign.banner.entity.slug}/${campaign.banner.id}.${campaign.banner.type || 'jpg'}`" | ||||
| 			:width="campaign.banner.width" | ||||
| 			:height="campaign.banner.height" | ||||
|  | @ -106,6 +106,14 @@ export default { | |||
| 			type: Object, | ||||
| 			default: null, | ||||
| 		}, | ||||
| 		minHeight: { | ||||
| 			type: Number, | ||||
| 			default: null, | ||||
| 		}, | ||||
| 		maxHeight: { | ||||
| 			type: Number, | ||||
| 			default: null, | ||||
| 		}, | ||||
| 		minRatio: { | ||||
| 			type: Number, | ||||
| 			default: null, | ||||
|  |  | |||
|  | @ -148,7 +148,9 @@ async function fetchEntity(scroll = true) { | |||
| 	this.totalCount = totalCount; | ||||
| 
 | ||||
| 	this.pageTitle = entity.name; | ||||
| 	this.entityUrl = entity.campaigns.find(campaign => !campaign.banner)?.url || entity.url; | ||||
| 
 | ||||
| 	const campaign = entity.campaigns.find(campaignX => !campaignX.banner); | ||||
| 	this.entityUrl = campaign?.url || campaign?.affiliate?.url || entity.url; | ||||
| 
 | ||||
| 	if (scroll && this.$refs.filter?.$el) { | ||||
| 		this.$refs.filter.$el.scrollIntoView(); | ||||
|  |  | |||
|  | @ -119,6 +119,10 @@ const campaignsFragment = ` | |||
| 	}) { | ||||
| 		id | ||||
| 		url | ||||
| 		affiliate { | ||||
| 			id | ||||
| 			url | ||||
| 		} | ||||
| 		banner { | ||||
| 			id | ||||
| 			type | ||||
|  |  | |||
|  | @ -1274,6 +1274,26 @@ exports.up = knex => Promise.resolve() | |||
| 			.notNullable() | ||||
| 			.defaultTo(knex.fn.now()); | ||||
| 	})) | ||||
| 	.then(() => knex.schema.createTable('affiliates', (table) => { | ||||
| 		table.string('id') | ||||
| 			.primary() | ||||
| 			.unique() | ||||
| 			.notNullable(); | ||||
| 
 | ||||
| 		table.integer('entity_id', 12) | ||||
| 			.references('id') | ||||
| 			.inTable('entities'); | ||||
| 
 | ||||
| 		table.text('url'); | ||||
| 
 | ||||
| 		table.unique(['entity_id', 'url']); | ||||
| 
 | ||||
| 		table.text('comment'); | ||||
| 
 | ||||
| 		table.datetime('created_at') | ||||
| 			.notNullable() | ||||
| 			.defaultTo(knex.fn.now()); | ||||
| 	})) | ||||
| 	.then(() => knex.schema.createTable('banners', (table) => { | ||||
| 		table.string('id') | ||||
| 			.primary() | ||||
|  | @ -1326,13 +1346,16 @@ exports.up = knex => Promise.resolve() | |||
| 			.references('id') | ||||
| 			.inTable('entities'); | ||||
| 
 | ||||
| 		table.text('affiliate_id') | ||||
| 			.references('id') | ||||
| 			.inTable('affiliates'); | ||||
| 
 | ||||
| 		table.text('url'); | ||||
| 
 | ||||
| 		table.string('banner_id') | ||||
| 			.references('id') | ||||
| 			.inTable('banners'); | ||||
| 
 | ||||
| 		table.text('url') | ||||
| 			.notNullable(); | ||||
| 
 | ||||
| 		table.text('comment'); | ||||
| 
 | ||||
| 		table.datetime('created_at') | ||||
|  | @ -1358,8 +1381,10 @@ exports.up = knex => Promise.resolve() | |||
| 			CREATE UNIQUE INDEX unique_actor_slugs_network ON actors (slug, entity_id, entry_id); | ||||
| 			CREATE UNIQUE INDEX unique_actor_slugs ON actors (slug) WHERE entity_id IS NULL; | ||||
| 
 | ||||
| 			CREATE UNIQUE INDEX unique_entity_campaigns_banner ON campaigns (entity_id, url, banner_id); | ||||
| 			CREATE UNIQUE INDEX unique_entity_campaigns ON campaigns (entity_id, url) WHERE banner_id IS NULL; | ||||
| 			CREATE UNIQUE INDEX unique_entity_campaigns_banner_url ON campaigns (entity_id, url, banner_id) WHERE affiliate_id IS NULL; | ||||
| 			CREATE UNIQUE INDEX unique_entity_campaigns_url ON campaigns (entity_id, url) WHERE banner_id IS NULL AND affiliate_id IS NULL; | ||||
| 			CREATE UNIQUE INDEX unique_entity_campaigns_banner_affiliate ON campaigns (entity_id, affiliate_id, banner_id) WHERE url IS NULL; | ||||
| 			CREATE UNIQUE INDEX unique_entity_campaigns_affiliate ON campaigns (entity_id, affiliate_id) WHERE banner_id IS NULL AND url IS NULL; | ||||
| 
 | ||||
| 			CREATE UNIQUE INDEX releases_search_unique ON releases_search (release_id); | ||||
| 			CREATE INDEX releases_search_index ON releases_search USING GIN (document); | ||||
|  | @ -1722,6 +1747,8 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style | |||
| 		DROP TABLE IF EXISTS banners_tags CASCADE; | ||||
| 		DROP TABLE IF EXISTS banners CASCADE; | ||||
| 		DROP TABLE IF EXISTS campaigns CASCADE; | ||||
| 		DROP TABLE IF EXISTS affiliates CASCADE; | ||||
| 
 | ||||
| 		DROP TABLE IF EXISTS batches CASCADE; | ||||
| 
 | ||||
| 		DROP TABLE IF EXISTS actors_avatars CASCADE; | ||||
|  |  | |||
| After Width: | Height: | Size: 64 KiB | 
| After Width: | Height: | Size: 4.6 KiB | 
| After Width: | Height: | Size: 953 B | 
| After Width: | Height: | Size: 953 B | 
| After Width: | Height: | Size: 953 B | 
| After Width: | Height: | Size: 1.3 KiB | 
| After Width: | Height: | Size: 1.3 KiB | 
| After Width: | Height: | Size: 4.6 KiB | 
| After Width: | Height: | Size: 4.1 KiB | 
| After Width: | Height: | Size: 4.1 KiB | 
|  | @ -504,6 +504,17 @@ const sites = [ | |||
| 			profile: 'http://www.american-pornstar.com/models', | ||||
| 		}, | ||||
| 	}, | ||||
| 	// ARCH ANGEL
 | ||||
| 	{ | ||||
| 		slug: 'archangel', | ||||
| 		name: 'ArchAngel', | ||||
| 		url: 'https://www.archangelvideo.com', | ||||
| 		parameters: { | ||||
| 			latest: 'https://www.archangelvideo.com/tour/categories/movies/{page}/latest/', | ||||
| 			profile: 'https://www.archangelvideo.com/tour/models/{slug}.html', | ||||
| 			sets: 'https://www.archangelvideo.com/tour/sets.php', | ||||
| 		}, | ||||
| 	}, | ||||
| 	// ASSYLUM
 | ||||
| 	{ | ||||
| 		slug: 'assylum', | ||||
|  |  | |||
|  | @ -1,5 +1,14 @@ | |||
| const bulkInsert = require('../src/utils/bulk-insert'); | ||||
| 
 | ||||
| const affiliates = [ | ||||
| 	{ | ||||
| 		id: 'archangel_share', | ||||
| 		channel: 'archangel', | ||||
| 		url: 'https://join.archangelvideo.com/track/MzQ3LjEuMS4xLjAuMC4wLjAuMA', | ||||
| 		comment: 'revshare', | ||||
| 	}, | ||||
| ]; | ||||
| 
 | ||||
| const banners = [ | ||||
| 	{ | ||||
| 		id: '21sextury_300_250_anal', | ||||
|  | @ -78,6 +87,13 @@ const banners = [ | |||
| 		network: '21naturals', | ||||
| 		tags: ['sex', 'brunette'], | ||||
| 	}, | ||||
| 	{ | ||||
| 		id: 'archangel_970_90_kendra_lust', | ||||
| 		width: 970, | ||||
| 		height: 90, | ||||
| 		channel: 'archangel', | ||||
| 		tags: ['dp', 'anal', 'sex', 'interracial', 'black'], | ||||
| 	}, | ||||
| 	{ | ||||
| 		id: 'evilangel_728_90_adriana_chechik_gangbang', | ||||
| 		width: 728, | ||||
|  | @ -352,6 +368,15 @@ const campaigns = [ | |||
| 		url: 'https://www.iyalc.com/21naturals/go.php?pr=8&su=1&si=209&ad=277470&pa=index&ar=&buffer=', | ||||
| 		comment: 'per signup', | ||||
| 	}, | ||||
| 	{ | ||||
| 		channel: 'archangel', | ||||
| 		affiliate: 'archangel_share', | ||||
| 	}, | ||||
| 	{ | ||||
| 		channel: 'archangel', | ||||
| 		affiliate: 'archangel_share', | ||||
| 		banner: 'archangel_970_90_kendra_lust', | ||||
| 	}, | ||||
| 	{ | ||||
| 		network: 'brazzers', | ||||
| 		url: 'https://landing.brazzersnetwork.com/?ats=eyJhIjozMTYwOTcsImMiOjU5MzI1Mzk5LCJuIjoxNCwicyI6OTAsImUiOjg4MDMsInAiOjExfQ==', | ||||
|  | @ -624,6 +649,7 @@ exports.seed = async knex => Promise.resolve() | |||
| 		await knex('banners_tags').delete(); | ||||
| 
 | ||||
| 		await Promise.all([ | ||||
| 			knex('affiliates').delete(), | ||||
| 			knex('campaigns').delete(), | ||||
| 			knex('banners').delete(), | ||||
| 		]); | ||||
|  | @ -643,6 +669,13 @@ exports.seed = async knex => Promise.resolve() | |||
| 		const channelsBySlug = channels.reduce((acc, channel) => ({ ...acc, [channel.slug]: channel }), {}); | ||||
| 		const tagsBySlug = tags.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag }), {}); | ||||
| 
 | ||||
| 		const affiliatesWithEntityId = affiliates.map(affiliate => ({ | ||||
| 			id: affiliate.id, | ||||
| 			entity_id: networksBySlug[affiliate.network]?.id || channelsBySlug[affiliate.channel]?.id || null, | ||||
| 			url: affiliate.url, | ||||
| 			comment: affiliate.comment, | ||||
| 		})); | ||||
| 
 | ||||
| 		const bannersWithEntityId = banners.map(banner => ({ | ||||
| 			id: banner.id, | ||||
| 			width: banner.width, | ||||
|  | @ -659,9 +692,11 @@ exports.seed = async knex => Promise.resolve() | |||
| 		const campaignsWithEntityIdAndAffiliateId = campaigns.map(campaign => ({ | ||||
| 			entity_id: networksBySlug[campaign.network]?.id || channelsBySlug[campaign.channel]?.id, | ||||
| 			url: campaign.url, | ||||
| 			affiliate_id: campaign.affiliate, | ||||
| 			banner_id: campaign.banner, | ||||
| 		})).filter(link => link.entity_id && link.url); | ||||
| 		})).filter(link => link.entity_id && (link.url || link.affiliate_id)); | ||||
| 
 | ||||
| 		await knex('affiliates').insert(affiliatesWithEntityId); | ||||
| 		await bulkInsert('banners', bannersWithEntityId, false); | ||||
| 		await bulkInsert('banners_tags', bannerTags, false); | ||||
| 		await bulkInsert('campaigns', campaignsWithEntityIdAndAffiliateId, false); | ||||
|  |  | |||
|  | @ -187,6 +187,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') { | |||
| 				array['parent'] as parent_path | ||||
| 			FROM entities | ||||
| 			WHERE slug = ANY(:entitySlugs) | ||||
| 			OR substring(url from 'https%://%#"[a-z0-9-]+#".(com|net)%' for '#') = ANY(:entitySlugs) | ||||
| 
 | ||||
| 			UNION ALL | ||||
| 
 | ||||
|  | @ -217,6 +218,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') { | |||
| 	const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({ | ||||
| 		...accEntities, | ||||
| 		[entity.slug]: accEntities[entity.slug] || curateEntity(entity, true), | ||||
| 		[urlToSiteSlug(entity.url)]: accEntities[entity.slug] || curateEntity(entity, true), | ||||
| 	}), {}); | ||||
| 
 | ||||
| 	return entitiesBySlug; | ||||
|  |  | |||
|  | @ -1,5 +1,7 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const format = require('template-format'); | ||||
| 
 | ||||
| const { get, geta, initAll, formatDate } = require('../utils/qu'); | ||||
| const slugify = require('../utils/slugify'); | ||||
| 
 | ||||
|  | @ -68,7 +70,10 @@ function scrapeScene({ html, qu }, url, site) { | |||
| } | ||||
| 
 | ||||
| async function fetchActorReleases(actorId, site, page = 1, accScenes = []) { | ||||
| 	const url = `${site.url}/sets.php?id=${actorId}&page=${page}`; | ||||
| 	const url = site.parameters?.sets | ||||
| 		? `${site.parameters.sets}?id=${actorId}&page=${page}` | ||||
| 		: `${site.url}/sets.php?id=${actorId}&page=${page}`; | ||||
| 
 | ||||
| 	const res = await get(url); | ||||
| 
 | ||||
| 	if (!res.ok) return []; | ||||
|  | @ -86,6 +91,11 @@ async function fetchActorReleases(actorId, site, page = 1, accScenes = []) { | |||
| } | ||||
| 
 | ||||
| async function scrapeProfile({ qu }, site, withScenes) { | ||||
| 	if (!qu.exists('.content')) { | ||||
| 		// page probably returned a 404 with a 200 HTTP code
 | ||||
| 		return null; | ||||
| 	} | ||||
| 
 | ||||
| 	const profile = {}; | ||||
| 
 | ||||
| 	const bio = qu.all('.stats li', true).reduce((acc, row) => { | ||||
|  | @ -120,7 +130,9 @@ async function scrapeProfile({ qu }, site, withScenes) { | |||
| } | ||||
| 
 | ||||
| async function fetchLatest(site, page = 1) { | ||||
| 	const url = `${site.url}/categories/movies/${page}/latest/`; | ||||
| 	const url = site.parameters?.latest | ||||
| 		? format(site.parameters.latest, { page }) | ||||
| 		: `${site.url}/categories/movies/${page}/latest/`; | ||||
| 	const res = await geta(url, '.item-episode'); | ||||
| 
 | ||||
| 	return res.ok ? scrapeAll(res.items, site) : res.status; | ||||
|  | @ -132,18 +144,31 @@ async function fetchScene(url, site) { | |||
| 	return res.ok ? scrapeScene(res.item, url, site) : res.status; | ||||
| } | ||||
| 
 | ||||
| async function fetchMovies(channel, page) { | ||||
| 	console.log(channel, page); | ||||
| } | ||||
| 
 | ||||
| async function fetchProfile({ name: actorName }, { site }, include) { | ||||
| 	const actorSlugA = slugify(actorName, ''); | ||||
| 	const actorSlugB = slugify(actorName); | ||||
| 
 | ||||
| 	const resA = await get(`${site.url}/models/${actorSlugA}.html`); | ||||
| 	const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`); | ||||
| 	const urlA = site.parameters?.profile | ||||
| 		? format(site.parameters.profile, { slug: actorSlugA }) | ||||
| 		: `${site.url}/models/${actorSlugA}.html`; | ||||
| 
 | ||||
| 	const urlB = site.parameters?.profile | ||||
| 		? format(site.parameters.profile, { slug: actorSlugB }) | ||||
| 		: `${site.url}/models/${actorSlugB}.html`; | ||||
| 
 | ||||
| 	const resA = await get(urlA); | ||||
| 	const res = resA.ok ? resA : await get(urlB); | ||||
| 
 | ||||
| 	return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status; | ||||
| } | ||||
| 
 | ||||
| module.exports = { | ||||
| 	fetchLatest, | ||||
| 	fetchMovies, | ||||
| 	fetchScene, | ||||
| 	fetchProfile, | ||||
| }; | ||||
|  |  | |||
|  | @ -71,6 +71,7 @@ const scrapers = { | |||
| 		amateurallure, | ||||
| 		americanpornstar, | ||||
| 		amateureuro: porndoe, | ||||
| 		archangel: bamvisions, | ||||
| 		assylum, | ||||
| 		aziani, | ||||
| 		badoink, | ||||
|  | @ -155,6 +156,7 @@ const scrapers = { | |||
| 		analized: fullpornnetwork, | ||||
| 		analviolation: fullpornnetwork, | ||||
| 		anilos: nubiles, | ||||
| 		archangel: bamvisions, | ||||
| 		aziani, | ||||
| 		babes: mindgeek, | ||||
| 		babevr: badoink, | ||||
|  |  | |||