Added dedicated Arch Angel scraper.

This commit is contained in:
DebaucheryLibrarian 2023-07-02 05:07:38 +02:00
parent e22dbb315e
commit 4a3674feac
14 changed files with 291 additions and 78 deletions

View File

@ -63,6 +63,10 @@ function entityCampaign() {
return randomCampaign;
}
if (this.allowGeneric) {
return this.genericCampaign();
}
this.$emit('campaign', null);
return null;
@ -84,6 +88,10 @@ function tagCampaign() {
return randomCampaign;
}
if (this.allowGeneric) {
return this.genericCampaign();
}
this.$emit('campaign', null);
return null;
@ -134,6 +142,10 @@ export default {
type: Number,
default: null,
},
allowGeneric: {
type: Boolean,
default: false,
},
maxRatio: {
type: Number,
default: null,

View File

@ -114,6 +114,7 @@
<div class="releases">
<Releases
:releases="entity.releases"
:entity="entity"
:done="done"
/>
@ -157,14 +158,17 @@ async function fetchEntity(scroll = true) {
const campaign = entity.campaigns.find((campaignX) => !campaignX.banner)
|| entity.parent?.campaigns.find((campaignX) => !campaignX.banner);
const { searchParams, pathname, origin } = new URL(entity.url);
if (entity.url) {
const { searchParams, pathname, origin } = new URL(entity.url);
const affiliateParams = new URLSearchParams({
...(entity.url && Object.fromEntries(searchParams)), // preserve any query in entity URL, e.g. ?siteId=5
...(campaign?.affiliate?.parameters && Object.fromEntries(new URLSearchParams(campaign.affiliate.parameters))), // append affiliate parameters
}).toString();
const affiliateParams = new URLSearchParams({
...(entity.url && Object.fromEntries(searchParams)), // preserve any query in entity URL, e.g. ?siteId=5
...(campaign?.affiliate?.parameters && Object.fromEntries(new URLSearchParams(campaign.affiliate.parameters))), // append affiliate parameters
}).toString();
this.entityUrl = campaign?.url || campaign?.affiliate?.url || `${origin}${pathname}${campaign?.affiliate?.parameters ? `?${affiliateParams}` : ''}`;
}
this.entityUrl = campaign?.url || campaign?.affiliate?.url || `${origin}${pathname}${campaign?.affiliate?.parameters ? `?${affiliateParams}` : ''}`;
this.done = true;
if (scroll && this.$refs.filter?.$el) {

View File

@ -76,7 +76,7 @@ export default {
releases: [],
networks: [],
pageTitle: null,
limit: 30,
limit: 29, // reserve one campaign spot
totalCount: 0,
from: null,
done: false,

View File

@ -12,18 +12,34 @@
:key="sfw"
class="nolist tiles"
>
<li
v-for="(release, index) in releases"
:key="`release-${release.id}`"
>
<SceneTile
:release="release"
:referer="referer"
:index="index"
:stash="stash"
@stash="isStashed => $emit('stash', isStashed)"
/>
</li>
<template v-for="(item, index) in items">
<li
v-if="item === 'campaign'"
:key="`campaign-${index}`"
class="campaign"
>
<Campaign
v-if="item === 'campaign'"
:entity="entity"
:min-ratio="0.75"
:max-ratio="1.25"
:allow-generic="true"
/>
</li>
<li
v-else
:key="`release-${item.id}`"
>
<SceneTile
:release="item"
:referer="referer"
:index="index"
:stash="stash"
@stash="isStashed => $emit('stash', isStashed)"
/>
</li>
</template>
</ul>
<span
@ -38,51 +54,53 @@
</div>
</template>
<script>
<script setup>
import { defineProps, defineEmits, computed } from 'vue';
import { useStore } from 'vuex';
import { useRouter } from 'vue-router';
import Campaign from '../campaigns/campaign.vue';
import Ellipsis from '../loading/ellipsis.vue';
import SceneTile from './scene-tile.vue';
function range() {
return this.$route.params.range;
}
const router = useRouter();
const store = useStore();
function sfw() {
return this.$store.state.ui.sfw;
}
defineEmits(['stash']);
export default {
components: {
Ellipsis,
SceneTile,
const props = defineProps({
releases: {
type: Array,
default: () => [],
},
props: {
releases: {
type: Array,
default: () => [],
},
context: {
type: String,
default: null,
},
done: {
type: Boolean,
default: true,
},
referer: {
type: String,
default: null,
},
stash: {
type: Object,
default: null,
},
entity: {
type: Object,
default: null,
},
emits: ['stash'],
computed: {
range,
sfw,
context: {
type: String,
default: null,
},
};
done: {
type: Boolean,
default: true,
},
referer: {
type: String,
default: null,
},
stash: {
type: Object,
default: null,
},
});
// Random slot for the campaign tile, never within the first five releases.
// The inner parentheses matter: without them the `- 5` was subtracted from the random
// product and cancelled out the `+ 5`, so the campaign could land anywhere from index 0.
const campaignIndex = computed(() => Math.floor(Math.random() * (props.releases.length - 5)) + 5);
// Insert a 'campaign' marker right before the release at campaignIndex, only for lists of more than 10 releases.
const items = computed(() => props.releases.flatMap((release, index) => (props.releases.length > 10 && index === campaignIndex.value ? ['campaign', release] : release)));

// useRouter() returns the router instance, which exposes the active route through the
// `currentRoute` ref; it has no `route` property, so `router.route?.params` was always undefined.
const range = computed(() => router.currentRoute.value.params.range);
const sfw = computed(() => store.state.ui.sfw);
</script>
<style lang="scss" scoped>
@ -126,6 +144,12 @@ export default {
font-weight: bold;
}
.campaign {
display: flex;
align-items: flex-start;
justify-content: center;
}
@media(max-width: $breakpoint-mega) {
.tiles {
grid-template-columns: repeat(auto-fill, minmax(19rem, 1fr));

View File

@ -82,12 +82,7 @@ function initReleasesActions(store, router) {
connection: moviesConnection(
first: $limit
offset: $offset
orderBy: DATE_DESC
filter: {
date: {
isNull: false
}
}
orderBy: EFFECTIVE_DATE_DESC
) {
movies: nodes {
${movieFields}

View File

@ -1582,6 +1582,10 @@ exports.up = (knex) => Promise.resolve()
ALTER TABLE releases
ADD COLUMN effective_date timestamptz
GENERATED ALWAYS AS (COALESCE(date, created_at)) STORED;
ALTER TABLE movies
ADD COLUMN effective_date timestamptz
GENERATED ALWAYS AS (COALESCE(date, created_at)) STORED;
`);
})
// INDEXES

14
package-lock.json generated
View File

@ -79,7 +79,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0",
"unprint": "^0.9.4",
"unprint": "^0.10.0",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3",
"video.js": "^7.11.4",
@ -17512,9 +17512,9 @@
}
},
"node_modules/unprint": {
"version": "0.9.4",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.9.4.tgz",
"integrity": "sha512-KU5hU99TUQPoXBu7E6oUMPOiCzQK70oHmHRnKLaCvQHpkU+q8opzl3bsNLzs4tTUcyWn8FNIdhRS1++bmwqrWg==",
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.0.tgz",
"integrity": "sha512-HQ73xcI7wzWeK3j3EQ0wWDe1dpMmeKY4LpFg9bNPD5G8ZyqYm5hYx1KGu6J9NTV/T5tOW4Ec0zyHoCRT2Msimg==",
"dependencies": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",
@ -32339,9 +32339,9 @@
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
},
"unprint": {
"version": "0.9.4",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.9.4.tgz",
"integrity": "sha512-KU5hU99TUQPoXBu7E6oUMPOiCzQK70oHmHRnKLaCvQHpkU+q8opzl3bsNLzs4tTUcyWn8FNIdhRS1++bmwqrWg==",
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.0.tgz",
"integrity": "sha512-HQ73xcI7wzWeK3j3EQ0wWDe1dpMmeKY4LpFg9bNPD5G8ZyqYm5hYx1KGu6J9NTV/T5tOW4Ec0zyHoCRT2Msimg==",
"requires": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",

View File

@ -138,7 +138,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0",
"unprint": "^0.9.4",
"unprint": "^0.10.0",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3",
"video.js": "^7.11.4",

View File

@ -1367,6 +1367,10 @@ const aliases = [
name: 'big tits',
for: 'big-boobs',
},
{
name: 'big tits d-dd cup',
for: 'big-boobs',
},
{
name: 'busty - big boobs',
for: 'big-boobs',

View File

@ -717,11 +717,6 @@ const sites = [
slug: 'archangel',
name: 'ArchAngel',
url: 'https://www.archangelvideo.com',
parameters: {
latest: 'https://www.archangelvideo.com/tour/categories/movies/{page}/latest/',
profile: 'https://www.archangelvideo.com/tour/models/{slug}.html',
sets: 'https://www.archangelvideo.com/tour/sets.php',
},
},
// ASSYLUM
{

View File

@ -697,6 +697,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
channel: entity,
network: entity?.parent,
entity,
include,
scraper: scraperSlug,
parameters: getRecursiveParameters(entity),
};

173
src/scrapers/archangel.js Executable file
View File

@ -0,0 +1,173 @@
'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
/**
 * Derive an entry ID for a scraped release.
 *
 * Prefers the `.html` file name found in the release URL. When the URL is missing or
 * carries no such file name, falls back to a slug built from the release date, title
 * and actor names.
 *
 * @param {Object} release - Scraped release; reads `url`, `date`, `title` and `actors`.
 * @returns {*} The result of `slugify` for the chosen identifier.
 */
function getEntryId(release) {
	// a listing tile may come without a link; skip parsing instead of letting `new URL(null)` throw
	const pathSlug = release.url
		? new URL(release.url).pathname.match(/\/([\w-]+)\.html/)?.[1]
		: null;

	if (pathSlug) {
		return slugify(pathSlug);
	}

	// actors are scraped as { name, url } objects, and movie releases have no actors at all
	const actorNames = (release.actors ?? []).map((actor) => actor?.name ?? actor);

	return slugify([
		release.date && unprint.formatDate(release.date, 'YYYY-MM-DD'),
		release.title,
		...actorNames,
	].filter(Boolean));
}
function scrapeAll(scenes) {
return scenes.map(({ query }) => {
const release = {};
release.url = query.url('a');
release.title = query.content('a span');
release.date = query.date('.timeDate', 'YYYY-MM-DD');
release.duration = query.duration('.timeDate');
release.actors = query.all('a[href*="models/"], a[href*="sets.php"]').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null),
}));
release.poster = query.img('img.mainThumb');
release.photoCount = query.number('.timeDate');
release.entryId = getEntryId(release);
return release;
});
}
/**
 * Scrape a single scene page.
 *
 * @param {Object} context - Page context; `query` is used for element lookups and `html`
 *   (the raw markup string) for the poster/trailer fallbacks.
 * @param {Object} options - Only `url` is read; it becomes the release URL.
 * @returns {Object} Release with title, description, date, duration, actors, poster,
 *   trailer, photoCount, tags and entryId.
 */
function scrapeScene({ query, html }, { url }) {
	const release = { url };

	release.title = query.content('.title h2');
	release.description = query.content('.description p');

	release.date = query.date('.info p', 'MMMM D, YYYY');
	release.duration = query.duration('.info p');

	release.actors = query.all('.info a[href*="models/"], .info a[href*="sets.php"]').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	// Stay inside the attribute's own quotes: a greedy `.*` runs on to the LAST `.jpg"` / `.mp4"`
	// on the same line of markup and captures neighbouring attributes along with the URL.
	release.poster = query.img('.update_thumb') || html.match(/poster="([^"]*\.jpg)"/)?.[1];
	release.trailer = html.match(/src="([^"]*\.mp4)"/)?.[1];

	release.photoCount = query.number('.info', { match: /(\d+) photos/i, matchIndex: 1 });
	release.tags = query.contents('.info .tags a');

	release.entryId = getEntryId(release);

	return release;
}
/**
 * Scrape a movie page, including the scene tiles listed on it.
 *
 * @param {Object} context - Page context; reads `query` and `element`.
 * @param {Object} options - Reads `entity.url` (origin for cover URLs) and `url`.
 * @returns {Object} Release with title, description, covers, entryId and scenes.
 */
function scrapeMovie({ query, element }, { entity, url }) {
	// order of preference; the 4x and 3x variants come last because they are usually upscaled
	const coverAttributes = ['src0_2x', 'src0_1x', 'src0', 'src0_4x', 'src0_3x'];

	const coverSources = coverAttributes
		.map((attribute) => query.img('.update_thumb', { attribute, origin: entity.url }))
		.filter(Boolean);

	const release = {
		url,
		title: query.content('.title h2'),
		description: query.content('.aboutArea p'),
		covers: [coverSources], // a single cover, given as a list of candidate sources
	};

	release.entryId = getEntryId(release);
	release.scenes = scrapeAll(unprint.initAll(element, '.item-video'));

	return release;
}
/**
 * Scrape an actor profile page, including the scene tiles listed on it.
 *
 * @param {Object} context - Page context; reads `query` and `element`.
 * @param {Object} options - Reads `url` (stored on the profile) and `entity.url`
 *   (origin for avatar URLs).
 * @returns {Object} Profile with description, bio details, avatar sources and scenes.
 */
function scrapeProfile({ query, element }, { url, entity }) {
	// collect the stats rows as key/value pairs, dropping rows without a label or a value
	const bioEntries = [];

	query.all('.stats li').forEach((row) => {
		const key = slugify(unprint.query.content(row, '.data-name, span'), '_');
		const value = unprint.query.text(row);

		if (key && value) {
			bioEntries.push([key, value]);
		}
	});

	const bio = Object.fromEntries(bioEntries);

	// a metric height is used when present, otherwise a feet/inches notation is converted
	const metricHeight = Number(bio.height?.match(/(\d+)\s*cm/)?.[1]);
	let height = null;

	if (metricHeight) {
		height = metricHeight;
	} else if (/\d fe*t \d+ inch/i.test(bio.height)) {
		height = feetInchesToCm(bio.height) || null;
	}

	const avatarAttributes = ['src0_4x', 'src0_3x', 'src0_2x', 'src0_1x', 'src0'];

	const profile = {
		url,
		description: query.content('.aboutArea p'),
		birthPlace: bio.place_of_birth,
		dateOfBirth: unprint.extractDate(bio.age, 'MMMM D, YYYY'),
		height,
		measurements: bio.measurements,
		hairColor: bio.hair_color,
		eyes: bio.eye_color,
		avatar: avatarAttributes
			.map((attribute) => query.img('.model_bio_thumb', { attribute, origin: entity.url }))
			.filter(Boolean),
	};

	profile.scenes = scrapeAll(unprint.initAll(element, '.item-video'));

	return profile;
}
/**
 * Fetch one page of the channel's latest-updates listing and scrape its tiles.
 *
 * @param {Object} channel - Reads `channel.url` as the site base URL.
 * @param {number} [page=1] - Listing page number, interpolated into the URL.
 * @returns {Promise<Array<Object>|*>} Scraped releases, or the response status when the
 *   request was not OK.
 */
async function fetchLatest(channel, page = 1) {
	const res = await unprint.get(
		`${channel.url}/tour/categories/movies_${page}_d.html`,
		{ selectAll: '.item-video' },
	);

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
/**
 * Fetch and scrape an actor profile, trying several candidate URLs in order.
 *
 * Candidates are the actor's own URL (when given), then the models page built from the
 * dash-separated name slug, then the one built from the slug without a separator.
 * Requests run one at a time and stop at the first OK response.
 *
 * @param {Object} actor - Reads `name` and `url`.
 * @param {Object} context - Reads `entity` (for `entity.url`) and `include`; both are passed on to `scrapeProfile`.
 * @returns {Promise<Object|*>} The scraped profile, or the status of the last failed
 *   request (null when no request was made) if no candidate responded OK.
 */
async function fetchProfile({ name: actorName, url: actorUrl }, { entity, include }) {
	const res = await [
		actorUrl,
		`${entity.url}/tour/models/${slugify(actorName, '-')}.html`,
		`${entity.url}/tour/models/${slugify(actorName, '')}.html`,
	].reduce(async (chain, url) => {
		const prevRes = await chain;

		// stop requesting once a candidate succeeded; skip candidates without a URL
		if (prevRes.ok || !url) {
			return prevRes;
		}

		const actorRes = await unprint.get(url);

		if (actorRes.ok) {
			return {
				...actorRes,
				url,
			};
		}

		// carry the failure forward, so the caller receives the actual status instead of the initial null
		return {
			ok: false,
			status: actorRes.status ?? prevRes.status,
		};
	}, Promise.resolve({ ok: false, status: null }));

	if (res.ok) {
		return scrapeProfile(res.context, { entity, include, url: res.url });
	}

	return res.status;
}
// Public interface of the scraper: listing and profile fetchers are exported directly,
// while the scene and movie page scrapers are exported as descriptor objects.
module.exports = {
fetchLatest,
fetchProfile,
// NOTE(review): `unprint: true` presumably tells the caller to pass an unprint context
// ({ query, html, element }) rather than fetching inside the scraper — confirm against the scraper runner
scrapeScene: {
scraper: scrapeScene,
unprint: true,
},
scrapeMovie: {
scraper: scrapeMovie,
unprint: true,
},
};

View File

@ -1,6 +1,7 @@
'use strict';
const adultempire = require('./adultempire');
const archangel = require('./archangel');
const assylum = require('./assylum');
const aziani = require('./aziani');
const amateurallure = require('./amateurallure');
@ -79,7 +80,7 @@ const scrapers = {
amateurallure,
americanpornstar,
amateureuro: porndoe,
archangel: bamvisions,
archangel,
assylum,
aziani,
badoink,
@ -175,7 +176,7 @@ const scrapers = {
analized: fullpornnetwork,
analviolation: fullpornnetwork,
anilos: nubiles,
archangel: bamvisions,
archangel,
aziani,
babes: mindgeek,
babevr: badoink,

View File

@ -242,7 +242,7 @@ async function fetchLatest(entity, page, options) {
// select from configured random image source
release.poster = `${options.source}?id=${nanoid()}`; // ensure source is unique
release.photos = Array.from({ length: Math.floor(Math.random() * 10) + 1 }, () => `${options.source}?id=${nanoid()}`); // ensure source is unique
} else {
} else if (options.includeMedia) {
// select from local SFW database
const [poster, ...photos] = await knex('media')
.select('path')