Added update url parameter to Dogfart scraper.

This commit is contained in:
DebaucheryLibrarian 2022-04-04 18:21:51 +02:00
parent 430c7e124d
commit 0d95746689
4 changed files with 21 additions and 12 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

View File

@@ -2785,6 +2785,9 @@ const sites = [
name: 'Gloryhole Initiations',
url: 'https://www.gloryhole-initiations.com',
parent: 'dogfartnetwork',
parameters: {
latest: 'https://www.gloryhole-initiations.com/tourx/scenes',
},
},
{
slug: 'dogfartbehindthescenes',

View File

@@ -100,6 +100,13 @@ const banners = [
channel: 'archangel',
tags: ['dp', 'anal', 'sex', 'interracial', 'black'],
},
{
id: 'wefuckblackgirls_728_90_loss',
width: 728,
height: 90,
network: 'dogfartnetwork',
tags: ['mfm', 'threesome', 'anal', 'black', 'interracial'],
},
{
id: 'evilangel_728_90_adriana_chechik_gangbang',
width: 728,
@@ -795,7 +802,7 @@ const campaigns = [
},
];
exports.seed = async knex => Promise.resolve()
exports.seed = async (knex) => Promise.resolve()
.then(async () => {
await Promise.all([
knex('campaigns').delete(),
@@ -810,19 +817,19 @@ exports.seed = async knex => Promise.resolve()
const [networks, channels, tags] = await Promise.all([
knex('entities')
.where('type', 'network')
.whereIn('slug', campaigns.concat(banners).map(link => link.network).filter(Boolean)),
.whereIn('slug', campaigns.concat(banners).map((link) => link.network).filter(Boolean)),
knex('entities')
.where('type', 'channel')
.whereIn('slug', campaigns.concat(banners).map(link => link.channel).filter(Boolean)),
.whereIn('slug', campaigns.concat(banners).map((link) => link.channel).filter(Boolean)),
knex('tags')
.whereIn('slug', banners.flatMap(banner => banner.tags || [])),
.whereIn('slug', banners.flatMap((banner) => banner.tags || [])),
]);
const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: network }), {});
const channelsBySlug = channels.reduce((acc, channel) => ({ ...acc, [channel.slug]: channel }), {});
const tagsBySlug = tags.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag }), {});
const affiliatesWithEntityId = affiliates.map(affiliate => ({
const affiliatesWithEntityId = affiliates.map((affiliate) => ({
id: affiliate.id,
entity_id: networksBySlug[affiliate.network]?.id || channelsBySlug[affiliate.channel]?.id || null,
url: affiliate.url,
@@ -830,7 +837,7 @@ exports.seed = async knex => Promise.resolve()
comment: affiliate.comment,
}));
const bannersWithEntityId = banners.map(banner => ({
const bannersWithEntityId = banners.map((banner) => ({
id: banner.id,
width: banner.width,
height: banner.height,
@@ -838,17 +845,17 @@ exports.seed = async knex => Promise.resolve()
entity_id: networksBySlug[banner.network]?.id || channelsBySlug[banner.channel]?.id || null,
}));
const bannerTags = banners.flatMap(banner => banner.tags?.map(tag => ({
const bannerTags = banners.flatMap((banner) => banner.tags?.map((tag) => ({
banner_id: banner.id,
tag_id: tagsBySlug[tag].id,
})) || []);
const campaignsWithEntityIdAndAffiliateId = campaigns.map(campaign => ({
const campaignsWithEntityIdAndAffiliateId = campaigns.map((campaign) => ({
entity_id: networksBySlug[campaign.network]?.id || channelsBySlug[campaign.channel]?.id,
url: campaign.url,
affiliate_id: campaign.affiliate,
banner_id: campaign.banner,
})).filter(link => link.entity_id && (link.url || link.affiliate_id));
})).filter((link) => link.entity_id && (link.url || link.affiliate_id));
await knex('affiliates').insert(affiliatesWithEntityId);
await bulkInsert('banners', bannersWithEntityId, false);

View File

@@ -93,9 +93,8 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
return release;
}
async function fetchLatest(channel, page = 1) {
// const res = await qu.getAll(`https://dogfartnetwork.com/tour/scenes/?p=${page}`, '.recent-updates');
const res = await qu.getAll(`${channel.url}/tour/scenes/?p=${page}`, '.recent-updates, .preview-image-container');
async function fetchLatest(channel, page = 1, { parameters }) {
const res = await qu.getAll(parameters.latest ? `${parameters.latest}/?p=${page}` : `${channel.url}/tour/scenes/?p=${page}`, '.recent-updates, .preview-image-container');
if (res.ok) {
return scrapeLatest(res.items, channel);