Added conditions to Tokyo Hot scraper to prevent total failure.

This commit is contained in:
DebaucheryLibrarian 2023-07-31 23:41:32 +02:00
parent d2f81d446b
commit ae64c5225f
13 changed files with 170 additions and 95 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -5991,6 +5991,13 @@ const sites = [
independent: true, independent: true,
parent: 'modelmedia', parent: 'modelmedia',
}, },
{
slug: 'jerkaoke',
name: 'Jerkaoke',
url: 'https://www.jerkaoke.com',
independent: true,
parent: 'modelmedia',
},
// MOFOS // MOFOS
{ {
slug: 'girlsgonepink', slug: 'girlsgonepink',

View File

@ -279,7 +279,6 @@ async function findSourceDuplicates(baseMedias) {
const [existingSourceMedia, existingExtractMedia] = await Promise.all([ const [existingSourceMedia, existingExtractMedia] = await Promise.all([
// may try to check thousands of URLs at once, don't pass all of them to a single query // may try to check thousands of URLs at once, don't pass all of them to a single query
chunk(sourceUrls).reduce(async (chain, sourceUrlsChunk) => { chunk(sourceUrls).reduce(async (chain, sourceUrlsChunk) => {
console.log(sourceUrlsChunk);
const accUrls = await chain; const accUrls = await chain;
const existingUrls = await knex('media').whereIn('source', sourceUrlsChunk); const existingUrls = await knex('media').whereIn('source', sourceUrlsChunk);

View File

@ -1,85 +0,0 @@
'use strict';
const unprint = require('unprint');
const http = require('../utils/http');
const qu = require('../utils/qu');
/**
 * Builds one release object per scene context found on a listing page.
 *
 * @param {Array<{ query: object }>} scenes - Scene contexts; each exposes a `query` helper with a `content(selector)` method.
 * @returns {Array<{ title: * }>} Releases carrying whatever `query.content` yields for the title selector.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.title = query.content('.video-title div');

    // the per-release debug dump to stdout was removed; callers receive the release via the return value
    return release;
  });
}
/**
 * Fetches one page of the channel's video listing, ordered by publication date,
 * after submitting the adult confirmation / cookie acceptance request.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site root.
 * @param {number} page - Page number interpolated into the listing URL.
 * @returns {Promise<Array|number>} The result of `scrapeAll` when the listing request is ok, otherwise its HTTP status.
 */
async function fetchLatest(channel, page) {
  // all requests are issued with the same session object
  const session = http.session();

  await http.get(channel.url, { session });
  await http.post(`${channel.url}/adult_confirmation_and_accept_cookie`, null, { session });

  // the check endpoint is still requested, but its response is no longer bound or dumped to stdout
  // NOTE(review): presumably this request is only needed for its effect on the session — confirm
  await http.get(`${channel.url}/check_adult_confirmation_and_accept_cookie`, { session });

  const res = await http.get(`${channel.url}/videos?sort=published_at&page=${page}`, { session });

  if (res.ok) {
    return scrapeAll(res.context, channel);
  }

  return res.status;
}
/*
function confirmAdultCookie() {
// console.log('confirmed Adult');
$('#adult_confirmation').modal('hide');
var adultCookieRequest = $.ajax({
xhrFields: {
withCredentials: true
},
type: "POST",
url: "https://www.delphinefilms.com/adult_confirmation_and_accept_cookie"
});
$.when(adultCookieRequest).done(function() {
// console.log('Both requests completed successfully');
waitForSessionUpdateAndRefresh();
});
return false;
}
function waitForSessionUpdateAndRefresh() {
var interval = setInterval(function() {
$.ajax({
type: "GET",
url: "https://www.delphinefilms.com/check_adult_confirmation_and_accept_cookie",
success: function(response) {
// console.log(response)
if (response === "1") {
// console.log('Session updated: Adult confirmed and cookie accepted');
clearInterval(interval);
location.reload();
} else {
// console.log('Session not updated: Adult not confirmed or cookie not accepted');
}
}
});
}, 100);
}
*/
// Public interface of this scraper module: fetchLatest is the only export.
module.exports = {
fetchLatest,
};

151
src/scrapers/modelmedia.js Normal file
View File

@ -0,0 +1,151 @@
'use strict';
const unprint = require('unprint');
/**
 * Converts the scene link contexts of a listing page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - Scene contexts; each exposes a `query` helper
 *   with `attribute`, `url`, `content`, `duration`, `img` and `video` methods.
 * @returns {Array<object>} One release per scene with `entryId`, `title`, `duration` and `teaser`,
 *   plus `url`/`actors` when the scene has a link and `poster` when it has an image.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.entryId = query.attribute(null, 'video-id');

    const url = query.url(null);

    if (url) {
      const { origin, pathname, searchParams } = new URL(url);

      // the stored URL drops the query string; the performer names are read from its models_name parameter
      release.url = `${origin}${pathname}`;
      release.actors = searchParams.get('models_name')?.split(',');
    }

    release.title = query.content('.video-title div');
    release.duration = query.duration('.timestamp');

    const poster = query.img('img', { attribute: 'data-src' });

    if (poster) {
      // first entry rewrites the w/h parameters to 1920x1080, second entry is the URL as found
      release.poster = [
        poster.replace(/w=\d+/, 'w=1920').replace(/h=\d+/, 'h=1080'),
        poster,
      ];
    }

    release.teaser = query.video(null, { attribute: 'data-video-src' });

    // the per-release debug dump to stdout was removed; the release is only returned
    return release;
  });
}
/**
 * Extracts an actor profile from a profile page context.
 *
 * @param {{ query: object }} context - Page context exposing a `query` helper with
 *   `img`, `imgs`, `content` and `number` methods.
 * @returns {object} Profile with `description`, `height`, `weight`, `measurements`,
 *   `birthPlace`, `banner` and `photos`, plus `avatar` when a profile picture is found.
 */
function scrapeProfile({ query }) {
  const profile = {};

  const avatar = query.img('div[class*="prof-pic"] > img');

  if (avatar) {
    // first entry rewrites the w/h parameters to 720x1080, second entry is the URL as found
    profile.avatar = [
      avatar.replace(/w=\d+/, 'w=720').replace(/h=\d+/, 'h=1080'),
      avatar,
    ];
  }

  profile.description = query.content('h2') || null;

  profile.height = query.number('//span[text()="Measurements"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
  profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });

  // NOTE(review): this reads the "Birth Place" span with a cm pattern, which looks like a copy-paste slip;
  // the intended source and format for measurements are not visible here — confirm against the site markup
  profile.measurements = query.number('//span[text()="Birth Place"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });

  // a place name is text, so it is read as content rather than parsed as a number
  profile.birthPlace = query.content('//span[text()="Birth Place"]/following-sibling::span');

  profile.banner = query.img('div[class*="banner"] > img');
  profile.photos = query.imgs('#MusModelSwiper img');

  // the profile debug dump to stdout was removed; the profile is only returned
  return profile;
}
/**
 * Requests the channel front page, then posts the adult confirmation with the
 * CSRF token and cookies taken from that first response.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site root.
 * @returns {Promise<string|number|null>} The joined set-cookie string on success,
 *   the HTTP status of whichever request was not ok, or null when the token or cookies are missing.
 */
async function getCookie(channel) {
  const landing = await unprint.get(channel.url);

  if (!landing.ok) {
    return landing.status;
  }

  const token = landing.context?.query.attribute('meta[name="csrf-token"]', 'content');
  const sessionCookie = landing.response.headers['set-cookie']?.join(';');

  // both values are sent with the confirmation request; without either there is nothing to confirm with
  if (!(token && sessionCookie)) {
    return null;
  }

  const confirmUrl = `${channel.url}/adult_confirmation_and_accept_cookie`;
  const confirmation = await unprint.post(confirmUrl, null, {
    headers: {
      cookie: sessionCookie,
      'x-csrf-token': token,
    },
  });

  return confirmation.ok ? sessionCookie : confirmation.status;
}
/**
 * Fetches one page of the channel's video listing, ordered by publication date.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site root.
 * @param {number} page - Page number interpolated into the listing URL.
 * @returns {Promise<Array|number>} The result of `scrapeAll` when the request is ok, otherwise the HTTP status.
 */
async function fetchLatest(channel, page) {
  // whatever getCookie yields is forwarded as the cookie header
  const cookie = await getCookie(channel);
  const listingUrl = `${channel.url}/videos?sort=published_at&page=${page}`;

  const res = await unprint.get(listingUrl, {
    selectAll: '.row a[video-id]',
    headers: { cookie },
  });

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.context, channel);
}
// deep pages are paywalled
/**
 * Looks up an actor's profile URL through the site's live search endpoint.
 *
 * @param {{ name: string }} actor - Actor whose `name` is used as the search keyword.
 * @param {object} context - Scrape context; the site root is read from `context.channel.url`,
 *   falling back to `context.entity.url`.
 * @param {*} cookie - Value sent as the `cookie` request header.
 * @returns {Promise<*|number>} Whatever `query.url` yields for the link titled with the actor's name,
 *   or the HTTP status when the search request is not ok.
 */
async function searchProfile(actor, context, cookie) {
  // fetchProfile reads context.entity from this same context, so accept either key for the site root
  const { url: baseUrl } = context.channel ?? context.entity;

  // names may contain spaces, '&', '#' and the like, which would otherwise corrupt the query string
  const keyword = encodeURIComponent(actor.name);

  const searchRes = await unprint.get(`${baseUrl}/livesearch?keyword=${keyword}`, {
    headers: {
      cookie,
    },
  });

  if (!searchRes.ok) {
    return searchRes.status;
  }

  // escape quotes and backslashes so the name cannot terminate the quoted attribute value early
  const escapedName = actor.name.replace(/["\\]/g, '\\$&');

  return searchRes.context.query.url(`a[title="${escapedName}"]`);
}
/**
 * Fetches and scrapes an actor's profile page, searching for the page when the actor has no URL.
 *
 * @param {{ name: string, url?: string }} actor - Actor to look up; `url` is used directly when set.
 * @param {object} context - Scrape context; `context.entity` is passed to `getCookie` and the
 *   whole context to `searchProfile`.
 * @returns {Promise<object|null>} The result of `scrapeProfile`, or null when no profile URL
 *   is available or the profile request is not ok.
 */
async function fetchProfile(actor, context) {
  const cookie = await getCookie(context.entity);
  const actorUrl = actor.url || await searchProfile(actor, context, cookie);

  // searchProfile returns a numeric HTTP status when its request fails; that value is truthy,
  // so only a non-empty string may be requested as the profile URL
  if (!actorUrl || typeof actorUrl !== 'string') {
    return null;
  }

  const res = await unprint.get(actorUrl, {
    headers: {
      cookie,
    },
  });

  if (res.ok) {
    return scrapeProfile(res.context, actorUrl);
  }

  return null;
}
// Public interface of this scraper module: the latest-release listing and the actor profile lookup.
module.exports = {
fetchLatest,
fetchProfile,
};

View File

@ -15,7 +15,7 @@ const cherrypimps = require('./cherrypimps');
const cumlouder = require('./cumlouder'); const cumlouder = require('./cumlouder');
const czechav = require('./czechav'); const czechav = require('./czechav');
const ddfnetwork = require('./ddfnetwork'); const ddfnetwork = require('./ddfnetwork');
const delphine = require('./delphine'); const modelmedia = require('./modelmedia');
const dorcel = require('./dorcel'); const dorcel = require('./dorcel');
const fabulouscash = require('./fabulouscash'); const fabulouscash = require('./fabulouscash');
const famedigital = require('./famedigital'); const famedigital = require('./famedigital');
@ -95,7 +95,7 @@ const scrapers = {
cumlouder, cumlouder,
czechav, czechav,
pornworld: ddfnetwork, pornworld: ddfnetwork,
delphine, delphine: modelmedia,
dorcel, dorcel,
elegantangel: adultempire, elegantangel: adultempire,
famedigital, famedigital,
@ -191,6 +191,7 @@ const scrapers = {
blackedraw: vixen, blackedraw: vixen,
blackambush: elevatedx, blackambush: elevatedx,
bluedonkeymedia, bluedonkeymedia,
delphine: modelmedia,
meidenvanholland: bluedonkeymedia, meidenvanholland: bluedonkeymedia,
vurigvlaanderen: bluedonkeymedia, vurigvlaanderen: bluedonkeymedia,
boobpedia, boobpedia,

View File

@ -19,10 +19,12 @@ function scrapeAll(scenes, channel) {
const poster = query.img(); const poster = query.img();
if (poster) {
release.poster = [ release.poster = [
poster.replace('220x124', '820x462'), poster.replace('220x124', '820x462'),
poster, poster,
]; ];
}
return release; return release;
}); });
@ -59,12 +61,12 @@ function scrapeScene({ query }, url, channel) {
release.photos = query.imgs('.scap a', { attribute: 'href' }).map((img) => [ release.photos = query.imgs('.scap a', { attribute: 'href' }).map((img) => [
img, img,
img.replace('640x480_wlimited', '150x150_default'), img?.replace('640x480_wlimited', '150x150_default'),
]); ]);
release.caps = query.imgs('.vcap a', { attribute: 'href' }).map((img) => [ release.caps = query.imgs('.vcap a', { attribute: 'href' }).map((img) => [
img, img,
img.replace('640x480_wlimited', '120x120_default'), img?.replace('640x480_wlimited', '120x120_default'),
]); ]);
return release; return release;
@ -109,7 +111,7 @@ function scrapeProfile({ query }) {
profile.hairStyle = bio.hair_style; profile.hairStyle = bio.hair_style;
profile.shoeSize = getMeasurement(bio.shoes_size); profile.shoeSize = getMeasurement(bio.shoes_size);
profile.bloodType = bio.blood_type.replace('type', '').trim(); profile.bloodType = bio.blood_type?.replace('type', '').trim();
profile.avatar = query.img('#profile img'); profile.avatar = query.img('#profile img');