Fixed various Kelly Madison scraper issues.
After Width: | Height: | Size: 1017 KiB |
After Width: | Height: | Size: 874 KiB |
After Width: | Height: | Size: 8.6 KiB |
After Width: | Height: | Size: 8.5 KiB |
After Width: | Height: | Size: 729 KiB |
After Width: | Height: | Size: 45 KiB |
After Width: | Height: | Size: 44 KiB |
Before Width: | Height: | Size: 5.7 MiB After Width: | Height: | Size: 5.7 MiB |
Before Width: | Height: | Size: 550 KiB After Width: | Height: | Size: 550 KiB |
Before Width: | Height: | Size: 839 KiB After Width: | Height: | Size: 839 KiB |
Before Width: | Height: | Size: 3.0 MiB After Width: | Height: | Size: 3.0 MiB |
Before Width: | Height: | Size: 3.9 MiB After Width: | Height: | Size: 3.9 MiB |
Before Width: | Height: | Size: 90 KiB After Width: | Height: | Size: 90 KiB |
Before Width: | Height: | Size: 699 KiB After Width: | Height: | Size: 699 KiB |
Before Width: | Height: | Size: 699 KiB After Width: | Height: | Size: 699 KiB |
Before Width: | Height: | Size: 706 KiB After Width: | Height: | Size: 706 KiB |
Before Width: | Height: | Size: 708 KiB After Width: | Height: | Size: 708 KiB |
Before Width: | Height: | Size: 821 KiB After Width: | Height: | Size: 821 KiB |
Before Width: | Height: | Size: 682 KiB After Width: | Height: | Size: 682 KiB |
Before Width: | Height: | Size: 843 KiB After Width: | Height: | Size: 843 KiB |
Before Width: | Height: | Size: 6.5 MiB After Width: | Height: | Size: 6.5 MiB |
|
@ -788,8 +788,9 @@ const tagPhotos = [
|
|||
['ebony', 1, 'Ana Foxxx in "DP Me 4" for HardX'],
|
||||
['facial', 2, 'Ashly Anderson for Hookup Hotshot'],
|
||||
['facial', 'poster', 'Jynx Maze'],
|
||||
['facefucking', 0, 'Ashly Anderson in "Rough Love" for Hookup Hotshot'],
|
||||
['facefucking', 6, 'Halle Hayes in "Towering Temptress" for 5K Porn'],
|
||||
['facefucking', 1, 'Paige Owens in "Dark Meat 12" for Evil Angel'],
|
||||
['facefucking', 0, 'Ashly Anderson in "Rough Love" for Hookup Hotshot'],
|
||||
['facefucking', 2, 'Jynx Maze for Throated'],
|
||||
['facefucking', 4, 'Brooklyn Gray in "Throats Fucks 6" for Evil Angel'],
|
||||
['facefucking', 3, 'Adriana Chechik in "Performing Magic Butt Tricks With Jules Jordan. What Will Disappear In Her Ass?" for Jules Jordan'],
|
||||
|
|
|
@ -17,9 +17,9 @@ const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, value]) =>
|
|||
|
||||
function scrapeLatest(scenes, site) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = { site };
|
||||
const release = {};
|
||||
|
||||
release.shootId = query.q('.card-meta .text-right, .row .text-right', true);
|
||||
release.shootId = query.q('.card-meta .text-right, .row .text-right, .card-footer-item:last-child', true);
|
||||
|
||||
const siteId = release.shootId.match(/\d?\w{2}/)[0];
|
||||
const siteSlug = siteMapByKey[siteId];
|
||||
|
@ -29,24 +29,24 @@ function scrapeLatest(scenes, site) {
|
|||
return null;
|
||||
}
|
||||
|
||||
const { pathname } = new URL(query.url('h5 a, .ep-title a'));
|
||||
const { pathname } = new URL(query.url('h5 a, .ep-title a, .title a'));
|
||||
[release.entryId] = pathname.match(/\d+$/);
|
||||
release.url = `${site.url}${pathname}`;
|
||||
|
||||
release.title = query.q('h5 a, .ep-title a', true);
|
||||
release.title = query.cnt('h5 a, .ep-title a, .title a');
|
||||
|
||||
release.date = query.date('.card-meta .text-left, .row .col-4:first-child', ['MMM D', 'MMM D, YYYY'], /\w+ \d+(, \w+)?/);
|
||||
release.actors = query.all('.models a, .ep-models a', true);
|
||||
release.date = query.date('.card-meta .text-left, .row .col-4:first-child, .card-footer-item:first-child', ['MMM D', 'MMM D, YYYY'], /\w+ \d+(, \w+)?/);
|
||||
release.actors = query.cnts('.models a, .ep-models a, a[href*="models/"]');
|
||||
|
||||
release.duration = query.dur('.content a');
|
||||
|
||||
const duration = query.q('.content a, .ep-runtime strong', true).match(/(\d+) min/)[1];
|
||||
const duration = query.cnt('.content a, .ep-runtime strong, .subtitle:last-child a')?.match(/(\d+) min/)?.[1];
|
||||
if (duration) release.duration = Number(duration) * 60;
|
||||
|
||||
if (query.exists('.episodes-preview')) {
|
||||
[release.poster, ...release.photos] = query.imgs('.episodes-preview img');
|
||||
} else {
|
||||
release.poster = query.img('.card-img-top');
|
||||
release.poster = query.img('.card-img-top, .image img');
|
||||
release.teaser = {
|
||||
src: query.video('video'),
|
||||
};
|
||||
|
@ -56,32 +56,32 @@ function scrapeLatest(scenes, site) {
|
|||
}).filter(scene => scene);
|
||||
}
|
||||
|
||||
async function scrapeScene({ query, html }, url, baseRelease) {
|
||||
const { pathname, origin } = new URL(url);
|
||||
async function scrapeScene({ query, html }, url, baseRelease, channel, session) {
|
||||
const { pathname } = new URL(url);
|
||||
const release = {};
|
||||
|
||||
[release.entryId] = pathname.match(/\d+$/);
|
||||
|
||||
const titleString = query.q('.card-header.row h4, .trailer-starring span', true);
|
||||
const episode = titleString.match(/#\d+$/)[0];
|
||||
const titleString = query.cnt('.card-header.row h4, .trailer-starring span, .level-left .level-item');
|
||||
const episode = titleString?.match(/#\d+$/)?.[0];
|
||||
|
||||
release.title = query.q('.trailer-title', true) || titleString.match(/Trailer: ([\w\s]+) -/)[1];
|
||||
release.channel = slugify(titleString.match(/([\w\s]+) #\d+$/)[1], '');
|
||||
release.title = query.cnt('.trailer-title') || titleString?.match(/(?:Trailer: )?([\w\s]+) -/)?.[1];
|
||||
release.channel = slugify(titleString?.match(/([\w\s]+) #\d+$/)?.[1], '');
|
||||
|
||||
const siteKey = siteMapBySlug[release.channel];
|
||||
|
||||
release.shootId = `${siteKey} ${episode}`;
|
||||
release.description = query.q('p.card-text, h5.heavy + p', true);
|
||||
release.description = query.text('p.card-text, h5.heavy + p, .card-content .is-three-fifths');
|
||||
|
||||
// order not reliable, get keys
|
||||
const detailElsByKey = query.all('.card-body h4.card-title, .video-summary h5').reduce((acc, rowEl) => ({
|
||||
const detailElsByKey = query.all('.card-body h4.card-title, .video-summary h5, .columns li').reduce((acc, rowEl) => ({
|
||||
...acc,
|
||||
[slugify(rowEl?.textContent.match(/(\w+):/)?.[1])]: rowEl,
|
||||
}), {});
|
||||
|
||||
release.date = query.date(detailElsByKey.published, null, 'YYYY-MM-DD');
|
||||
release.date = query.date(detailElsByKey.published, null, 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
||||
release.duration = query.dur(detailElsByKey.episode);
|
||||
release.actors = query.all(detailElsByKey.starring, 'a', true);
|
||||
release.actors = query.cnts(detailElsByKey.starring, 'a');
|
||||
|
||||
const posterPrefix = html.indexOf('poster:');
|
||||
const poster = query.img('.trailer-poster') || html.slice(html.indexOf('http', posterPrefix), html.indexOf('.jpg', posterPrefix) + 4);
|
||||
|
@ -94,20 +94,20 @@ async function scrapeScene({ query, html }, url, baseRelease) {
|
|||
}
|
||||
}
|
||||
|
||||
const token = query.meta('name=_token');
|
||||
const trailerInfoUrl = `${origin}/episodes/trailer/sources/${release.entryId}?type=trailer`;
|
||||
const trailerInfoRes = await http.post(trailerInfoUrl, null, {
|
||||
headers: {
|
||||
'X-CSRF-Token': token,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
// const token = query.meta('name=_token');
|
||||
// const trailerInfoUrl = `${channel.url}/episodes/trailer/sources/${release.entryId}?type=trailer`;
|
||||
const trailerInfoUrl = html.match(/'(http.*\/trailer\/sources.*)'/)?.[1];
|
||||
const trailerInfoRes = await http.post(trailerInfoUrl, null, { session });
|
||||
|
||||
if (trailerInfoRes.ok && trailerInfoRes.body.sources.length > 0) {
|
||||
if (trailerInfoRes.ok && trailerInfoRes.body.sources?.length > 0) {
|
||||
release.trailer = trailerInfoRes.body.sources.map(trailer => ({
|
||||
src: trailer.src,
|
||||
type: trailer.type,
|
||||
quality: trailer.res.replace(4000, 2160),
|
||||
/* unreliable, sometimes actual video is 720p
|
||||
quality: trailer.res
|
||||
.replace(4000, 2160)
|
||||
.replace(5000, 2880),
|
||||
*/
|
||||
}));
|
||||
}
|
||||
|
||||
|
@ -151,11 +151,13 @@ async function fetchLatest(channel, page = 1) {
|
|||
}
|
||||
|
||||
async function fetchScene(url, channel, baseRelease) {
|
||||
const session = http.session();
|
||||
|
||||
const res = await qu.get(url, null, {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
});
|
||||
}, { session });
|
||||
|
||||
return res.ok ? scrapeScene(res.item, url, baseRelease) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, url, baseRelease, channel, session) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }) {
|
||||
|
|
|
@ -79,6 +79,10 @@ async function request(method = 'get', url, body, requestOptions = {}, limiter)
|
|||
const options = {
|
||||
...defaultOptions,
|
||||
...requestOptions,
|
||||
headers: {
|
||||
...defaultOptions.headers,
|
||||
...requestOptions.headers,
|
||||
},
|
||||
responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000,
|
||||
stream: !!requestOptions.destination,
|
||||
session: null,
|
||||
|
@ -158,7 +162,7 @@ async function head(url, options) {
|
|||
}
|
||||
|
||||
function getSession(options) {
|
||||
return bhttp.session(options);
|
||||
return bhttp.session({ ...defaultOptions, ...options });
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
|
|
@ -69,6 +69,10 @@ function prefixUrl(urlValue, origin, protocol = 'https') {
|
|||
}
|
||||
|
||||
function q(context, selector, attrArg, applyTrim = true) {
|
||||
if (!selector && context.nodeName === '#document') {
|
||||
return null;
|
||||
}
|
||||
|
||||
const attr = attrArg === true ? 'textContent' : attrArg;
|
||||
|
||||
if (attr) {
|
||||
|
|