Updated Jules Jordan scraper for new update page layout.
This commit is contained in:
parent
2018d5c298
commit
b93a5715cb
|
@ -136,14 +136,18 @@ function getEntryId(html) {
|
||||||
function scrapeAll(scenes, site, entryIdFromTitle) {
|
function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||||
return scenes.map(({ el, query }) => {
|
return scenes.map(({ el, query }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
const title = query.cnt('.content_img div, .dvd_info > a, a ~ a');
|
||||||
|
|
||||||
release.url = query.url('.update_title a, .dvd_info > a, a ~ a');
|
release.title = title.slice(0, title.match(/starring:/i)?.index || Infinity).trim();
|
||||||
release.title = query.q('.update_title a, .dvd_info > a, a ~ a', true);
|
release.url = query.url('.content_img a, .dvd_info > a, a ~ a');
|
||||||
release.date = query.date('.update_date', 'MM/DD/YYYY');
|
release.date = query.date('.update_date', 'MM/DD/YYYY');
|
||||||
|
|
||||||
release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || query.q('.rating_box')?.dataset.id;
|
release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || query.q('.rating_box')?.dataset.id;
|
||||||
|
|
||||||
release.actors = query.all('.update_models a', true);
|
release.actors = query.all('.content_img .update_models a').map((actorEl) => ({
|
||||||
|
name: query.cnt(actorEl),
|
||||||
|
url: query.url(actorEl, null),
|
||||||
|
}));
|
||||||
|
|
||||||
const dvdPhotos = query.imgs('.dvd_preview_thumb');
|
const dvdPhotos = query.imgs('.dvd_preview_thumb');
|
||||||
const photoCount = Number(query.q('a img.thumbs', 'cnt')) || 1;
|
const photoCount = Number(query.q('a img.thumbs', 'cnt')) || 1;
|
||||||
|
@ -183,9 +187,9 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
const teaserScript = query.html('script');
|
const teaserScript = query.html('script');
|
||||||
|
|
||||||
if (teaserScript) {
|
if (teaserScript) {
|
||||||
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||||
if (src) release.teaser = { src };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
|
@ -235,17 +239,21 @@ function scrapeUpcoming(html, site) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene({ html, query }, url, site, include) {
|
async function scrapeScene({ html, query }, url, site, options) {
|
||||||
const release = { url, site };
|
const release = {};
|
||||||
|
|
||||||
release.entryId = getEntryId(html);
|
release.entryId = getEntryId(html);
|
||||||
release.title = query.q('.title_bar_hilite', true);
|
release.title = query.cnt('.title_bar_hilite');
|
||||||
release.description = query.q('.update_description', true);
|
release.description = query.cnt('.update_description');
|
||||||
|
|
||||||
release.date = query.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
release.date = query.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
||||||
|
|
||||||
release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a').map((actorEl) => ({
|
||||||
release.tags = query.all('.update_tags a', true);
|
name: query.cnt(actorEl),
|
||||||
|
url: query.url(actorEl, null),
|
||||||
|
}));
|
||||||
|
|
||||||
|
release.tags = query.cnts('.update_tags a');
|
||||||
|
|
||||||
const posterPath = html.match(/useimage = "(.*)"/)?.[1];
|
const posterPath = html.match(/useimage = "(.*)"/)?.[1];
|
||||||
|
|
||||||
|
@ -260,7 +268,7 @@ async function scrapeScene({ html, query }, url, site, include) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (include.trailer && site.slug !== 'manuelferrara') {
|
if (options.includeTrailers && site.slug !== 'manuelferrara') {
|
||||||
const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line));
|
const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line));
|
||||||
|
|
||||||
if (trailerLines.length) {
|
if (trailerLines.length) {
|
||||||
|
@ -277,19 +285,20 @@ async function scrapeScene({ html, query }, url, site, include) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (include.photos) release.photos = await getPhotos(release.entryId, site);
|
if (options.includePhotos) {
|
||||||
|
release.photos = await getPhotos(release.entryId, site);
|
||||||
|
}
|
||||||
|
|
||||||
if (query.exists('.update_dvds a')) {
|
if (query.exists('.update_dvds a')) {
|
||||||
release.movie = {
|
release.movie = {
|
||||||
url: query.url('.update_dvds a'),
|
url: query.url('.update_dvds a'),
|
||||||
title: query.q('.update_dvds a', true),
|
title: query.cnt('.update_dvds a'),
|
||||||
};
|
};
|
||||||
|
|
||||||
release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', '');
|
release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', '');
|
||||||
}
|
}
|
||||||
|
|
||||||
const stars = Number(query.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
|
release.stars = query.number('.avg_rating');
|
||||||
if (stars) release.stars = stars;
|
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
@ -298,7 +307,7 @@ function scrapeMovie({ el, query }, url, site) {
|
||||||
const movie = { url, site };
|
const movie = { url, site };
|
||||||
|
|
||||||
movie.entryId = new URL(url).pathname.split('/').slice(-1)[0]?.replace('.html', '');
|
movie.entryId = new URL(url).pathname.split('/').slice(-1)[0]?.replace('.html', '');
|
||||||
movie.title = query.q('.title_bar span', true);
|
movie.title = query.cnt('.title_bar span');
|
||||||
movie.covers = query.urls('#dvd-cover-flip > a');
|
movie.covers = query.urls('#dvd-cover-flip > a');
|
||||||
movie.channel = slugify(query.q('.update_date a', true), '');
|
movie.channel = slugify(query.q('.update_date a', true), '');
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue