Fixed PornDoe scraper.
This commit is contained in:
@@ -10,21 +10,26 @@ function scrapeAll(scenes) {
|
||||
release.url = query.url('a');
|
||||
release.entryId = release.url.match(/\/watch\/(\d+)/)[1];
|
||||
|
||||
release.title = query.cnt('.bottom .h4') || query.q('.bottom .link', 'title');
|
||||
release.date = query.date('.extra-info:not(.actors)', 'DD MMMM YYYY');
|
||||
release.title = query.cnt('[class*="item-title"] a') || query.q('.bottom .link', 'title');
|
||||
release.date = query.date('[class*="item-date"]', 'MMM DD, YYYY');
|
||||
|
||||
release.actors = query.all('.actors a strong', true);
|
||||
release.actors = query.all('[class*="item-actors"] a').map(el => ({
|
||||
name: query.cnt(el),
|
||||
url: query.url(el, null),
|
||||
}));
|
||||
|
||||
// const poster = query.img('.thumb img');
|
||||
const poster = query.img('.thumb', 'data-bg');
|
||||
const poster = query.img('[class*="thumb"]', 'data-bg');
|
||||
|
||||
release.poster = [
|
||||
poster.replace('512x288', '1472x828'),
|
||||
poster,
|
||||
];
|
||||
if (poster) {
|
||||
release.poster = [
|
||||
poster.replace('512x288', '1472x828'),
|
||||
poster,
|
||||
];
|
||||
}
|
||||
|
||||
release.teaser = { src: query.video('video-preview') };
|
||||
release.channel = slugify(query.q('.bottom a[href*="/channels"]', 'title'), '');
|
||||
release.teaser = query.video('video-preview');
|
||||
release.channel = slugify(query.q('[class*="item-channel"] a', 'title'), '');
|
||||
|
||||
return release;
|
||||
});
|
||||
@@ -38,21 +43,22 @@ function scrapeScene({ query }, url) {
|
||||
|
||||
release.description = query.meta('name=description') || query.q('read-even-more', true);
|
||||
|
||||
release.date = query.date('.h5-published', 'DD MMMM YYYY', /\d{1,2} \w+ \d{4}/);
|
||||
release.actors = query.all('.video-top-details .actors a[href*="/models"]', true);
|
||||
release.date = query.date('.h5-published', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/);
|
||||
release.actors = query.all('.video-top-details .actors a[href*="/models"]').map(el => ({
|
||||
name: query.cnt(el),
|
||||
url: query.url(el, null),
|
||||
}));
|
||||
|
||||
release.duration = query.dur('meta[itemprop="duration"]', null, 'content');
|
||||
release.tags = query.all('.video-top-details a[href*="/categories"], .video-top-details a[href*="/tags"]', true);
|
||||
|
||||
release.poster = query.img('.poster img') || query.meta('itemprop=thumbnailUrl');
|
||||
release.photos = query.imgs('#gallery-thumbs picture img').slice(1).map(photo => [ // first image is poster
|
||||
release.photos = query.imgs('#gallery-thumbs [class*="thumb"]', 'data-bg').slice(1).map(photo => [ // first image is poster
|
||||
photo.replace('512x288', '1472x828'),
|
||||
photo,
|
||||
]);
|
||||
|
||||
release.trailer = {
|
||||
src: query.meta('itemprop=contentURL'),
|
||||
};
|
||||
release.trailer = query.meta('itemprop=contentURL');
|
||||
|
||||
release.channel = slugify(query.q('.video-top-details .actors a[href*="/channels"] strong', true), '');
|
||||
|
||||
@@ -60,7 +66,7 @@ function scrapeScene({ query }, url) {
|
||||
}
|
||||
|
||||
async function fetchActorReleases({ query }, url, page = 1, accReleases = []) {
|
||||
const releases = scrapeAll(qu.initAll(query.all('.main-content .card-video')));
|
||||
const releases = scrapeAll(qu.initAll(query.all('.main-content .card-video, .main-content .global-video-card')));
|
||||
const hasNextPage = query.exists('.pages a.active + a');
|
||||
|
||||
if (hasNextPage) {
|
||||
@@ -77,15 +83,23 @@ async function fetchActorReleases({ query }, url, page = 1, accReleases = []) {
|
||||
async function scrapeProfile({ query }, url, include) {
|
||||
const profile = {};
|
||||
|
||||
const keys = query.all('.col .h4:not(.more-less-actors)', true);
|
||||
const values = query.all('.col .h4-big', true);
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
|
||||
const bio = query.all('[class*="list"] [class*="list-item"]').reduce((acc, el) => ({
|
||||
...acc,
|
||||
[slugify(query.text(el), '_')]: query.cnt(el, 'span'),
|
||||
}), {});
|
||||
|
||||
const tags = query.all('.more-less-actors a[href*="/tags"], .more-less-actors a[href*="/categories"]', true);
|
||||
const tags = query.cnts(`
|
||||
[class$="description"] [class*="more-less"] a[href*="/categories"],
|
||||
[class$="description"] [class*="more-less"] a[href*="/tags"],
|
||||
[class*="seo-text"] a[href*="/categories"],
|
||||
[class*="seo-text"] a[href*="/tags"]
|
||||
`);
|
||||
|
||||
profile.nationality = bio.nationality;
|
||||
profile.placeOfBirth = bio.birth_place;
|
||||
|
||||
profile.dateOfBirth = qu.extractDate(bio.birth_date, 'MMM D, YYYY');
|
||||
|
||||
if (/enhanced/i.test(bio.tits_type)) profile.naturalBoobs = false;
|
||||
if (/natural/i.test(bio.tits_type)) profile.naturalBoobs = true;
|
||||
|
||||
@@ -95,8 +109,8 @@ async function scrapeProfile({ query }, url, include) {
|
||||
if (tags.includes('tattoo') || tags.includes('tattoos')) profile.hasTattoos = true;
|
||||
if (tags.includes('piercing') || tags.includes('piercings')) profile.hasPiercings = true;
|
||||
|
||||
profile.description = query.q('.more-less-actors read-even-more', true);
|
||||
profile.avatar = query.img('.avatar .thumb img') || null;
|
||||
profile.description = query.text('[class$="description"] [class*="more-less"]');
|
||||
profile.avatar = query.img('[class*="poster"] img') || null;
|
||||
|
||||
if (include.releases) {
|
||||
profile.releases = await fetchActorReleases({ query }, url);
|
||||
@@ -106,7 +120,7 @@ async function scrapeProfile({ query }, url, include) {
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const res = await qu.getAll(`${channel.url}?page=${page}`, '.main-content .card-video');
|
||||
const res = await qu.getAll(`${channel.url}?page=${page}`, '.main-content .card-video, .main-content .global-video-card');
|
||||
|
||||
return res.ok ? scrapeAll(res.items, channel) : res.status;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user