forked from DebaucheryLibrarian/traxxx
Refactored Bang! scraper, added My Stepdaughters Friend.
This commit is contained in:
@@ -51,13 +51,15 @@ function scrapeAll(scenes, entity) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.url = query.url('.video_preview_container > a', { origin: entity.url });
|
||||
release.entryId = query.attribute(null, 'data-video-id') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||
release.url = query.url('a', { origin: entity.url });
|
||||
release.entryId = query.dataset('a', 'videopreview-id-value') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||
|
||||
release.title = query.content('.video_preview_container >a > span.block');
|
||||
release.date = query.date('.videoInfo .statistics span', 'MMM DD, YYYY');
|
||||
release.title = query.content('a > span.block');
|
||||
release.date = query.date('a + div', 'MMM DD, YYYY');
|
||||
|
||||
release.actors = query.elements('.videoInfo a[href*="/pornstar"]').map((el) => ({
|
||||
release.duration = query.duration('[data-videopreview-target="duration"]', { attribute: 'data-content' });
|
||||
|
||||
release.actors = query.elements('a + div a[href*="/pornstar"]').map((el) => ({
|
||||
name: unprint.query.content(el),
|
||||
url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }),
|
||||
}));
|
||||
@@ -72,7 +74,16 @@ function scrapeAll(scenes, entity) {
|
||||
];
|
||||
}
|
||||
|
||||
release.teaser = query.video();
|
||||
const videoData = query.json('a', { attribute: 'data-videopreview-sources-value' });
|
||||
|
||||
if (videoData) {
|
||||
release.teaser = [
|
||||
videoData.mp4_large,
|
||||
videoData.webm_large,
|
||||
videoData.mp4,
|
||||
videoData.webm,
|
||||
];
|
||||
}
|
||||
|
||||
return release;
|
||||
});
|
||||
@@ -80,21 +91,21 @@ function scrapeAll(scenes, entity) {
|
||||
|
||||
async function scrapeScene({ query }, { url, entity }) {
|
||||
const release = {};
|
||||
const data = query.json('script[type="application/ld+json"]');
|
||||
const data = query.json('//script[contains(text(), "VideoObject")]');
|
||||
|
||||
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||
|
||||
release.title = data?.name || query.content('.video-heading');
|
||||
release.description = data?.description || query.content('.expanded p.clear-both');
|
||||
release.title = data?.name || query.content('.video-container + div h1');
|
||||
release.description = data?.description || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');
|
||||
|
||||
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');
|
||||
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
|
||||
|
||||
if (data?.actors) {
|
||||
if (data?.actor) {
|
||||
release.actors = data.actor.map((actor) => ({
|
||||
name: actor.name,
|
||||
url: actor.url,
|
||||
avatar: getAvatarFallback(query.img(`.video-actors img[alt="${actor.name}"]`)),
|
||||
avatar: getAvatarFallback(query.img(`a[href*="/pornstar"] img[alt="${actor.name}"]`)),
|
||||
}));
|
||||
} else {
|
||||
release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img|picture]').map((element) => ({
|
||||
@@ -104,27 +115,35 @@ async function scrapeScene({ query }, { url, entity }) {
|
||||
}));
|
||||
}
|
||||
|
||||
release.tags = query.contents('.expanded .genres');
|
||||
release.tags = query.contents('.actions .genres');
|
||||
|
||||
release.poster = data?.thumbnailUrl || data?.contentUrl || query.attribute('meta[name*="og:image"]', 'content');
|
||||
release.teaser = query.video('video[data-videocontainer-target] source');
|
||||
const videoData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });
|
||||
|
||||
release.photos = JSON.parse(query.attribute('[data-video-gallery-photos-value]', 'data-video-gallery-photos-value'));
|
||||
release.photoCount = query.number('[data-video-gallery-count-value]', { attribute: 'data-video-gallery-count-value' });
|
||||
release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content');
|
||||
release.teaser = (videoData && [
|
||||
videoData.mp4_large,
|
||||
videoData.webm_large,
|
||||
videoData.mp4,
|
||||
videoData.webm,
|
||||
])
|
||||
|| data?.contentUrl
|
||||
|| query.attribute('meta[property="og:video"]')
|
||||
|| query.video('video[data-videocontainer-target] source');
|
||||
|
||||
const channelName = query.content('.expanded a[href*="?in="]')?.trim();
|
||||
release.photos = query.sourceSets('.photo-set img');
|
||||
release.photoCount = query.number('//h2[contains(text(), "Photos")]/following-sibling::span');
|
||||
|
||||
const channelName = query.content('.video-container + div a[href*="?in="]')?.trim();
|
||||
|
||||
if (channelName) {
|
||||
release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug;
|
||||
}
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
|
||||
const scenes = scrapeAll(unprint.initAll(element, '.search-grid li'), entity);
|
||||
const scenes = scrapeAll(unprint.initAll(element, '.video_container'), entity);
|
||||
|
||||
if (scenes.length) {
|
||||
const nextPageRes = await unprint.post(url, { page: page + 1 });
|
||||
@@ -141,7 +160,7 @@ async function scrapeProfile({ query, element }, url, entity, include) {
|
||||
const profile = { url };
|
||||
|
||||
profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY');
|
||||
profile.birthPlace = query.content('//text()[contains(., "in")]/following-sibling::span[contains(@class, "font-bold")][1]');
|
||||
profile.birthPlace = query.content('//text()[contains(., "From")]/following-sibling::span[contains(@class, "font-bold")][1]');
|
||||
|
||||
profile.ethnicity = query.content('//text()[contains(., "Ethnicity")]/following-sibling::span[contains(@class, "font-bold")][1]');
|
||||
|
||||
@@ -158,8 +177,8 @@ async function scrapeProfile({ query, element }, url, entity, include) {
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}&page=${page}`;
|
||||
const res = await unprint.get(url, { selectAll: '.search-grid li' });
|
||||
const url = `${channel.url}&by=date.desc&page=${page}`;
|
||||
const res = await unprint.get(url, { selectAll: '.video_container' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, channel);
|
||||
@@ -168,8 +187,8 @@ async function fetchLatest(channel, page = 1) {
|
||||
return res.status;
|
||||
}
|
||||
async function fetchUpcoming(channel) {
|
||||
const url = `${channel.url}&early-access=true`;
|
||||
const res = await unprint.get(url, { selectAll: '.search-grid li' });
|
||||
const url = `${channel.url}&by=date.desc&early-access=true`;
|
||||
const res = await unprint.get(url, { selectAll: '.video_container' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, channel);
|
||||
|
||||
@@ -76,6 +76,8 @@ function scrapeSceneBlock({ query }) {
|
||||
release.poster = query.img('#trailer_thumb img[src*=content]');
|
||||
release.trailer = query.video('#trailerVideo source');
|
||||
|
||||
release.photoCount = query.number('.sceneDateP', { match: /(\d+)\s+(photo|pic)/i, matchIndex: 1 });
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user