Refactored Bang! scraper, added My Stepdaughter's Friend.

This commit is contained in:
DebaucheryLibrarian 2023-06-05 03:32:24 +02:00
parent adda78f0c6
commit d3da2359de
48 changed files with 61 additions and 33 deletions

14
package-lock.json generated
View File

@ -78,7 +78,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.32", "ua-parser-js": "^1.0.32",
"undici": "^4.13.0", "undici": "^4.13.0",
"unprint": "^0.9.1", "unprint": "^0.9.3",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3", "v-tooltip": "^2.0.3",
"video.js": "^7.11.4", "video.js": "^7.11.4",
@ -17004,9 +17004,9 @@
} }
}, },
"node_modules/unprint": { "node_modules/unprint": {
"version": "0.9.1", "version": "0.9.3",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.9.1.tgz", "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.9.3.tgz",
"integrity": "sha512-TEdPfly6qOST3Vessa9gNFFkJsSsdWe04k0FCU5XuvdpCSb8eVRrAvLouuyJI/GkIhrn6ZHc9VgX/gRW/R5UcQ==", "integrity": "sha512-ujDlQL0yeVVd6V+kN5uURG/6F9jUblF0VWMOcpI9u3ZjsWp2tC4mQy0/kK4epU8QhkEFPE9uZ0pAMKORzEdp5g==",
"dependencies": { "dependencies": {
"axios": "^0.27.2", "axios": "^0.27.2",
"bottleneck": "^2.19.5", "bottleneck": "^2.19.5",
@ -31449,9 +31449,9 @@
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
}, },
"unprint": { "unprint": {
"version": "0.9.1", "version": "0.9.3",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.9.1.tgz", "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.9.3.tgz",
"integrity": "sha512-TEdPfly6qOST3Vessa9gNFFkJsSsdWe04k0FCU5XuvdpCSb8eVRrAvLouuyJI/GkIhrn6ZHc9VgX/gRW/R5UcQ==", "integrity": "sha512-ujDlQL0yeVVd6V+kN5uURG/6F9jUblF0VWMOcpI9u3ZjsWp2tC4mQy0/kK4epU8QhkEFPE9uZ0pAMKORzEdp5g==",
"requires": { "requires": {
"axios": "^0.27.2", "axios": "^0.27.2",
"bottleneck": "^2.19.5", "bottleneck": "^2.19.5",

View File

@ -137,7 +137,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.32", "ua-parser-js": "^1.0.32",
"undici": "^4.13.0", "undici": "^4.13.0",
"unprint": "^0.9.1", "unprint": "^0.9.3",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3", "v-tooltip": "^2.0.3",
"video.js": "^7.11.4", "video.js": "^7.11.4",

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.6 KiB

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.9 KiB

After

Width:  |  Height:  |  Size: 2.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.4 KiB

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.1 KiB

After

Width:  |  Height:  |  Size: 3.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.2 KiB

After

Width:  |  Height:  |  Size: 3.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.5 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.3 KiB

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.7 KiB

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.5 KiB

After

Width:  |  Height:  |  Size: 3.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.1 KiB

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.1 KiB

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.0 KiB

After

Width:  |  Height:  |  Size: 6.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.3 KiB

After

Width:  |  Height:  |  Size: 4.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.1 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.5 KiB

After

Width:  |  Height:  |  Size: 9.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.5 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.7 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.1 KiB

After

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -1020,6 +1020,13 @@ const sites = [
parameters: { siteId: 6305 }, parameters: { siteId: 6305 },
parent: 'bang', parent: 'bang',
}, },
{
name: 'My Stepdaughter\'s Friend',
slug: 'mystepdaughtersfriend',
url: 'https://www.bang.com/videos?in=My%20Stepdaughter%27s%20Friend',
parameters: { siteId: 7072 },
parent: 'bang',
},
// BANGBROS // BANGBROS
{ {
name: 'Ass Parade', name: 'Ass Parade',

View File

@ -51,13 +51,15 @@ function scrapeAll(scenes, entity) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
release.url = query.url('.video_preview_container > a', { origin: entity.url }); release.url = query.url('a', { origin: entity.url });
release.entryId = query.attribute(null, 'data-video-id') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]); release.entryId = query.dataset('a', 'videopreview-id-value') || decodeId(new URL(release.url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
release.title = query.content('.video_preview_container >a > span.block'); release.title = query.content('a > span.block');
release.date = query.date('.videoInfo .statistics span', 'MMM DD, YYYY'); release.date = query.date('a + div', 'MMM DD, YYYY');
release.actors = query.elements('.videoInfo a[href*="/pornstar"]').map((el) => ({ release.duration = query.duration('[data-videopreview-target="duration"]', { attribute: 'data-content' });
release.actors = query.elements('a + div a[href*="/pornstar"]').map((el) => ({
name: unprint.query.content(el), name: unprint.query.content(el),
url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }), url: unprint.query.url(el, null, { origin: 'https://www.bang.com' }),
})); }));
@ -72,7 +74,16 @@ function scrapeAll(scenes, entity) {
]; ];
} }
release.teaser = query.video(); const videoData = query.json('a', { attribute: 'data-videopreview-sources-value' });
if (videoData) {
release.teaser = [
videoData.mp4_large,
videoData.webm_large,
videoData.mp4,
videoData.webm,
];
}
return release; return release;
}); });
@ -80,21 +91,21 @@ function scrapeAll(scenes, entity) {
async function scrapeScene({ query }, { url, entity }) { async function scrapeScene({ query }, { url, entity }) {
const release = {}; const release = {};
const data = query.json('script[type="application/ld+json"]'); const data = query.json('//script[contains(text(), "VideoObject")]');
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]); release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
release.title = data?.name || query.content('.video-heading'); release.title = data?.name || query.content('.video-container + div h1');
release.description = data?.description || query.content('.expanded p.clear-both'); release.description = data?.description || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY'); release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span'); release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
if (data?.actors) { if (data?.actor) {
release.actors = data.actor.map((actor) => ({ release.actors = data.actor.map((actor) => ({
name: actor.name, name: actor.name,
url: actor.url, url: actor.url,
avatar: getAvatarFallback(query.img(`.video-actors img[alt="${actor.name}"]`)), avatar: getAvatarFallback(query.img(`a[href*="/pornstar"] img[alt="${actor.name}"]`)),
})); }));
} else { } else {
release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img|picture]').map((element) => ({ release.actors = query.elements('//div[contains(@class, "video-actors")]//a[img|picture]').map((element) => ({
@ -104,27 +115,35 @@ async function scrapeScene({ query }, { url, entity }) {
})); }));
} }
release.tags = query.contents('.expanded .genres'); release.tags = query.contents('.actions .genres');
release.poster = data?.thumbnailUrl || data?.contentUrl || query.attribute('meta[name*="og:image"]', 'content'); const videoData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });
release.teaser = query.video('video[data-videocontainer-target] source');
release.photos = JSON.parse(query.attribute('[data-video-gallery-photos-value]', 'data-video-gallery-photos-value')); release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content');
release.photoCount = query.number('[data-video-gallery-count-value]', { attribute: 'data-video-gallery-count-value' }); release.teaser = (videoData && [
videoData.mp4_large,
videoData.webm_large,
videoData.mp4,
videoData.webm,
])
|| data?.contentUrl
|| query.attribute('meta[property="og:video"]')
|| query.video('video[data-videocontainer-target] source');
const channelName = query.content('.expanded a[href*="?in="]')?.trim(); release.photos = query.sourceSets('.photo-set img');
release.photoCount = query.number('//h2[contains(text(), "Photos")]/following-sibling::span');
const channelName = query.content('.video-container + div a[href*="?in="]')?.trim();
if (channelName) { if (channelName) {
release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug; release.channel = entity.children?.find((channel) => new RegExp(channel.name, 'i').test(channelName) || slugify(channelName) === channel.slug)?.slug;
} }
console.log(release);
return release; return release;
} }
async function fetchActorScenes(element, url, entity, page = 1, acc = []) { async function fetchActorScenes(element, url, entity, page = 1, acc = []) {
const scenes = scrapeAll(unprint.initAll(element, '.search-grid li'), entity); const scenes = scrapeAll(unprint.initAll(element, '.video_container'), entity);
if (scenes.length) { if (scenes.length) {
const nextPageRes = await unprint.post(url, { page: page + 1 }); const nextPageRes = await unprint.post(url, { page: page + 1 });
@ -141,7 +160,7 @@ async function scrapeProfile({ query, element }, url, entity, include) {
const profile = { url }; const profile = { url };
profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY'); profile.dateOfBirth = query.date('//text()[contains(., "Born")]/following-sibling::span[contains(@class, "font-bold")][1]', 'MMMM D, YYYY');
profile.birthPlace = query.content('//text()[contains(., "in")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.birthPlace = query.content('//text()[contains(., "From")]/following-sibling::span[contains(@class, "font-bold")][1]');
profile.ethnicity = query.content('//text()[contains(., "Ethnicity")]/following-sibling::span[contains(@class, "font-bold")][1]'); profile.ethnicity = query.content('//text()[contains(., "Ethnicity")]/following-sibling::span[contains(@class, "font-bold")][1]');
@ -158,8 +177,8 @@ async function scrapeProfile({ query, element }, url, entity, include) {
} }
async function fetchLatest(channel, page = 1) { async function fetchLatest(channel, page = 1) {
const url = `${channel.url}&page=${page}`; const url = `${channel.url}&by=date.desc&page=${page}`;
const res = await unprint.get(url, { selectAll: '.search-grid li' }); const res = await unprint.get(url, { selectAll: '.video_container' });
if (res.ok) { if (res.ok) {
return scrapeAll(res.context, channel); return scrapeAll(res.context, channel);
@ -168,8 +187,8 @@ async function fetchLatest(channel, page = 1) {
return res.status; return res.status;
} }
async function fetchUpcoming(channel) { async function fetchUpcoming(channel) {
const url = `${channel.url}&early-access=true`; const url = `${channel.url}&by=date.desc&early-access=true`;
const res = await unprint.get(url, { selectAll: '.search-grid li' }); const res = await unprint.get(url, { selectAll: '.video_container' });
if (res.ok) { if (res.ok) {
return scrapeAll(res.context, channel); return scrapeAll(res.context, channel);

View File

@ -76,6 +76,8 @@ function scrapeSceneBlock({ query }) {
release.poster = query.img('#trailer_thumb img[src*=content]'); release.poster = query.img('#trailer_thumb img[src*=content]');
release.trailer = query.video('#trailerVideo source'); release.trailer = query.video('#trailerVideo source');
release.photoCount = query.number('.sceneDateP', { match: /(\d+)\s+(photo|pic)/i, matchIndex: 1 });
return release; return release;
} }