Added Sperm Swallowers and The Ass Factory to Jules Jordan scraper. Added photo source.
This commit is contained in:
parent
ad1a0376e7
commit
a03a00f5d9
Binary file not shown.
Before Width: | Height: | Size: 59 KiB After Width: | Height: | Size: 59 KiB |
Binary file not shown.
Before Width: | Height: | Size: 59 KiB After Width: | Height: | Size: 59 KiB |
Binary file not shown.
After Width: | Height: | Size: 5.5 KiB |
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
|
@ -1045,7 +1045,18 @@ function getSites(networksMap) {
|
||||||
name: 'Jules Jordan',
|
name: 'Jules Jordan',
|
||||||
url: 'https://www.julesjordan.com',
|
url: 'https://www.julesjordan.com',
|
||||||
description: 'Jules Jordan\'s Official Membership Site',
|
description: 'Jules Jordan\'s Official Membership Site',
|
||||||
parameters: JSON.stringify({ independent: true }),
|
network_id: networksMap.julesjordan,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
slug: 'theassfactory',
|
||||||
|
name: 'The Ass Factory',
|
||||||
|
url: 'https://www.theassfactory.com',
|
||||||
|
network_id: networksMap.julesjordan,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
slug: 'spermswallowers',
|
||||||
|
name: 'Sperm Swallowers',
|
||||||
|
url: 'https://www.spermswallowers.com',
|
||||||
network_id: networksMap.julesjordan,
|
network_id: networksMap.julesjordan,
|
||||||
},
|
},
|
||||||
// KELLY MADISON MEDIA
|
// KELLY MADISON MEDIA
|
||||||
|
|
14
src/media.js
14
src/media.js
|
@ -112,8 +112,16 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho
|
||||||
|
|
||||||
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
||||||
if (Array.isArray(photoUrl)) {
|
if (Array.isArray(photoUrl)) {
|
||||||
return fetchPhoto(photoUrl[0], index, identifier);
|
// return fetchPhoto(photoUrl[0], index, identifier);
|
||||||
// return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => fetchPhoto(url, index, identifier)), Promise.reject());
|
return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
|
||||||
|
const photo = await fetchPhoto(url, index, identifier);
|
||||||
|
|
||||||
|
if (photo) {
|
||||||
|
return photo;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error('Photo not available');
|
||||||
|
}), Promise.reject(new Error()));
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -214,6 +222,8 @@ async function storePhotos(release, releaseId) {
|
||||||
concurrency: 10,
|
concurrency: 10,
|
||||||
}).filter(photo => photo);
|
}).filter(photo => photo);
|
||||||
|
|
||||||
|
console.log(metaFiles);
|
||||||
|
|
||||||
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||||
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
||||||
|
|
||||||
|
|
|
@ -142,8 +142,6 @@ async function scrapeReleases() {
|
||||||
concurrency: 5,
|
concurrency: 5,
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log(scrapedReleases.flat(2).map(release => release.movie));
|
|
||||||
|
|
||||||
if (argv.save) {
|
if (argv.save) {
|
||||||
await storeReleases(scrapedReleases.flat(2));
|
await storeReleases(scrapedReleases.flat(2));
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,8 +22,12 @@ function scrapePhotos(html) {
|
||||||
.map((photoElement) => {
|
.map((photoElement) => {
|
||||||
const src = $(photoElement).attr('src');
|
const src = $(photoElement).attr('src');
|
||||||
|
|
||||||
// high res often available in photos/ directory, but not always, provide original as fallback
|
// high-res versions are often, but not always, available in alternative directories; keep the original URL as the last fallback
|
||||||
return [src.replace('thumbs/', 'photos/'), src];
|
return [
|
||||||
|
src.replace('thumbs/', 'photos/'),
|
||||||
|
src.replace('thumbs/', '1024watermarked/'),
|
||||||
|
src,
|
||||||
|
];
|
||||||
});
|
});
|
||||||
|
|
||||||
return photos;
|
return photos;
|
||||||
|
@ -60,7 +64,14 @@ function scrapeLatest(html, site) {
|
||||||
return scenesElements.map((element) => {
|
return scenesElements.map((element) => {
|
||||||
const photoElement = $(element).find('a img.thumbs');
|
const photoElement = $(element).find('a img.thumbs');
|
||||||
const photoCount = Number(photoElement.attr('cnt'));
|
const photoCount = Number(photoElement.attr('cnt'));
|
||||||
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => photoElement.attr(`src${index}_1x`)).filter(photoUrl => photoUrl !== undefined);
|
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => {
|
||||||
|
const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`);
|
||||||
|
|
||||||
|
if (!src) return null;
|
||||||
|
if (src.match(/^http/)) return src;
|
||||||
|
|
||||||
|
return `${site.url}${src}`;
|
||||||
|
}).filter(photoUrl => photoUrl);
|
||||||
|
|
||||||
const sceneLinkElement = $(element).children('a').eq(1);
|
const sceneLinkElement = $(element).children('a').eq(1);
|
||||||
const url = sceneLinkElement.attr('href');
|
const url = sceneLinkElement.attr('href');
|
||||||
|
@ -116,7 +127,8 @@ function scrapeUpcoming(html, site) {
|
||||||
.toDate();
|
.toDate();
|
||||||
|
|
||||||
const photoElement = $(element).find('a img.thumbs');
|
const photoElement = $(element).find('a img.thumbs');
|
||||||
const poster = photoElement.attr('src');
|
const posterPath = photoElement.attr('src');
|
||||||
|
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||||
|
|
||||||
const videoClass = $(element).find('.update_thumbnail div').attr('class');
|
const videoClass = $(element).find('.update_thumbnail div').attr('class');
|
||||||
const videoScript = $(element).find(`script:contains(${videoClass})`).html();
|
const videoScript = $(element).find(`script:contains(${videoClass})`).html();
|
||||||
|
@ -159,7 +171,8 @@ async function scrapeScene(html, url, site) {
|
||||||
.html()
|
.html()
|
||||||
.split('\n');
|
.split('\n');
|
||||||
|
|
||||||
const poster = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
||||||
|
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||||
|
|
||||||
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
|
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
|
||||||
const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
|
const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
|
||||||
|
@ -167,7 +180,7 @@ async function scrapeScene(html, url, site) {
|
||||||
const photos = await getPhotos(entryId, site);
|
const photos = await getPhotos(entryId, site);
|
||||||
|
|
||||||
const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||||
const movie = $('.update_dvds a').href;
|
const movie = $('.update_dvds a').attr('href');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
|
|
Loading…
Reference in New Issue