Added Sperm Swallowers and The Ass Factory to Jules Jordan scraper. Added photo source.

This commit is contained in:
ThePendulum 2019-12-12 05:18:43 +01:00
parent ad1a0376e7
commit a03a00f5d9
8 changed files with 43 additions and 11 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

View File

@@ -1045,7 +1045,18 @@ function getSites(networksMap) {
name: 'Jules Jordan',
url: 'https://www.julesjordan.com',
description: 'Jules Jordan\'s Official Membership Site',
parameters: JSON.stringify({ independent: true }),
network_id: networksMap.julesjordan,
},
{
slug: 'theassfactory',
name: 'The Ass Factory',
url: 'https://www.theassfactory.com',
network_id: networksMap.julesjordan,
},
{
slug: 'spermswallowers',
name: 'Sperm Swallowers',
url: 'https://www.spermswallowers.com',
network_id: networksMap.julesjordan,
},
// KELLY MADISON MEDIA

View File

@@ -112,8 +112,16 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
if (Array.isArray(photoUrl)) {
return fetchPhoto(photoUrl[0], index, identifier);
// return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => fetchPhoto(url, index, identifier)), Promise.reject());
// return fetchPhoto(photoUrl[0], index, identifier);
return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
const photo = await fetchPhoto(url, index, identifier);
if (photo) {
return photo;
}
throw new Error('Photo not available');
}), Promise.reject(new Error()));
}
try {
@@ -214,6 +222,8 @@ async function storePhotos(release, releaseId) {
concurrency: 10,
}).filter(photo => photo);
console.log(metaFiles);
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);

View File

@@ -142,8 +142,6 @@ async function scrapeReleases() {
concurrency: 5,
});
console.log(scrapedReleases.flat(2).map(release => release.movie));
if (argv.save) {
await storeReleases(scrapedReleases.flat(2));
}

View File

@@ -22,8 +22,12 @@ function scrapePhotos(html) {
.map((photoElement) => {
const src = $(photoElement).attr('src');
// high res often available in photos/ directory, but not always, provide original as fallback
return [src.replace('thumbs/', 'photos/'), src];
// high res often available in alternative directories, but not always, provide original as fallback
return [
src.replace('thumbs/', 'photos/'),
src.replace('thumbs/', '1024watermarked/'),
src,
];
});
return photos;
@@ -60,7 +64,14 @@ function scrapeLatest(html, site) {
return scenesElements.map((element) => {
const photoElement = $(element).find('a img.thumbs');
const photoCount = Number(photoElement.attr('cnt'));
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => photoElement.attr(`src${index}_1x`)).filter(photoUrl => photoUrl !== undefined);
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => {
const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`);
if (!src) return null;
if (src.match(/^http/)) return src;
return `${site.url}${src}`;
}).filter(photoUrl => photoUrl);
const sceneLinkElement = $(element).children('a').eq(1);
const url = sceneLinkElement.attr('href');
@@ -116,7 +127,8 @@ function scrapeUpcoming(html, site) {
.toDate();
const photoElement = $(element).find('a img.thumbs');
const poster = photoElement.attr('src');
const posterPath = photoElement.attr('src');
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
const videoClass = $(element).find('.update_thumbnail div').attr('class');
const videoScript = $(element).find(`script:contains(${videoClass})`).html();
@@ -159,7 +171,8 @@ async function scrapeScene(html, url, site) {
.html()
.split('\n');
const poster = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
@@ -167,7 +180,7 @@ async function scrapeScene(html, url, site) {
const photos = await getPhotos(entryId, site);
const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const movie = $('.update_dvds a').href;
const movie = $('.update_dvds a').attr('href');
return {
url,