Added Sperm Swallowers and The Ass Factory to Jules Jordan scraper. Added photo source.

This commit is contained in:
ThePendulum 2019-12-12 05:18:43 +01:00
parent ad1a0376e7
commit a03a00f5d9
8 changed files with 43 additions and 11 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

View File

@@ -1045,7 +1045,18 @@ function getSites(networksMap) {
name: 'Jules Jordan', name: 'Jules Jordan',
url: 'https://www.julesjordan.com', url: 'https://www.julesjordan.com',
description: 'Jules Jordan\'s Official Membership Site', description: 'Jules Jordan\'s Official Membership Site',
parameters: JSON.stringify({ independent: true }), network_id: networksMap.julesjordan,
},
{
slug: 'theassfactory',
name: 'The Ass Factory',
url: 'https://www.theassfactory.com',
network_id: networksMap.julesjordan,
},
{
slug: 'spermswallowers',
name: 'Sperm Swallowers',
url: 'https://www.spermswallowers.com',
network_id: networksMap.julesjordan, network_id: networksMap.julesjordan,
}, },
// KELLY MADISON MEDIA // KELLY MADISON MEDIA

View File

@@ -112,8 +112,16 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) { async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
if (Array.isArray(photoUrl)) { if (Array.isArray(photoUrl)) {
return fetchPhoto(photoUrl[0], index, identifier); // return fetchPhoto(photoUrl[0], index, identifier);
// return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => fetchPhoto(url, index, identifier)), Promise.reject()); return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
const photo = await fetchPhoto(url, index, identifier);
if (photo) {
return photo;
}
throw new Error('Photo not available');
}), Promise.reject(new Error()));
} }
try { try {
@@ -214,6 +222,8 @@ async function storePhotos(release, releaseId) {
concurrency: 10, concurrency: 10,
}).filter(photo => photo); }).filter(photo => photo);
console.log(metaFiles);
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`); const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId); const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);

View File

@@ -142,8 +142,6 @@ async function scrapeReleases() {
concurrency: 5, concurrency: 5,
}); });
console.log(scrapedReleases.flat(2).map(release => release.movie));
if (argv.save) { if (argv.save) {
await storeReleases(scrapedReleases.flat(2)); await storeReleases(scrapedReleases.flat(2));
} }

View File

@@ -22,8 +22,12 @@ function scrapePhotos(html) {
.map((photoElement) => { .map((photoElement) => {
const src = $(photoElement).attr('src'); const src = $(photoElement).attr('src');
// high res often available in photos/ directory, but not always, provide original as fallback // high res often available in alternative directories, but not always, provide original as fallback
return [src.replace('thumbs/', 'photos/'), src]; return [
src.replace('thumbs/', 'photos/'),
src.replace('thumbs/', '1024watermarked/'),
src,
];
}); });
return photos; return photos;
@@ -60,7 +64,14 @@ function scrapeLatest(html, site) {
return scenesElements.map((element) => { return scenesElements.map((element) => {
const photoElement = $(element).find('a img.thumbs'); const photoElement = $(element).find('a img.thumbs');
const photoCount = Number(photoElement.attr('cnt')); const photoCount = Number(photoElement.attr('cnt'));
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => photoElement.attr(`src${index}_1x`)).filter(photoUrl => photoUrl !== undefined); const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => {
const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`);
if (!src) return null;
if (src.match(/^http/)) return src;
return `${site.url}${src}`;
}).filter(photoUrl => photoUrl);
const sceneLinkElement = $(element).children('a').eq(1); const sceneLinkElement = $(element).children('a').eq(1);
const url = sceneLinkElement.attr('href'); const url = sceneLinkElement.attr('href');
@@ -116,7 +127,8 @@ function scrapeUpcoming(html, site) {
.toDate(); .toDate();
const photoElement = $(element).find('a img.thumbs'); const photoElement = $(element).find('a img.thumbs');
const poster = photoElement.attr('src'); const posterPath = photoElement.attr('src');
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
const videoClass = $(element).find('.update_thumbnail div').attr('class'); const videoClass = $(element).find('.update_thumbnail div').attr('class');
const videoScript = $(element).find(`script:contains(${videoClass})`).html(); const videoScript = $(element).find(`script:contains(${videoClass})`).html();
@@ -159,7 +171,8 @@ async function scrapeScene(html, url, site) {
.html() .html()
.split('\n'); .split('\n');
const poster = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2); const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]')); const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie')); const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
@@ -167,7 +180,7 @@ async function scrapeScene(html, url, site) {
const photos = await getPhotos(entryId, site); const photos = await getPhotos(entryId, site);
const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const movie = $('.update_dvds a').href; const movie = $('.update_dvds a').attr('href');
return { return {
url, url,