Added Sperm Swallowers and The Ass Factory to Jules Jordan scraper. Added photo source.
This commit is contained in:
14
src/media.js
14
src/media.js
@@ -112,8 +112,16 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho
|
||||
|
||||
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
||||
if (Array.isArray(photoUrl)) {
|
||||
return fetchPhoto(photoUrl[0], index, identifier);
|
||||
// return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => fetchPhoto(url, index, identifier)), Promise.reject());
|
||||
// return fetchPhoto(photoUrl[0], index, identifier);
|
||||
return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
|
||||
const photo = await fetchPhoto(url, index, identifier);
|
||||
|
||||
if (photo) {
|
||||
return photo;
|
||||
}
|
||||
|
||||
throw new Error('Photo not available');
|
||||
}), Promise.reject(new Error()));
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -214,6 +222,8 @@ async function storePhotos(release, releaseId) {
|
||||
concurrency: 10,
|
||||
}).filter(photo => photo);
|
||||
|
||||
console.log(metaFiles);
|
||||
|
||||
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
||||
|
||||
|
||||
@@ -142,8 +142,6 @@ async function scrapeReleases() {
|
||||
concurrency: 5,
|
||||
});
|
||||
|
||||
console.log(scrapedReleases.flat(2).map(release => release.movie));
|
||||
|
||||
if (argv.save) {
|
||||
await storeReleases(scrapedReleases.flat(2));
|
||||
}
|
||||
|
||||
@@ -22,8 +22,12 @@ function scrapePhotos(html) {
|
||||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
// high res often available in photos/ directory, but not always, provide original as fallback
|
||||
return [src.replace('thumbs/', 'photos/'), src];
|
||||
// high res often available in alternative directories, but not always, provide original as fallback
|
||||
return [
|
||||
src.replace('thumbs/', 'photos/'),
|
||||
src.replace('thumbs/', '1024watermarked/'),
|
||||
src,
|
||||
];
|
||||
});
|
||||
|
||||
return photos;
|
||||
@@ -60,7 +64,14 @@ function scrapeLatest(html, site) {
|
||||
return scenesElements.map((element) => {
|
||||
const photoElement = $(element).find('a img.thumbs');
|
||||
const photoCount = Number(photoElement.attr('cnt'));
|
||||
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => photoElement.attr(`src${index}_1x`)).filter(photoUrl => photoUrl !== undefined);
|
||||
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => {
|
||||
const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`);
|
||||
|
||||
if (!src) return null;
|
||||
if (src.match(/^http/)) return src;
|
||||
|
||||
return `${site.url}${src}`;
|
||||
}).filter(photoUrl => photoUrl);
|
||||
|
||||
const sceneLinkElement = $(element).children('a').eq(1);
|
||||
const url = sceneLinkElement.attr('href');
|
||||
@@ -116,7 +127,8 @@ function scrapeUpcoming(html, site) {
|
||||
.toDate();
|
||||
|
||||
const photoElement = $(element).find('a img.thumbs');
|
||||
const poster = photoElement.attr('src');
|
||||
const posterPath = photoElement.attr('src');
|
||||
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||
|
||||
const videoClass = $(element).find('.update_thumbnail div').attr('class');
|
||||
const videoScript = $(element).find(`script:contains(${videoClass})`).html();
|
||||
@@ -159,7 +171,8 @@ async function scrapeScene(html, url, site) {
|
||||
.html()
|
||||
.split('\n');
|
||||
|
||||
const poster = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
||||
const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
||||
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||
|
||||
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
|
||||
const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
|
||||
@@ -167,7 +180,7 @@ async function scrapeScene(html, url, site) {
|
||||
const photos = await getPhotos(entryId, site);
|
||||
|
||||
const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const movie = $('.update_dvds a').href;
|
||||
const movie = $('.update_dvds a').attr('href');
|
||||
|
||||
return {
|
||||
url,
|
||||
|
||||
Reference in New Issue
Block a user