Added Sperm Swallowers and The Ass Factory to Jules Jordan scraper. Added photo source.
This commit is contained in:
parent
ad1a0376e7
commit
a03a00f5d9
Binary file not shown.
Before Width: | Height: | Size: 59 KiB After Width: | Height: | Size: 59 KiB |
Binary file not shown.
Before Width: | Height: | Size: 59 KiB After Width: | Height: | Size: 59 KiB |
Binary file not shown.
After Width: | Height: | Size: 5.5 KiB |
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
|
@ -1045,7 +1045,18 @@ function getSites(networksMap) {
|
|||
name: 'Jules Jordan',
|
||||
url: 'https://www.julesjordan.com',
|
||||
description: 'Jules Jordan\'s Official Membership Site',
|
||||
parameters: JSON.stringify({ independent: true }),
|
||||
network_id: networksMap.julesjordan,
|
||||
},
|
||||
{
|
||||
slug: 'theassfactory',
|
||||
name: 'The Ass Factory',
|
||||
url: 'https://www.theassfactory.com',
|
||||
network_id: networksMap.julesjordan,
|
||||
},
|
||||
{
|
||||
slug: 'spermswallowers',
|
||||
name: 'Sperm Swallowers',
|
||||
url: 'https://www.spermswallowers.com',
|
||||
network_id: networksMap.julesjordan,
|
||||
},
|
||||
// KELLY MADISON MEDIA
|
||||
|
|
14
src/media.js
14
src/media.js
|
@ -112,8 +112,16 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho
|
|||
|
||||
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
||||
if (Array.isArray(photoUrl)) {
|
||||
return fetchPhoto(photoUrl[0], index, identifier);
|
||||
// return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => fetchPhoto(url, index, identifier)), Promise.reject());
|
||||
// return fetchPhoto(photoUrl[0], index, identifier);
|
||||
return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
|
||||
const photo = await fetchPhoto(url, index, identifier);
|
||||
|
||||
if (photo) {
|
||||
return photo;
|
||||
}
|
||||
|
||||
throw new Error('Photo not available');
|
||||
}), Promise.reject(new Error()));
|
||||
}
|
||||
|
||||
try {
|
||||
|
@ -214,6 +222,8 @@ async function storePhotos(release, releaseId) {
|
|||
concurrency: 10,
|
||||
}).filter(photo => photo);
|
||||
|
||||
console.log(metaFiles);
|
||||
|
||||
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
||||
|
||||
|
|
|
@ -142,8 +142,6 @@ async function scrapeReleases() {
|
|||
concurrency: 5,
|
||||
});
|
||||
|
||||
console.log(scrapedReleases.flat(2).map(release => release.movie));
|
||||
|
||||
if (argv.save) {
|
||||
await storeReleases(scrapedReleases.flat(2));
|
||||
}
|
||||
|
|
|
@ -22,8 +22,12 @@ function scrapePhotos(html) {
|
|||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
// high res often available in photos/ directory, but not always, provide original as fallback
|
||||
return [src.replace('thumbs/', 'photos/'), src];
|
||||
// high res often available in alternative directories, but not always, provide original as fallback
|
||||
return [
|
||||
src.replace('thumbs/', 'photos/'),
|
||||
src.replace('thumbs/', '1024watermarked/'),
|
||||
src,
|
||||
];
|
||||
});
|
||||
|
||||
return photos;
|
||||
|
@ -60,7 +64,14 @@ function scrapeLatest(html, site) {
|
|||
return scenesElements.map((element) => {
|
||||
const photoElement = $(element).find('a img.thumbs');
|
||||
const photoCount = Number(photoElement.attr('cnt'));
|
||||
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => photoElement.attr(`src${index}_1x`)).filter(photoUrl => photoUrl !== undefined);
|
||||
const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => {
|
||||
const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`);
|
||||
|
||||
if (!src) return null;
|
||||
if (src.match(/^http/)) return src;
|
||||
|
||||
return `${site.url}${src}`;
|
||||
}).filter(photoUrl => photoUrl);
|
||||
|
||||
const sceneLinkElement = $(element).children('a').eq(1);
|
||||
const url = sceneLinkElement.attr('href');
|
||||
|
@ -116,7 +127,8 @@ function scrapeUpcoming(html, site) {
|
|||
.toDate();
|
||||
|
||||
const photoElement = $(element).find('a img.thumbs');
|
||||
const poster = photoElement.attr('src');
|
||||
const posterPath = photoElement.attr('src');
|
||||
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||
|
||||
const videoClass = $(element).find('.update_thumbnail div').attr('class');
|
||||
const videoScript = $(element).find(`script:contains(${videoClass})`).html();
|
||||
|
@ -159,7 +171,8 @@ async function scrapeScene(html, url, site) {
|
|||
.html()
|
||||
.split('\n');
|
||||
|
||||
const poster = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
||||
const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
||||
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||
|
||||
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
|
||||
const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
|
||||
|
@ -167,7 +180,7 @@ async function scrapeScene(html, url, site) {
|
|||
const photos = await getPhotos(entryId, site);
|
||||
|
||||
const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const movie = $('.update_dvds a').href;
|
||||
const movie = $('.update_dvds a').attr('href');
|
||||
|
||||
return {
|
||||
url,
|
||||
|
|
Loading…
Reference in New Issue