Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.

This commit is contained in:
2020-05-14 04:26:05 +02:00
parent f1eb29c713
commit 11eb66f834
178 changed files with 16594 additions and 16929 deletions

View File

@@ -6,10 +6,11 @@
},
"rules": {
"strict": 0,
"indent": ["error", "tab"],
"no-tabs": "off",
"no-unused-vars": ["error", {"argsIgnorePattern": "^_"}],
"no-console": 0,
"no-underscore-dangle": 0,
"indent": "off",
"prefer-destructuring": "off",
"template-curly-spacing": "off",
"object-curly-newline": "off",

View File

@@ -18,522 +18,522 @@ const { curateSites } = require('./sites');
const { storeMedia, associateMedia } = require('./media');
/**
 * Build the `origin` / `residence` sub-object of a curated actor from the
 * `<prefix>_city`, `<prefix>_state` and `<prefix>_country_*` columns of a row.
 *
 * @param {Object} actor - Actor row, including the joined country columns.
 * @param {string} prefix - Column prefix, `birth` or `residence`.
 * @returns {Object|null} Place with only the available parts, or null when
 *   neither city, state nor country code is set.
 */
function curateActorPlace(actor, prefix) {
	const city = actor[`${prefix}_city`];
	const state = actor[`${prefix}_state`];
	const alpha2 = actor[`${prefix}_country_alpha2`];

	if (!city && !state && !alpha2) {
		return null;
	}

	const place = {};

	if (city) place.city = city;
	if (state) place.state = state;

	if (alpha2) {
		place.country = {
			alpha2,
			name: actor[`${prefix}_country_name`],
			alias: actor[`${prefix}_country_alias`],
		};
	}

	return place;
}

/**
 * Convert an `actors` row into the camelCased actor object, loading its
 * aliases, avatar, photos and social links with four parallel queries.
 *
 * @param {Object} actor - Row from `actors`, with the `birth_country_*` and
 *   `residence_country_*` columns joined in.
 * @returns {Promise<Object>} Curated actor; `age` is only present when the
 *   row has a birthdate.
 */
async function curateActor(actor) {
	const [aliases, avatar, photos, social] = await Promise.all([
		knex('actors').where({ alias_for: actor.id }),
		knex('actors_avatars')
			.where('actor_id', actor.id)
			.join('media', 'media.id', 'actors_avatars.media_id')
			.first(),
		knex('actors_photos')
			.where('actor_id', actor.id)
			.join('media', 'media.id', 'actors_photos.media_id')
			.orderBy('index'),
		knex('actors_social')
			.where('actor_id', actor.id)
			.orderBy('platform', 'desc'),
	]);

	const curatedActor = {
		id: actor.id,
		gender: actor.gender,
		name: actor.name,
		description: actor.description,
		birthdate: actor.birthdate && new Date(actor.birthdate),
		country: actor.country_alpha2,
		origin: curateActorPlace(actor, 'birth'),
		residence: curateActorPlace(actor, 'residence'),
		ethnicity: actor.ethnicity,
		height: actor.height,
		weight: actor.weight,
		bust: actor.bust,
		waist: actor.waist,
		hip: actor.hip,
		naturalBoobs: actor.natural_boobs,
		aliases: aliases.map(({ name }) => name),
		slug: actor.slug,
		avatar,
		photos,
		hasTattoos: actor.has_tattoos,
		hasPiercings: actor.has_piercings,
		tattoos: actor.tattoos,
		piercings: actor.piercings,
		social,
		scrapedAt: actor.scraped_at,
	};

	if (curatedActor.birthdate) {
		// age in whole years relative to the current date
		curatedActor.age = moment().diff(curatedActor.birthdate, 'years');
	}

	return curatedActor;
}
/**
 * Curate a list of actor rows in parallel.
 *
 * @param {Object[]} releases - Actor rows (the parameter name is historical).
 * @returns {Promise<Object[]>} Curated actors in input order.
 */
function curateActors(releases) {
	return Promise.all(releases.map(release => curateActor(release)));
}
/**
 * Map a scraped or merged actor profile onto the snake_cased column names
 * used for the `actors` table.
 *
 * @param {Object} actor - Profile with camelCased properties.
 * @param {boolean} scraped - When truthy, stamp the entry with `scraped_at`
 *   and `scrape_success`.
 * @param {boolean} scrapeSuccess - Value stored in `scrape_success`.
 * @returns {Object} Row object; `id`, the place columns and the scrape
 *   columns are only present when the corresponding input is.
 */
function curateActorEntry(actor, scraped, scrapeSuccess) {
	const curatedActor = {
		name: capitalize(actor.name),
		slug: slugify(actor.name),
		birthdate: actor.birthdate,
		description: actor.description,
		gender: actor.gender,
		ethnicity: actor.ethnicity,
		bust: actor.bust,
		waist: actor.waist,
		hip: actor.hip,
		natural_boobs: actor.naturalBoobs,
		height: actor.height,
		weight: actor.weight,
		hair: actor.hair,
		eyes: actor.eyes,
		has_tattoos: actor.hasTattoos,
		has_piercings: actor.hasPiercings,
		tattoos: actor.tattoos,
		piercings: actor.piercings,
	};

	if (actor.id) {
		curatedActor.id = actor.id;
	}

	if (actor.birthPlace) {
		curatedActor.birth_city = actor.birthPlace.city;
		curatedActor.birth_state = actor.birthPlace.state;
		curatedActor.birth_country_alpha2 = actor.birthPlace.country;
	}

	if (actor.residencePlace) {
		curatedActor.residence_city = actor.residencePlace.city;
		curatedActor.residence_state = actor.residencePlace.state;
		curatedActor.residence_country_alpha2 = actor.residencePlace.country;
	}

	if (scraped) {
		curatedActor.scraped_at = new Date();
		curatedActor.scrape_success = scrapeSuccess;
	}

	return curatedActor;
}
/**
 * Recognise the social platform a URL belongs to and normalise the URL to
 * that platform's canonical profile address.
 *
 * @param {string} url - Social link as scraped.
 * @param {number} actorId - Actor the link belongs to.
 * @returns {{url: string, platform: (string|undefined), actor_id: number}}
 *   Row for `actors_social`; for an unrecognised URL the original URL is kept
 *   and `platform` is undefined.
 */
function curateSocialEntry(url, actorId) {
	const platforms = [
		// links supplied by PH often look like domain.com/domain.com/username
		{
			label: 'twitter',
			pattern: 'http(s)\\://(*)twitter.com/:username(/)(?*)',
			format: username => `https://www.twitter.com/${username}`,
		},
		{
			label: 'youtube',
			pattern: 'http(s)\\://(*)youtube.com/channel/:username(?*)',
			format: username => `https://www.youtube.com/channel/${username}`,
		},
		{
			label: 'instagram',
			pattern: 'http(s)\\://(*)instagram.com/:username(/)(?*)',
			format: username => `https://www.instagram.com/${username}`,
		},
		{
			label: 'snapchat',
			pattern: 'http(s)\\://(*)snapchat.com/add/:username(/)(?*)',
			format: username => `https://www.snapchat.com/add/${username}`,
		},
		{
			label: 'tumblr',
			pattern: 'http(s)\\://:username.tumblr.com(*)',
			format: username => `https://${username}.tumblr.com`,
		},
		{
			label: 'onlyfans',
			pattern: 'http(s)\\://(*)onlyfans.com/:username(/)(?*)',
			format: username => `https://www.onlyfans.com/${username}`,
		},
		{
			label: 'fancentro',
			pattern: 'http(s)\\://(*)fancentro.com/:username(/)(?*)',
			format: username => `https://www.fancentro.com/${username}`,
		},
		{
			label: 'modelhub',
			pattern: 'http(s)\\://(*)modelhub.com/:username(/)(?*)',
			format: username => `https://www.modelhub.com/${username}`,
		},
	];

	// the first platform whose pattern matches wins; later ones are not tried
	let patternMatch = null;

	const platform = platforms.find((candidate) => {
		patternMatch = new UrlPattern(candidate.pattern).match(url);

		return Boolean(patternMatch);
	});

	if (!platform) {
		return {
			url,
			platform: undefined,
			actor_id: actorId,
		};
	}

	return {
		url: platform.format ? platform.format(patternMatch.username) : url,
		platform: platform.label,
		actor_id: actorId,
	};
}
/**
 * Curate a list of social URLs for an actor, dropping every URL that is
 * already stored for that actor or that repeats earlier in the list.
 * URLs are compared case-insensitively after curation.
 *
 * @param {string[]|undefined|null} urls - Scraped social links.
 * @param {number} actorId - Actor the links belong to.
 * @returns {Promise<Object[]>} New `actors_social` rows, in input order.
 */
async function curateSocialEntries(urls, actorId) {
	if (!urls) {
		return [];
	}

	const existingSocialLinks = await knex('actors_social').where('actor_id', actorId);
	const knownUrls = new Set(existingSocialLinks.map(entry => entry.url.toLowerCase()));

	return urls
		.map(url => curateSocialEntry(url, actorId))
		.filter((socialEntry) => {
			const key = socialEntry.url.toLowerCase();

			if (knownUrls.has(key)) {
				// prevent duplicates
				return false;
			}

			knownUrls.add(key);

			return true;
		});
}
/**
 * Fetch actors matching a query object, joined with the names and aliases of
 * their birth and residence countries, and return them curated.
 *
 * @param {Object} queryObject - Conditions passed to `whereOr` for the
 *   `actors` table.
 * @param {number} [limit=100] - Maximum number of rows.
 * @returns {Promise<Object[]>} Curated actors ordered by name, then gender.
 */
async function fetchActors(queryObject, limit = 100) {
	const releases = await knex('actors')
		.select(
			'actors.*',
			'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
			'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name', 'residence_countries.alias as residence_country_alias',
		)
		.leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
		.leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
		.orderBy(['actors.name', 'actors.gender'])
		.where(builder => whereOr(queryObject, 'actors', builder))
		.limit(limit);

	return curateActors(releases);
}
/**
 * Curate the given social URLs and insert the ones not yet stored for the
 * actor into `actors_social`.
 *
 * @param {string[]|undefined|null} urls - Scraped social links.
 * @param {number} actorId - Actor the links belong to.
 * @returns {Promise<void>}
 */
async function storeSocialLinks(urls, actorId) {
	const curatedSocialEntries = await curateSocialEntries(urls, actorId);

	if (curatedSocialEntries.length === 0) {
		// nothing new to store, skip the database round trip
		return;
	}

	await knex('actors_social').insert(curatedSocialEntries);
}
/**
 * Store avatar media for an actor and associate it with the actor.
 *
 * @param {Array|undefined|null} avatars - Avatar sources; entries may be
 *   arrays of fallbacks, as assembled by `mergeProfiles`.
 * @param {number} actorId - Actor the avatars belong to.
 * @returns {Promise<Object|Array>} Result of `storeMedia`, or an empty array
 *   when there are no avatars.
 */
async function storeAvatars(avatars, actorId) {
	if (!avatars || avatars.length === 0) {
		return [];
	}

	const avatarsBySource = await storeMedia(avatars, 'actor', 'avatar');

	await associateMedia({ [actorId]: avatars }, avatarsBySource, 'actor', 'photo', 'avatar');

	return avatarsBySource;
}
/**
 * Insert a new actor row, then store its social links and, when present,
 * its avatars.
 *
 * @param {Object} actor - Actor profile; only `name` is required.
 * @param {boolean} [scraped=false] - Whether the profile came from a scrape.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape succeeded.
 * @returns {Promise<Object>} The inserted `actors` row.
 */
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
	const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

	const [actorEntry] = await knex('actors')
		.insert(curatedActor)
		.returning('*');

	await storeSocialLinks(actor.social, actorEntry.id);

	if (actor.avatars) {
		await storeAvatars(actor.avatars, actorEntry.id);
	}

	logger.info(`Added new entry for actor '${actor.name}'`);

	return actorEntry;
}
/**
 * Update the existing row of an actor (matched on `actor.id`) and store any
 * new social links. Avatars are not handled here.
 *
 * @param {Object} actor - Actor profile including `id`.
 * @param {boolean} [scraped=false] - Whether the profile came from a scrape.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape succeeded.
 * @returns {Promise<Object>} The updated `actors` row.
 */
async function updateActor(actor, scraped = false, scrapeSuccess = false) {
	const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

	const [actorEntry] = await knex('actors')
		.where({ id: actor.id })
		.update(curatedActor)
		.returning('*');

	await storeSocialLinks(actor.social, actor.id);

	logger.info(`Updated entry for actor '${actor.name}'`);

	return actorEntry;
}
/**
 * Normalise one avatar of a scraped profile to `{ src, scraper, copyright }`.
 *
 * @param {string|Object} avatar - Avatar URL, or an object with `src` and an
 *   optional `copyright`.
 * @param {Object} profile - Profile the avatar came from.
 * @returns {{src: *, scraper: *, copyright: *}} The avatar's own copyright
 *   when it has one, otherwise the capitalized site name or scraper slug.
 */
function curateProfileAvatar(avatar, profile) {
	return {
		src: avatar.src || avatar,
		scraper: profile.scraper,
		copyright: avatar.copyright === undefined ? capitalize(profile.site?.name || profile.scraper) : avatar.copyright,
	};
}

/**
 * Merge the profiles scraped from several sources into one. For most fields
 * the first source that provides a value wins; social links, releases and
 * avatars are accumulated across sources.
 *
 * @param {Array<Object|null>} profiles - Scraped profiles in priority order;
 *   falsy entries (failed scrapes) are skipped.
 * @param {Object} [actor] - Existing actor entry; when given, its `id` and
 *   `name` take precedence.
 * @returns {Promise<Object|null>} Merged profile, or null when no source
 *   returned a profile.
 */
async function mergeProfiles(profiles, actor) {
	if (profiles.filter(Boolean).length === 0) {
		return null;
	}

	const mergedProfile = profiles.reduce((prevProfile, profile) => {
		if (!profile) {
			return prevProfile;
		}

		const accProfile = {
			id: actor ? actor.id : null,
			name: actor ? actor.name : (prevProfile.name || profile.name),
			description: prevProfile.description || profile.description,
			gender: prevProfile.gender || profile.gender,
			// replace a missing or unparseable birthdate with the next source's
			birthdate: !prevProfile.birthdate || Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate,
			birthPlace: prevProfile.birthPlace || profile.birthPlace,
			residencePlace: prevProfile.residencePlace || profile.residencePlace,
			nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
			ethnicity: prevProfile.ethnicity || profile.ethnicity,
			bust: prevProfile.bust || (/\d+\w+/.test(profile.bust) ? profile.bust : null),
			waist: prevProfile.waist || profile.waist,
			hip: prevProfile.hip || profile.hip,
			// booleans: false is a valid value, so only fall through on undefined
			naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs,
			height: prevProfile.height || profile.height,
			weight: prevProfile.weight || profile.weight,
			hair: prevProfile.hair || profile.hair,
			eyes: prevProfile.eyes || profile.eyes,
			hasPiercings: prevProfile.hasPiercings === undefined ? profile.hasPiercings : prevProfile.hasPiercings,
			hasTattoos: prevProfile.hasTattoos === undefined ? profile.hasTattoos : prevProfile.hasTattoos,
			piercings: prevProfile.piercings || profile.piercings,
			tattoos: prevProfile.tattoos || profile.tattoos,
			social: prevProfile.social.concat(profile.social || []),
			releases: prevProfile.releases.concat(profile.releases ? profile.releases : []), // don't flatten fallbacks
		};

		if (profile.avatar) {
			const avatar = Array.isArray(profile.avatar)
				? profile.avatar.map(avatarX => curateProfileAvatar(avatarX, profile))
				: curateProfileAvatar(profile.avatar, profile);

			accProfile.avatars = prevProfile.avatars.concat([avatar]); // don't flatten fallbacks
		} else {
			accProfile.avatars = prevProfile.avatars;
		}

		return accProfile;
	}, {
		social: [],
		avatars: [],
		releases: [],
	});

	const [birthPlace, residencePlace] = await Promise.all([
		resolvePlace(mergedProfile.birthPlace),
		resolvePlace(mergedProfile.residencePlace),
	]);

	mergedProfile.birthPlace = birthPlace;
	mergedProfile.residencePlace = residencePlace;

	if (!mergedProfile.birthPlace && mergedProfile.nationality) {
		const country = await knex('countries')
			.where('nationality', 'ilike', `%${mergedProfile.nationality}%`)
			.orderBy('priority', 'desc')
			.first();

		if (country) {
			// an unknown nationality leaves the birth place unset instead of throwing
			mergedProfile.birthPlace = {
				country: country.alpha2,
			};
		}
	}

	return mergedProfile;
}
/**
 * Scrape an actor profile from every source. A source is either a single
 * scraper slug or an array of slugs; within an array the scrapers are tried
 * in order and the first one that yields a profile wins.
 *
 * @param {Array<string|string[]>} sources - Scraper slugs or groups of slugs.
 * @param {string} actorName - Name being scraped; also used as the profile name.
 * @param {Object} [actorEntry] - Existing actor entry; when given, its name is
 *   the one passed to the scraper.
 * @param {Object} sitesBySlug - Site and network entries keyed by slug.
 * @returns {Promise<Array<Object|null>>} One profile, or null, per source.
 */
async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
	return Promise.map(sources, async (source) => {
		const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));

		try {
			// Chain of fallbacks seeded with a rejection: every scraper only runs
			// when all scrapers before it failed, and a success short-circuits the rest.
			return await profileScrapers.reduce((outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
				if (!scraper) {
					logger.warn(`No profile scraper available for ${scraperSlug}`);
					throw new Error(`No profile scraper available for ${scraperSlug}`);
				}

				logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);

				const site = sitesBySlug[scraperSlug] || null;
				const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, include);

				// a number is treated as "no profile" — presumably an HTTP status code; verify against the scrapers
				if (profile && typeof profile !== 'number') {
					logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);

					return {
						...profile,
						name: actorName,
						scraper: scraperSlug,
						site,
						releases: profile.releases?.map(release => (typeof release === 'string'
							? { url: release, site }
							: { ...release, site: release.site || site }
						)),
					};
				}

				logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}: ${profile}`);
				// an unavailable profile is expected, flag the error so it is not logged as a warning
				throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
			}), Promise.reject(new Error()));
		} catch (error) {
			if (error.warn !== false) {
				logger.warn(`Error in scraper ${source}: ${error.message}`);
			}
		}

		return null;
	});
}
/**
 * Scrape, merge and optionally store the profiles of the given actors,
 * three actors at a time.
 *
 * @param {string[]} [actorNames] - Names to scrape; defaults to `argv.actors`.
 * @returns {Promise<Array<Object|null>>} The merged profile per actor, or
 *   null when no profile was found or scraping that actor failed.
 */
async function scrapeActors(actorNames) {
	return Promise.map(actorNames || argv.actors, async (actorName) => {
		try {
			const actorSlug = slugify(actorName);
			const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
			const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
			const finalSources = argv.withReleases ? sources.flat() : sources; // ignore race-to-success grouping when scenes are requested

			const [siteEntries, networkEntries] = await Promise.all([
				knex('sites')
					.leftJoin('networks', 'sites.network_id', 'networks.id')
					.select(
						'sites.*',
						'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
					)
					.whereIn('sites.slug', finalSources.flat()),
				knex('networks').select('*').whereIn('slug', finalSources.flat()),
			]);

			const sites = await curateSites(siteEntries, true);
			const networks = networkEntries.map(network => ({ ...network, isFallback: true }));
			// networks go first so that a site with the same slug takes precedence
			const sitesBySlug = Object.fromEntries([].concat(networks, sites).map(site => [site.slug, site]));

			// NOTE(review): `sources` rather than `finalSources` is passed on, so the
			// flattening for --with-releases has no effect on scraping — confirm intent.
			const profiles = await scrapeProfiles(sources, actorName, actorEntry, sitesBySlug);
			const profile = await mergeProfiles(profiles, actorEntry);

			if (profile === null) {
				logger.warn(`Could not find profile for actor '${actorName}'`);

				if (argv.save && !actorEntry) {
					// keep a bare entry for the actor so it is known
					await storeActor({ name: actorName }, false, false);
				}

				return null;
			}

			if (argv.inspect) {
				console.log(profile);
				logger.info(`Found ${profile.releases.length} releases for ${actorName}`);
			}

			if (argv.save) {
				if (actorEntry) {
					await Promise.all([
						updateActor(profile, true, true),
						storeAvatars(profile.avatars, actorEntry.id),
					]);

					return profile;
				}

				await storeActor(profile, true, true);
			}

			return profile;
		} catch (error) {
			console.log(error);
			logger.warn(`${actorName}: ${error}`);

			return null;
		}
	}, {
		concurrency: 3,
	});
}
/**
 * Scrape profiles for every actor whose `scraped_at` column is still null.
 *
 * @returns {Promise<Array<Object|null>>} Result of `scrapeActors` for those
 *   actors' names.
 */
async function scrapeBasicActors() {
	const basicActors = await knex('actors').where('scraped_at', null);

	return scrapeActors(basicActors.map(actor => actor.name));
}
/**
 * Link actors to releases in `releases_actors`, creating actor entries that
 * do not exist yet and skipping associations that are already stored.
 *
 * @param {Object<string, Object>} mappedActors - Actors keyed by slug; each
 *   has a `name` and an iterable `releaseIds`.
 * @param {Object[]} releases - Stored releases (only `id` is read).
 * @returns {Promise<void>}
 */
async function associateActors(mappedActors, releases) {
	const [existingActorEntries, existingAssociationEntries] = await Promise.all([
		knex('actors')
			.whereIn('name', Object.values(mappedActors).map(actor => actor.name))
			.orWhereIn('slug', Object.keys(mappedActors)),
		knex('releases_actors').whereIn('release_id', releases.map(release => release.id)),
	]);

	// actor id -> set of release ids that are already associated
	const associatedReleaseIds = existingAssociationEntries.reduce((acc, associationEntry) => {
		if (!acc.has(associationEntry.actor_id)) {
			acc.set(associationEntry.actor_id, new Set());
		}

		acc.get(associationEntry.actor_id).add(associationEntry.release_id);

		return acc;
	}, new Map());

	const associations = await Promise.map(Object.entries(mappedActors), async ([actorSlug, actor]) => {
		try {
			const actorEntry = existingActorEntries.find(actorX => actorX.slug === actorSlug)
				|| await storeActor(actor);

			const knownReleaseIds = associatedReleaseIds.get(actorEntry.id);

			return Array.from(actor.releaseIds)
				.map(releaseId => ({
					release_id: releaseId,
					actor_id: actorEntry.id,
				}))
				// remove associations already in database
				.filter(association => !(knownReleaseIds && knownReleaseIds.has(association.release_id)));
		} catch (error) {
			// a failure for one actor must not prevent associating the others
			logger.error(actor.name, error);

			return null;
		}
	});

	const newAssociations = associations.filter(association => association).flat();

	if (newAssociations.length > 0) {
		await knex('releases_actors').insert(newAssociations);
	}

	// basic actor scraping is failure prone, don't run together with actor association
}
module.exports = {
associateActors,
fetchActors,
scrapeActors,
scrapeBasicActors,
associateActors,
fetchActors,
scrapeActors,
scrapeBasicActors,
};

View File

@@ -1,125 +1,156 @@
'use strict';
const config = require('config');
const Promise = require('bluebird');
// const logger = require('./logger')(__filename);
const knex = require('./knex');
const scrapers = require('./scrapers/scrapers');
const argv = require('./argv');
const slugify = require('./utils/slugify');
const capitalize = require('./utils/capitalize');
/**
 * Normalise a list of actor names and/or actor objects to base actors with a
 * capitalized name, a slug and the network of the release they appeared in.
 *
 * @param {Array<string|Object>} actorsOrNames - Names, or objects with a `name`.
 * @param {Object} [release] - Release the actors belong to; when omitted the
 *   resulting `network` is undefined.
 * @returns {Object[]} Base actors; properties of actor objects are preserved,
 *   with `name`, `slug` and `network` overwritten.
 */
function toBaseActors(actorsOrNames, release) {
	return actorsOrNames.map((actorOrName) => {
		const name = capitalize(actorOrName.name || actorOrName);
		const slug = slugify(name);

		const baseActor = {
			name,
			slug,
			network: release?.site.network,
		};

		if (actorOrName.name) {
			return {
				...actorOrName,
				...baseActor,
			};
		}

		return baseActor;
	});
}
/**
 * Build the `actors` row for a new base actor that is not tied to a network.
 *
 * @param {{name: string, slug: string}} baseActor - Base actor to store.
 * @param {*} batchId - ID of the batch the entry is created in.
 * @returns {{name: string, slug: string, network_id: null, batch_id: *}}
 */
function curateActorEntry(baseActor, batchId) {
	const { name, slug } = baseActor;

	return {
		name,
		slug,
		network_id: null,
		batch_id: batchId,
	};
}
/**
 * Build the `actors` rows for a list of new base actors.
 *
 * @param {Object[]} baseActors - Base actors to store.
 * @param {*} batchId - ID of the batch the entries are created in.
 * @returns {Object[]} One row per base actor, in input order.
 */
function curateActorEntries(baseActors, batchId) {
	return baseActors.map(baseActor => curateActorEntry(baseActor, batchId));
}
/**
 * Look up the actor entries for the given names, creating entries (in a
 * fresh batch) for every name that has no network-less actor entry yet.
 *
 * @param {Array<string|Object>} actorNames - names or actor objects to scrape
 * @returns {Promise<Object[]>} existing entries followed by newly created ones (`id`, `name`, `slug`)
 */
async function scrapeActors(actorNames) {
	const baseActors = toBaseActors(actorNames);

	const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
	const siteSlugs = sources.flat();

	const [networks, sites, existingActorEntries] = await Promise.all([
		knex('networks').whereIn('slug', siteSlugs),
		knex('sites').whereIn('slug', siteSlugs),
		knex('actors')
			.select(['id', 'name', 'slug'])
			.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
			.whereNull('network_id'),
	]);

	const existingActorSlugs = new Set(existingActorEntries.map(actorEntry => actorEntry.slug));

	// TODO: the source lookups below are prepared but not consumed by this function yet
	const networksBySlug = Object.fromEntries(networks.map(network => [network.slug, { ...network, isNetwork: true }]));
	const sitesBySlug = Object.fromEntries(sites.map(site => [site.slug, site]));

	const newBaseActors = baseActors.filter(baseActor => !existingActorSlugs.has(baseActor.slug));

	// only open a batch when there is something to insert
	const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];

	const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
	const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']);

	const actorEntries = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);

	return actorEntries;
}
/**
 * Fetch the actors matching the given base actors, inserting the missing ones.
 *
 * An actor matches when it has the same slug and either no network or the
 * base actor's network.
 *
 * @param {Object[]} baseActors - base actors with `slug` and `network.id`
 * @param {*} batchId - batch ID stored on newly created actors
 * @returns {Promise<Object[]>} newly created actors followed by the existing ones
 */
async function getOrCreateActors(baseActors, batchId) {
	const existingActors = await knex('actors')
		.select('id', 'alias_for', 'name', 'slug', 'network_id')
		.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
		.whereNull('network_id')
		.orWhereIn(['slug', 'network_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));

	// network_id -> slug -> true; actors without a network end up under the 'null' key
	const existingActorSlugs = {};

	existingActors.forEach((actor) => {
		existingActorSlugs[actor.network_id] = existingActorSlugs[actor.network_id] || {};
		existingActorSlugs[actor.network_id][actor.slug] = true;
	});

	const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.network.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);

	if (uniqueBaseActors.length === 0) {
		// nothing to create, avoid issuing an empty insert
		return existingActors;
	}

	const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
	const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'network_id']);

	if (Array.isArray(newActors)) {
		return newActors.concat(existingActors);
	}

	return existingActors;
}
/**
 * Link releases to their actors in `releases_actors`, creating actor entries
 * for actors that do not exist yet.
 *
 * @param {Object[]} releases - stored releases with `id`, `site` and optionally `actors`
 * @param {*} batchId - batch ID stored on newly created actors
 * @returns {Promise<void>}
 */
async function associateActors(releases, batchId) {
	const baseActorsByReleaseId = releases.reduce((acc, release) => {
		if (release.actors) {
			acc[release.id] = toBaseActors(release.actors, release);
		}

		return acc;
	}, {});

	const baseActors = Object.values(baseActorsByReleaseId).flat();

	if (baseActors.length === 0) {
		return;
	}

	// slug -> network ID -> base actor; collapses actors that occur in several releases
	const baseActorsBySlugAndNetworkId = {};

	baseActors.forEach((baseActor) => {
		baseActorsBySlugAndNetworkId[baseActor.slug] = baseActorsBySlugAndNetworkId[baseActor.slug] || {};
		baseActorsBySlugAndNetworkId[baseActor.slug][baseActor.network.id] = baseActor;
	});

	const uniqueBaseActors = Object.values(baseActorsBySlugAndNetworkId)
		.map(baseActorsByNetworkId => Object.values(baseActorsByNetworkId))
		.flat();

	const actors = await getOrCreateActors(uniqueBaseActors, batchId);

	// network ID -> slug -> actor ID; aliases resolve to the actor they alias for
	const actorIdsBySlugAndNetworkId = {};

	actors.forEach((actor) => {
		actorIdsBySlugAndNetworkId[actor.network_id] = actorIdsBySlugAndNetworkId[actor.network_id] || {};
		actorIdsBySlugAndNetworkId[actor.network_id][actor.slug] = actor.alias_for || actor.id;
	});

	const releaseActorAssociations = Object.entries(baseActorsByReleaseId)
		.map(([releaseId, releaseActors]) => releaseActors
			.map(releaseActor => ({
				release_id: releaseId,
				// prefer the network-specific actor, fall back to the network-less one (which may not exist)
				actor_id: actorIdsBySlugAndNetworkId[releaseActor.network.id]?.[releaseActor.slug] || actorIdsBySlugAndNetworkId.null?.[releaseActor.slug],
			})))
		.flat();

	// re-associating an already linked actor must not fail the whole insert
	await knex.raw(`${knex('releases_actors').insert(releaseActorAssociations).toString()} ON CONFLICT DO NOTHING;`);
}
module.exports = {
associateActors,
associateActors,
scrapeActors,
};

View File

@@ -7,39 +7,39 @@ const knex = require('./knex');
const fetchUpdates = require('./updates');
const { fetchScenes, fetchMovies } = require('./deep');
const { storeReleases, updateReleasesSearch } = require('./store-releases');
const { scrapeActors } = require('./actors-legacy');
const { scrapeActors } = require('./actors');
/**
 * Application entry point: either start the web server, or run the scrape
 * steps selected through the command line arguments.
 *
 * @returns {Promise<void>}
 */
async function init() {
	if (argv.server) {
		// the server keeps running, so the database connection stays open
		await initServer();
		return;
	}

	try {
		if (argv.updateSearch) {
			await updateReleasesSearch();
		}

		if (argv.actors) {
			await scrapeActors(argv.actors);
		}

		const updateBaseScenes = (argv.scrape || argv.sites || argv.networks) && await fetchUpdates();

		const deepScenes = argv.deep
			? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || [])])
			: updateBaseScenes;

		const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);
		const deepMovies = await fetchMovies([...(argv.movies || []), ...(sceneMovies || [])]);

		if (argv.save) {
			await storeReleases([
				...(deepScenes || []),
				...(deepMovies || []),
			]);
		}
	} finally {
		// always release the connection pool, also when a step throws
		await knex.destroy();
	}
}
module.exports = init;

View File

@@ -4,188 +4,188 @@ const config = require('config');
const yargs = require('yargs');
// Command line interface definition; defaults come from the configuration where available.
// NOTE(review): 'with-releases' is declared as alias for both 'actor-scenes' and 'movie-scenes' — confirm this is intended.
const { argv } = yargs
	.command('npm start')
	.option('server', {
		describe: 'Start web server',
		type: 'boolean',
		alias: 'web',
	})
	.option('scrape', {
		describe: 'Scrape sites and networks defined in configuration',
		type: 'boolean',
	})
	.option('networks', {
		describe: 'Networks to scrape (overrides configuration)',
		type: 'array',
		alias: 'network',
	})
	.option('sites', {
		describe: 'Sites to scrape (overrides configuration)',
		type: 'array',
		alias: 'site',
	})
	.option('actors', {
		describe: 'Scrape actors by name or slug',
		type: 'array',
		alias: 'actor',
	})
	.option('actor-scenes', {
		describe: 'Fetch all scenes for an actor',
		type: 'boolean',
		alias: 'with-releases',
		default: false,
	})
	.option('movie-scenes', {
		describe: 'Fetch all scenes for a movie',
		type: 'boolean',
		alias: 'with-releases',
		default: false,
	})
	.option('scene-movies', {
		describe: 'Fetch movies for scenes',
		type: 'boolean',
		default: true,
	})
	.option('profiles', {
		describe: 'Scrape profiles for new actors after fetching scenes',
		type: 'boolean',
		alias: 'bios',
		default: false,
	})
	.option('scene', {
		describe: 'Scrape scene info from URL',
		type: 'array',
		alias: 'scenes',
	})
	.option('movie', {
		describe: 'Scrape movie info from URL',
		type: 'array',
		alias: 'movies',
	})
	.option('sources', {
		describe: 'Use these scrapers for actor data',
		type: 'array',
		alias: 'source',
	})
	.option('deep', {
		describe: 'Fetch details for all releases',
		type: 'boolean',
		default: true,
	})
	.option('latest', {
		describe: 'Scrape latest releases if available',
		type: 'boolean',
		default: true,
	})
	.option('upcoming', {
		describe: 'Scrape upcoming releases if available',
		type: 'boolean',
		default: true,
	})
	.option('redownload', {
		describe: 'Don\'t ignore duplicates, update existing entries',
		type: 'boolean',
		alias: 'force',
	})
	.option('after', {
		describe: 'Don\'t fetch scenes older than',
		type: 'string',
		default: config.fetchAfter.join(' '),
	})
	.option('last', {
		describe: 'Get the latest x releases, no matter the date range',
		type: 'number',
	})
	.option('null-date-limit', {
		describe: 'Limit amount of scenes when dates are missing.',
		type: 'number',
		default: config.nullDateLimit,
		alias: 'limit',
	})
	.option('page', {
		describe: 'Page to start scraping at',
		type: 'number',
		default: 1,
	})
	.option('save', {
		describe: 'Save fetched releases to database',
		type: 'boolean',
		default: true,
	})
	.option('media', {
		describe: 'Include any release media',
		type: 'boolean',
		default: true,
	})
	.option('media-limit', {
		describe: 'Maximum amount of assets of each type per release',
		type: 'number',
		default: config.media.limit,
	})
	.option('images', {
		describe: 'Include any photos, posters or covers',
		type: 'boolean',
		default: true,
		alias: 'pics',
	})
	.option('videos', {
		describe: 'Include any trailers or teasers',
		type: 'boolean',
		default: true,
	})
	.option('posters', {
		describe: 'Include release posters',
		type: 'boolean',
		default: true,
		alias: 'poster',
	})
	.option('covers', {
		describe: 'Include release covers',
		type: 'boolean',
		default: true,
		alias: 'cover',
	})
	.option('photos', {
		describe: 'Include release photos',
		type: 'boolean',
		default: true,
	})
	.option('trailers', {
		describe: 'Include release trailers',
		type: 'boolean',
		default: true,
		alias: 'trailer',
	})
	.option('teasers', {
		describe: 'Include release teasers',
		type: 'boolean',
		default: true,
		alias: 'teaser',
	})
	.option('avatars', {
		describe: 'Include actor avatars',
		type: 'boolean',
		default: true,
	})
	.option('inspect', {
		describe: 'Show data in console.',
		type: 'boolean',
		default: false,
	})
	.option('level', {
		describe: 'Log level',
		type: 'string',
		default: process.env.NODE_ENV === 'development' ? 'silly' : 'info',
	})
	.option('debug', {
		describe: 'Show error stack traces',
		type: 'boolean',
		default: process.env.NODE_ENV === 'development',
	})
	.option('update-search', {
		describe: 'Update search documents for all releases.',
		type: 'boolean',
		default: false,
	});
module.exports = argv;

View File

@@ -11,159 +11,160 @@ const { curateSites } = require('./sites');
const { curateNetworks } = require('./networks');
/**
 * Derive a site slug from a URL: the hostname label directly in front of the
 * last dot-separated label (e.g. 'example' for 'www.example.com').
 *
 * @param {string} url
 * @returns {string|undefined|null} the slug, undefined when the hostname has no such label, null when the URL cannot be parsed
 */
function urlToSiteSlug(url) {
	try {
		const slug = new URL(url)
			.hostname
			.match(/([\w-]+)\.\w+$/)?.[1];

		return slug;
	} catch (error) {
		logger.warn(`Failed to derive site slug from '${url}': ${error.message}`);

		return null;
	}
}
/**
 * Find the sites (or, failing that, networks) belonging to the base releases
 * that have a URL but no site attached yet.
 *
 * @param {Object[]} baseReleases
 * @returns {Promise<Object>} curated sites and networks keyed by slug; networks are marked `isNetwork`, and a site wins over a network with the same slug
 */
async function findSites(baseReleases) {
	const baseReleasesWithoutSite = baseReleases.filter(release => release.url && !release.site);

	const siteSlugs = Array.from(new Set(
		baseReleasesWithoutSite
			.map(baseRelease => urlToSiteSlug(baseRelease.url))
			.filter(Boolean),
	));

	// the two lookups are independent, run them side by side
	const [siteEntries, networkEntries] = await Promise.all([
		knex('sites')
			.leftJoin('networks', 'networks.id', 'sites.network_id')
			.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.parameters as network_parameters', 'networks.description as network_description')
			.whereIn('sites.slug', siteSlugs),
		knex('networks').whereIn('slug', siteSlugs),
	]);

	const [sites, networks] = await Promise.all([
		curateSites(siteEntries, true, false),
		curateNetworks(networkEntries, true, false, false),
	]);

	const markedNetworks = networks.map(network => ({ ...network, isNetwork: true }));

	// sites come last so they override networks sharing the same slug
	const sitesBySlug = Object.fromEntries([]
		.concat(markedNetworks, sites)
		.map(site => [site.slug, site]));

	return sitesBySlug;
}
/**
 * Normalize a mixed list of base releases and release URLs into base
 * releases marked as not deep-scraped yet.
 *
 * @param {Array<Object|string>} baseReleasesOrUrls
 * @returns {Object[]} base releases with `deep: false`; malformed entries are logged and dropped
 */
function toBaseReleases(baseReleasesOrUrls) {
	return baseReleasesOrUrls
		.map((baseReleaseOrUrl) => {
			if (baseReleaseOrUrl?.url) {
				// base release with URL
				return {
					...baseReleaseOrUrl,
					deep: false,
				};
			}

			if (typeof baseReleaseOrUrl === 'string' && /^http/.test(baseReleaseOrUrl)) {
				// URL
				return {
					url: baseReleaseOrUrl,
					deep: false,
				};
			}

			if (typeof baseReleaseOrUrl === 'object' && baseReleaseOrUrl !== null && !Array.isArray(baseReleaseOrUrl)) {
				// base release without URL, prepare for passthrough
				return {
					...baseReleaseOrUrl,
					deep: false,
				};
			}

			logger.warn(`Malformed base release, discarding '${baseReleaseOrUrl}'`);
			return null;
		})
		.filter(Boolean);
}
/**
 * Deep-scrape a single base release with the scraper of its site or network.
 *
 * @param {Object} baseRelease - base release, optionally with `site`, `url`, `path` and `tags`
 * @param {Object} sites - sites and fallback networks keyed by slug
 * @param {string} [type='scene'] - 'scene' or 'movie'
 * @returns {Promise<Object>} the base release merged with the scraped details, or the base release as-is when it cannot be scraped
 */
async function scrapeRelease(baseRelease, sites, type = 'scene') {
	const site = baseRelease.site || sites[urlToSiteSlug(baseRelease.url)];

	if (!site) {
		logger.warn(`No site available for ${baseRelease.url}`);
		return baseRelease;
	}

	// make site.network available, even when site is network fallback
	const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site;

	if ((!baseRelease.url && !baseRelease.path) || !argv.deep) {
		return {
			...baseRelease,
			site,
		};
	}

	const scraper = scrapers.releases[site.slug] || scrapers.releases[siteWithFallbackNetwork.network?.slug];

	if (!scraper) {
		logger.warn(`Could not find scraper for ${baseRelease.url}`);
		return baseRelease;
	}

	if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
		logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
		return baseRelease;
	}

	try {
		logger.verbose(`Fetching ${type} ${baseRelease.url}`);

		const scrapedRelease = type === 'scene'
			? await scraper.fetchScene(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include)
			: await scraper.fetchMovie(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include);

		const mergedRelease = {
			...baseRelease,
			...scrapedRelease,
			deep: !!scrapedRelease,
			site,
		};

		if (scrapedRelease && baseRelease?.tags) {
			// accumulate all available tags; the deep scrape may not return any
			mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags || []);
		}

		return mergedRelease;
	} catch (error) {
		logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
		return baseRelease;
	}
}
/**
 * Deep-scrape a list of base releases, at most 10 at a time.
 *
 * @param {Object[]} baseReleases
 * @param {Object} sites - sites keyed by slug
 * @param {string} type - 'scene' or 'movie'
 * @returns {Promise<Object[]>} scraped releases in input order
 */
async function scrapeReleases(baseReleases, sites, type) {
	return Promise.map(
		baseReleases,
		async baseRelease => scrapeRelease(baseRelease, sites, type),
		{ concurrency: 10 },
	);
}
/**
 * Normalize the given base releases or URLs, resolve their sites and
 * deep-scrape them.
 *
 * @param {Array<Object|string>} baseReleasesOrUrls
 * @param {string} [type='scene'] - 'scene' or 'movie'
 * @returns {Promise<Object[]>} deep releases
 */
async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
	const baseReleases = toBaseReleases(baseReleasesOrUrls);
	const sites = await findSites(baseReleases);

	const deepReleases = await scrapeReleases(baseReleases, sites, type);

	return deepReleases;
}
// Deep-scrape the given base releases or URLs as scenes.
async function fetchScenes(baseReleasesOrUrls) {
	return fetchReleases(baseReleasesOrUrls, 'scene');
}
// Deep-scrape the given base releases or URLs as movies.
async function fetchMovies(baseReleasesOrUrls) {
	return fetchReleases(baseReleasesOrUrls, 'movie');
}
module.exports = {
fetchReleases,
fetchScenes,
fetchMovies,
fetchReleases,
fetchScenes,
fetchMovies,
};

View File

@@ -4,8 +4,8 @@ const config = require('config');
const knex = require('knex');
module.exports = knex({
client: 'pg',
connection: config.database,
// performance overhead, don't use asyncStackTraces in production
asyncStackTraces: process.env.NODE_ENV === 'development',
client: 'pg',
connection: config.database,
// performance overhead, don't use asyncStackTraces in production
asyncStackTraces: process.env.NODE_ENV === 'development',
});

View File

@@ -9,31 +9,31 @@ require('winston-daily-rotate-file');
const args = require('./argv');
/**
 * Create a winston logger labelled with the path of the calling file,
 * relative to `src/` or `dist/` and without its extension.
 *
 * @param {string} filepath - path of the file the logger is created for, usually `__filename`
 * @returns {Object} winston logger writing to the console and to a daily rotating file
 */
function logger(filepath) {
	const root = filepath.match(/src\/|dist\//);

	// files outside src/ and dist/ are labelled by their base name instead of failing on a missing match
	const relativePath = root
		? filepath.slice(root.index + root[0].length)
		: path.basename(filepath);

	const filename = relativePath.replace(path.extname(filepath), '');

	return winston.createLogger({
		format: winston.format.combine(
			winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
			// log the stack for errors, and inspect any message that is not a string
			winston.format(info => (info instanceof Error
				? { ...info, message: info.stack }
				: { ...info, message: typeof info.message === 'string' ? info.message : util.inspect(info.message) }))(),
			winston.format.colorize(),
			winston.format.printf(({ level, timestamp, label, message }) => `${timestamp} ${level} [${label || filename}] ${message}`),
		),
		transports: [
			new winston.transports.Console({
				level: args.level,
				timestamp: true,
			}),
			new winston.transports.DailyRotateFile({
				datePattern: 'YYYY-MM-DD',
				filename: 'log/%DATE%.log',
				level: 'silly',
			}),
		],
	});
}
module.exports = logger;

File diff suppressed because it is too large Load Diff

View File

@@ -5,77 +5,77 @@ const whereOr = require('./utils/where-or');
const { fetchSites } = require('./sites');
/**
 * Curate a network database entry.
 *
 * @param {Object} network - row from the `networks` table
 * @param {boolean} [includeParameters=false] - expose `parameters` (null otherwise)
 * @param {boolean} [includeSites=true] - fetch and attach the network's sites
 * @param {boolean} [includeStudios=false] - fetch and attach the network's studios
 * @returns {Promise<Object>} curated network
 */
async function curateNetwork(network, includeParameters = false, includeSites = true, includeStudios = false) {
	const curatedNetwork = {
		id: network.id,
		name: network.name,
		url: network.url,
		description: network.description,
		slug: network.slug,
		parameters: includeParameters ? network.parameters : null,
	};

	if (includeSites) {
		curatedNetwork.sites = await fetchSites({ network_id: network.id });
	}

	if (includeStudios) {
		const studios = await knex('studios').where({ network_id: network.id });

		curatedNetwork.studios = studios.map(studio => ({
			id: studio.id,
			name: studio.name,
			url: studio.url,
			description: studio.description,
			slug: studio.slug,
		}));
	}

	return curatedNetwork;
}
/**
 * Curate a list of network entries.
 *
 * The include flags are forwarded to curateNetwork for every entry; when
 * omitted, curateNetwork's own defaults apply.
 *
 * @param {Object[]} releases - network rows (parameter name kept for compatibility)
 * @param {boolean} [includeParameters=false]
 * @param {boolean} [includeSites=true]
 * @param {boolean} [includeStudios=false]
 * @returns {Promise<Object[]>} curated networks in input order
 */
function curateNetworks(releases, includeParameters = false, includeSites = true, includeStudios = false) {
	return Promise.all(releases.map(async release => curateNetwork(release, includeParameters, includeSites, includeStudios)));
}
/**
 * Find the network whose URL ends with the domain of the given URL, or
 * equals the given URL.
 *
 * @param {string} url
 * @returns {Promise<Object|null>} curated network including parameters, or null when none matches
 */
async function findNetworkByUrl(url) {
	const { hostname } = new URL(url);
	// strip a literal 'www.' prefix only; an unescaped dot would also strip e.g. 'wwwx'
	const domain = hostname.replace(/^www\./, '');

	const network = await knex('networks')
		.where('networks.url', 'like', `%${domain}`)
		.orWhere('networks.url', url)
		.first();

	if (network) {
		return curateNetwork(network, true);
	}

	return null;
}
/**
 * Fetch and curate up to 100 networks matching the query object.
 *
 * @param {Object} queryObject - conditions passed on to the whereOr helper
 * @returns {Promise<Object[]>} curated networks
 */
async function fetchNetworks(queryObject) {
	const releases = await knex('networks')
		.where(builder => whereOr(queryObject, 'networks', builder))
		.limit(100);

	return curateNetworks(releases);
}
/**
 * Query up to 100 release rows joined with their sites and networks, grouped
 * by network, and curate the result as networks.
 *
 * @returns {Promise<Object[]>} curated networks
 */
async function fetchNetworksFromReleases() {
	const releases = await knex('releases')
		// NOTE(review): the empty second column name looks unintended — confirm which column was meant
		.select('site_id', '')
		.leftJoin('sites', 'sites.id', 'releases.site_id')
		.leftJoin('networks', 'networks.id', 'sites.network_id')
		.groupBy('networks.id')
		.limit(100);

	return curateNetworks(releases);
}
module.exports = {
curateNetwork,
curateNetworks,
fetchNetworks,
fetchNetworksFromReleases,
findNetworkByUrl,
curateNetwork,
curateNetworks,
fetchNetworks,
fetchNetworksFromReleases,
findNetworkByUrl,
};

View File

@@ -11,356 +11,356 @@ const whereOr = require('./utils/where-or');
const { associateTags } = require('./tags');
const { associateActors, scrapeBasicActors } = require('./actors');
const {
pluckItems,
storeMedia,
associateMedia,
pluckItems,
storeMedia,
associateMedia,
} = require('./media');
const { fetchSites } = require('./sites');
const slugify = require('./utils/slugify');
const capitalize = require('./utils/capitalize');
/**
 * Apply the joins, selected columns, tag filter, date range, ordering and
 * limit shared by release queries to the given query builder.
 *
 * @param {Object} queryBuilder - knex query builder on `releases`, modified in place
 * @param {Object} options
 * @param {string|string[]} [options.filter=[]] - tag slugs; releases carrying any of them are excluded
 * @param {Date} [options.after] - only releases dated after this moment
 * @param {Date} [options.before] - only releases dated up to and including this moment
 * @param {number} [options.limit=100] - maximum number of releases
 */
function commonQuery(queryBuilder, {
	filter = [],
	after = new Date(0), // January 1970
	before = new Date(2 ** 44), // May 2109
	limit = 100,
}) {
	const finalFilter = [].concat(filter); // ensure filter is array

	queryBuilder
		.leftJoin('sites', 'releases.site_id', 'sites.id')
		.leftJoin('studios', 'releases.studio_id', 'studios.id')
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.select(
			'releases.*',
			'sites.name as site_name', 'sites.slug as site_slug', 'sites.url as site_url', 'sites.network_id', 'sites.parameters as site_parameters',
			// the site slug used to be selected a second time here, where the studio slug is evidently meant
			'studios.name as studio_name', 'studios.slug as studio_slug', 'studios.url as studio_url',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description',
		)
		.whereNotExists((builder) => {
			// apply tag filters
			builder
				.select('*')
				.from('tags_associated')
				.leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
				.whereIn('tags.slug', finalFilter)
				.where('tags_associated.domain', 'releases')
				.whereRaw('tags_associated.target_id = releases.id');
		})
		.andWhere('releases.date', '>', after)
		.andWhere('releases.date', '<=', before)
		.orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }])
		.limit(limit);
}
/**
 * Expand a flat release row into the nested release object, loading the
 * release's actors, tags and media in parallel.
 *
 * @param {Object} release - Release row including the joined `site_*`,
 *   `studio_*` and `network_*` columns.
 * @returns {Promise<Object>} Curated release with nested actors, tags, media,
 *   site, studio (or null) and network.
 */
async function curateRelease(release) {
	const [actors, tags, media] = await Promise.all([
		knex('actors_associated')
			.select(
				'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate',
				'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
				'media.thumbnail as avatar',
			)
			.where({ release_id: release.id })
			.leftJoin('actors', 'actors.id', 'actors_associated.actor_id')
			.leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
			.leftJoin('media', (builder) => {
				// only the actor media item with index 0 is joined as avatar
				builder
					.on('media.target_id', 'actors.id')
					.andOnVal('media.domain', 'actors')
					.andOnVal('media.index', '0');
			})
			.orderBy('actors.gender'),
		knex('tags_associated')
			.select('tags.name', 'tags.slug')
			.where({
				domain: 'releases',
				target_id: release.id,
			})
			.leftJoin('tags', 'tags.id', 'tags_associated.tag_id')
			.orderBy('tags.priority', 'desc'),
		knex('media')
			.where({
				target_id: release.id,
				domain: 'releases',
			})
			.orderBy(['role', 'index']),
	]);

	const curatedRelease = {
		id: release.id,
		type: release.type,
		title: release.title,
		date: release.date,
		dateAdded: release.created_at,
		description: release.description,
		url: release.url,
		shootId: release.shoot_id,
		entryId: release.entry_id,
		actors: actors.map(actor => ({
			id: actor.id,
			slug: actor.slug,
			name: actor.name,
			gender: actor.gender,
			birthdate: actor.birthdate,
			age: moment().diff(actor.birthdate, 'years'),
			ageThen: moment(release.date).diff(actor.birthdate, 'years'),
			avatar: actor.avatar,
			origin: actor.birth_country_alpha2
				? {
					country: {
						name: actor.birth_country_alias,
						alpha2: actor.birth_country_alpha2,
					},
				}
				: null,
		})),
		director: release.director,
		tags,
		duration: release.duration,
		photos: media.filter(item => item.role === 'photo'),
		// poster and trailer are single items: the first media row with that role
		poster: media.find(item => item.role === 'poster'),
		covers: media.filter(item => item.role === 'cover'),
		trailer: media.find(item => item.role === 'trailer'),
		site: {
			id: release.site_id,
			name: release.site_name,
			independent: !!release.site_parameters?.independent,
			slug: release.site_slug,
			url: release.site_url,
		},
		studio: release.studio_id
			? {
				id: release.studio_id,
				name: release.studio_name,
				slug: release.studio_slug,
				url: release.studio_url,
			}
			: null,
		network: {
			id: release.network_id,
			name: release.network_name,
			description: release.network_description,
			slug: release.network_slug,
			url: release.network_url,
		},
	};

	return curatedRelease;
}
/**
 * Curate a list of release rows concurrently.
 *
 * @param {Object[]} releases - Release rows to pass to curateRelease.
 * @returns {Promise<Object[]>} Curated releases, in input order.
 */
function curateReleases(releases) {
	return Promise.all(releases.map(release => curateRelease(release)));
}
/**
 * Replace a fallback site with the site matching the release's channel.
 *
 * The release is returned untouched unless its site is flagged `isFallback`
 * or its channel is flagged `force`.
 *
 * @param {Object} release - Scraped release; `channel` is either a string or
 *   an object with `name`/`slug`.
 * @returns {Promise<Object>} The release, with `site` replaced when a channel site was matched.
 * @throws {Error} When a channel site is required but no channel is set, or no site matches it.
 */
async function attachChannelSite(release) {
	if (!release.site?.isFallback && !release.channel?.force) {
		return release;
	}

	if (!release.channel) {
		throw new Error(`Unable to derive channel site from generic URL: ${release.url}`);
	}

	const [site] = await fetchSites({
		name: release.channel.name || release.channel,
		slug: release.channel.slug || release.channel,
	});

	if (site) {
		return {
			...release,
			site,
		};
	}

	throw new Error(`Unable to match channel '${release.channel.slug || release.channel}' from generic URL: ${release.url}`);
}
/**
 * Resolve the release's studio reference to a row from the `studios` table.
 *
 * @param {Object} release - Release whose `studio` is matched against the
 *   studio name, slug or URL.
 * @returns {Promise<Object>} The release itself when it has no studio,
 *   otherwise a copy whose `studio` is the first matching row (undefined when none matches).
 */
async function attachStudio(release) {
	if (!release.studio) {
		return release;
	}

	const studio = await knex('studios')
		.where('name', release.studio)
		.orWhere('slug', release.studio)
		.orWhere('url', release.studio)
		.first();

	return {
		...release,
		studio,
	};
}
/**
 * Build the row to insert into or update in the `releases` table.
 *
 * @param {Object} release - Scraped release with an attached `site` and optional `studio`.
 * @param {*} batchId - Batch the release is stored under.
 * @param {Object} [existingRelease] - Stored row, if the release already exists;
 *   `created_batch_id` is only set when this is absent.
 * @returns {Promise<Object>} Column values keyed by column name.
 */
async function curateReleaseEntry(release, batchId, existingRelease) {
	const slug = slugify(release.title, {
		encode: true,
		limit: config.titleSlugLength,
	});

	const curatedRelease = {
		site_id: release.site.id,
		studio_id: release.studio ? release.studio.id : null,
		shoot_id: release.shootId || null,
		entry_id: release.entryId || null,
		type: release.type,
		url: release.url,
		title: release.title,
		slug,
		date: release.date,
		description: release.description,
		// director: release.director,
		duration: release.duration,
		// likes: release.rating && release.rating.likes,
		// dislikes: release.rating && release.rating.dislikes,
		// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
		deep: typeof release.deep === 'boolean' ? release.deep : false,
		deep_url: release.deepUrl,
		updated_batch_id: batchId,
		...(!existingRelease && { created_batch_id: batchId }),
	};

	return curatedRelease;
}
/**
 * Fetch and curate releases matching any of the given `releases` column values.
 *
 * @param {Object} [queryObject={}] - Conditions handed to whereOr for the `releases` table.
 * @param {Object} [options={}] - Options forwarded to the common release query.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchReleases(queryObject = {}, options = {}) {
	const releases = await knex('releases')
		.modify(commonQuery, options)
		.andWhere(builder => whereOr(queryObject, 'releases', builder));

	return curateReleases(releases);
}
/**
 * Fetch and curate releases whose site matches the given conditions.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `sites` table.
 * @param {Object} [options={}] - Options forwarded to the common release query.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchSiteReleases(queryObject, options = {}) {
	const releases = await knex('releases')
		.modify(commonQuery, options)
		.where(builder => whereOr(queryObject, 'sites', builder));

	return curateReleases(releases);
}
/**
 * Fetch and curate releases whose network matches the given conditions.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `networks` table.
 * @param {Object} [options={}] - Options forwarded to the common release query.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchNetworkReleases(queryObject, options = {}) {
	const releases = await knex('releases')
		.modify(commonQuery, options)
		.where(builder => whereOr(queryObject, 'networks', builder));

	return curateReleases(releases);
}
/**
 * Fetch and curate releases associated with actors matching the given conditions.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `actors` table.
 * @param {Object} [options={}] - Options forwarded to the common release query.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchActorReleases(queryObject, options = {}) {
	const releases = await knex('actors_associated')
		.leftJoin('releases', 'actors_associated.release_id', 'releases.id')
		.leftJoin('actors', 'actors_associated.actor_id', 'actors.id')
		.select(
			'actors.name as actor_name',
		)
		.modify(commonQuery, options)
		.where(builder => whereOr(queryObject, 'actors', builder));

	return curateReleases(releases);
}
/**
 * Fetch and curate releases associated with tags matching the given conditions.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `tags` table.
 * @param {Object} [options={}] - Options forwarded to the common release query.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchTagReleases(queryObject, options = {}) {
	const releases = await knex('tags_associated')
		.leftJoin('releases', 'tags_associated.target_id', 'releases.id')
		.leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
		.select(
			'tags.name as tag_name',
		)
		.modify(commonQuery, options)
		// tags_associated is shared between domains, only release associations apply here
		.where('tags_associated.domain', 'releases')
		.where(builder => whereOr(queryObject, 'tags', builder));

	return curateReleases(releases);
}
/**
 * Collect the actors of all releases into a map keyed by actor slug.
 *
 * Actors may be plain name strings or objects with a `name` and further
 * profile properties; object properties are merged into the accumulated entry.
 *
 * @param {Object[]} releases - Stored releases; entries without an `actors` array are skipped.
 * @returns {Object} Map of slug to `{ name, slug, releaseIds: Set, avatars: [] }`
 *   plus any profile properties supplied by the scraper.
 */
function accumulateActors(releases) {
	return releases.reduce((acc, release) => {
		if (!Array.isArray(release.actors)) return acc;

		release.actors.forEach((actor) => {
			const actorName = actor.name ? actor.name.trim() : actor.trim();
			const actorSlug = slugify(actorName);

			if (!actorSlug) return;

			if (!acc[actorSlug]) {
				acc[actorSlug] = {
					name: actorName,
					slug: actorSlug,
					releaseIds: new Set(),
					avatars: [],
				};
			}

			acc[actorSlug].releaseIds.add(release.id);

			if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info

			if (actor.avatar) {
				// avatars without an explicit copyright are attributed to the release's network
				const avatar = Array.isArray(actor.avatar)
					? actor.avatar.map(avatarX => ({
						src: avatarX.src || avatarX,
						copyright: avatarX.copyright === undefined ? capitalize(release.site?.network?.name) : avatarX.copyright,
					}))
					: {
						src: actor.avatar.src || actor.avatar,
						copyright: actor.avatar.copyright === undefined ? capitalize(release.site?.network?.name) : actor.avatar.copyright,
					};

				acc[actorSlug].avatars = acc[actorSlug].avatars.concat([avatar]); // don't flatten fallbacks
			}
		});

		return acc;
	}, {});
}
/**
 * Store and associate the posters, covers, photos, trailers and teasers of
 * the given releases, honouring the media-related command line flags.
 *
 * Roles are processed one after another; a role is skipped when its flag
 * (or the images/videos group flag) is off.
 *
 * @param {Object[]} releases - Stored releases carrying their scraped media sources.
 * @returns {Promise<void>}
 */
async function storeReleaseAssets(releases) {
	if (!argv.media) {
		return;
	}

	// map every release ID to the list of sources it provides for one role
	const groupById = pick => Object.fromEntries(releases.map(release => [release.id, pick(release)]));

	const releasePostersById = groupById(release => [release.poster]);
	const releaseCoversById = groupById(release => release.covers);
	const releaseTrailersById = groupById(release => [release.trailer]);
	const releaseTeasersById = groupById(release => [release.teaser]);
	const releasePhotosById = groupById(release => pluckItems(release.photos));

	const storeRole = async (sourcesById, role) => {
		const storedMedia = await storeMedia(Object.values(sourcesById).flat(), 'release', role);

		// storeMedia may yield nothing, in which case there is nothing to associate
		if (storedMedia) await associateMedia(sourcesById, storedMedia, 'release', role);
	};

	if (argv.images && argv.posters) await storeRole(releasePostersById, 'poster');
	if (argv.images && argv.covers) await storeRole(releaseCoversById, 'cover');
	if (argv.images && argv.photos) await storeRole(releasePhotosById, 'photo');
	if (argv.videos && argv.trailers) await storeRole(releaseTrailersById, 'trailer');
	if (argv.videos && argv.teasers) await storeRole(releaseTeasersById, 'teaser');
}
async function updateReleasesSearch(releaseIds) {
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
const documents = await knex.raw(`
const documents = await knex.raw(`
SELECT
releases.id AS release_id,
TO_TSVECTOR(
@@ -391,117 +391,117 @@ async function updateReleasesSearch(releaseIds) {
GROUP BY releases.id, sites.name, sites.slug, sites.alias, sites.url, networks.name, networks.slug, networks.url;
`, releaseIds && [releaseIds]);
if (documents.rows?.length > 0) {
const query = knex('releases_search').insert(documents.rows).toString();
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
if (documents.rows?.length > 0) {
const query = knex('releases_search').insert(documents.rows).toString();
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
}
/**
 * Insert a release, or update it when it already exists and redownloading is enabled.
 *
 * A release is considered existing when a row with the same entry ID and site ID is found.
 *
 * @param {Object} release - Scraped release with an attached `site`.
 * @param {*} batchId - Batch the release is stored under.
 * @returns {Promise<Object|null>} The stored row: the untouched existing row
 *   when not redownloading, the updated row after an update, the new row after
 *   an insert, or null when the release has no entry ID.
 * @throws {Error} When the release has no site.
 */
async function storeRelease(release, batchId) {
	if (!release.site) {
		throw new Error(`Missing site, unable to store "${release.title}" (${release.url})`);
	}

	if (!release.entryId) {
		logger.warn(`Missing entry ID, unable to store "${release.title}" (${release.url})`);
		return null;
	}

	const existingRelease = await knex('releases')
		.where({
			entry_id: release.entryId,
			site_id: release.site.id,
		})
		.first();

	if (existingRelease && !argv.redownload) {
		return existingRelease;
	}

	const curatedRelease = await curateReleaseEntry(release, batchId, existingRelease);

	if (existingRelease) {
		const [updatedRelease] = await knex('releases')
			.where('id', existingRelease.id)
			.update({
				...existingRelease,
				...curatedRelease,
			})
			.returning('*');

		// the row is updated by ID, so its tags only need associating once
		await associateTags(release, existingRelease.id);

		if (updatedRelease) {
			logger.info(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);
		}

		// prefer the fresh row so callers see the updated slug
		return updatedRelease || existingRelease;
	}

	const [releaseEntry] = await knex('releases')
		.insert(curatedRelease)
		.returning('*');

	await associateTags(release, releaseEntry.id);

	logger.info(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);

	return releaseEntry;
}
/**
 * Store a list of scraped releases under a new batch, then associate their
 * actors, refresh their search documents and store their media.
 *
 * Releases that fail to store are logged and left out of the result.
 *
 * @param {Object[]} releases - Scraped releases.
 * @returns {Promise<{releases: Object[], actors: Object}>} Stored releases
 *   (scraped data plus stored `id` and `slug`) and the accumulated actors by slug.
 */
async function storeReleases(releases) {
	const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

	const storedReleases = await Promise.map(releases, async (release) => {
		try {
			const releaseWithChannelSite = await attachChannelSite(release);
			const releaseWithStudio = await attachStudio(releaseWithChannelSite);
			const storedRelease = await storeRelease(releaseWithStudio, batchId);

			return storedRelease && {
				id: storedRelease.id,
				slug: storedRelease.slug,
				...releaseWithChannelSite,
			};
		} catch (error) {
			logger.error(error);

			return null;
		}
	}, {
		concurrency: 10,
	}).filter(Boolean);

	logger.info(`Stored ${storedReleases.length} new releases`);

	const actors = accumulateActors(storedReleases);

	await associateActors(actors, storedReleases);

	await Promise.all([
		// actors need to be stored before generating search
		updateReleasesSearch(storedReleases.map(release => release.id)),
		storeReleaseAssets(storedReleases),
	]);

	if (argv.withProfiles && Object.keys(actors).length > 0) {
		await scrapeBasicActors();
	}

	return {
		releases: storedReleases,
		actors,
	};
}
module.exports = {
fetchReleases,
fetchActorReleases,
fetchSiteReleases,
fetchNetworkReleases,
fetchTagReleases,
storeRelease,
storeReleases,
updateReleasesSearch,
fetchReleases,
fetchActorReleases,
fetchSiteReleases,
fetchNetworkReleases,
fetchTagReleases,
storeRelease,
storeReleases,
updateReleasesSearch,
};

View File

@@ -3,18 +3,18 @@
const knex = require('./knex');
/**
 * Fetch raw release rows.
 *
 * @param {number} [limit=100] - Maximum number of rows to return.
 * @returns {Promise<Object[]>} Rows from the `releases` table.
 */
async function fetchReleases(limit = 100) {
	const releases = await knex('releases').limit(limit);

	return releases;
}
/**
 * Search releases through the `search_releases` database function.
 *
 * @param {string} query - Search query, passed as a bound parameter.
 * @param {number} [limit=100] - Maximum number of rows to return.
 * @returns {Promise<Object[]>} Matching rows.
 */
async function searchReleases(query, limit = 100) {
	const releases = await knex.raw('SELECT * FROM search_releases(?) LIMIT ?;', [query, limit]);

	return releases.rows;
}
module.exports = {
fetchReleases,
searchReleases,
fetchReleases,
searchReleases,
};

View File

@@ -1,199 +0,0 @@
'use strict';
const config = require('config');
const Promise = require('bluebird');
const logger = require('./logger')(__filename);
const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const knex = require('./knex');
const scrapers = require('./scrapers/scrapers');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
const { storeReleases } = require('./releases');
/**
 * Resolve the site a release belongs to.
 *
 * Uses the site already attached to the release if there is one, then looks
 * the URL up as a site, and finally as a network. A matching network is
 * returned in place of a site, flagged as fallback.
 *
 * @param {string} [url] - Release URL.
 * @param {Object} [release] - Release that may already carry a `site`.
 * @returns {Promise<Object|null>} The site, a fallback network-as-site, or null.
 */
async function findSite(url, release) {
	if (release?.site) {
		return release.site;
	}

	if (!url) {
		return null;
	}

	const matchedSite = await findSiteByUrl(url);

	if (matchedSite) {
		return matchedSite;
	}

	const matchedNetwork = await findNetworkByUrl(url);

	return matchedNetwork
		? { ...matchedNetwork, network: matchedNetwork, isFallback: true }
		: null;
}
/**
 * Scrape a single release from its URL, merging the result into any basic
 * release info that is already known.
 *
 * @param {string|Object} [source] - Release URL, or a pre-scraped release with a `url`.
 * @param {Object} [basicRelease=null] - Basic release info when `source` is a URL.
 * @param {string} [type='scene'] - 'scene' uses the scraper's fetchScene, anything else fetchMovie.
 * @param {*} [beforeFetchLatest] - Value handed through to the scraper.
 * @returns {Promise<Object|null>} The release with its site attached, or null
 *   when known release info exists but the scraper cannot fetch this type.
 * @throws {Error} When no site or scraper is found, or the scraper cannot
 *   fetch this type and there is no release info to fall back on.
 */
async function scrapeRelease(source, basicRelease = null, type = 'scene', beforeFetchLatest) {
	// profile scraper may return either URLs or pre-scraped scenes
	const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined;
	const url = sourceIsUrlOrEmpty ? source : source?.url;
	const release = sourceIsUrlOrEmpty ? basicRelease : source;

	const site = basicRelease?.site || await findSite(url, release);

	if (!site) {
		throw new Error(`Could not find site for ${url} in database`);
	}

	if (!argv.deep && release) {
		return {
			...release,
			site,
		};
	}

	// a site without network should end in the scraper error below, not a TypeError
	const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network?.slug];

	if (!scraper) {
		throw new Error(`Could not find scraper for ${url}`);
	}

	if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
		if (release) {
			logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
			return null;
		}

		throw new Error(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
	}

	if (!release) {
		logger.info(`Scraping release from ${url}`);
	}

	const scrapedRelease = type === 'scene'
		? await scraper.fetchScene(url, site, release, beforeFetchLatest, include)
		: await scraper.fetchMovie(url, site, release, beforeFetchLatest, include);

	return {
		...release,
		...scrapedRelease,
		...(scrapedRelease && release?.tags && {
			// the deep scrape may not return tags; don't append undefined to the known ones
			tags: release.tags.concat(scrapedRelease.tags || []),
		}),
		site,
	};
}
/**
 * Scrape and store the movies referenced by the given releases, and link
 * each movie to the scenes that referenced it.
 *
 * Does nothing unless movies were requested on the command line.
 *
 * @param {Object[]} releases - Stored releases; `movie` is either a URL or an object with a `url`.
 * @returns {Promise<Object[]>} The scraped movies, empty when there are none.
 */
async function accumulateMovies(releases) {
	if (!argv.withMovies) return [];

	// scenes may share a movie, so group the scene IDs by movie URL
	const moviesByUrl = releases.reduce((acc, release) => {
		if (!release.movie) return acc;

		const movie = release.movie.url ? release.movie : { url: release.movie };

		if (!acc[movie.url]) {
			acc[movie.url] = {
				...movie,
				type: 'movie',
				sceneIds: [],
			};
		}

		acc[movie.url].sceneIds = acc[movie.url].sceneIds.concat(release.id);

		return acc;
	}, {});

	const baseMovies = Object.values(moviesByUrl);

	// avoid creating an empty batch when no release references a movie
	if (baseMovies.length === 0) return [];

	const movies = await Promise.all(baseMovies.map(movie => scrapeRelease(movie, null, 'movie')));
	const { releases: storedMovies } = await storeReleases(movies);

	const movieAssociations = storedMovies.reduce((acc, movie) => acc.concat(movie.sceneIds.map(sceneId => ({
		movie_id: movie.id,
		scene_id: sceneId,
	}))), []);

	if (movieAssociations.length > 0) {
		await knex('releases_movies').insert(movieAssociations);
	}

	return movies;
}
/**
 * Scrape a list of release sources and, when saving is enabled, store them
 * along with the movies they reference.
 *
 * @param {Array<string|Object>} sources - Release URLs or pre-scraped releases.
 * @param {string} [type='scene'] - Release type to scrape and assign.
 * @returns {Promise<Object[]>} The stored releases when saving, otherwise the scraped releases.
 */
async function scrapeReleases(sources, type = 'scene') {
	// sources the scraper could not fetch resolve to null and are dropped
	const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, null, type), {
		concurrency: 5,
	}).filter(Boolean);

	const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type }));

	if (!argv.save) {
		return curatedReleases;
	}

	const { releases: storedReleases } = await storeReleases(curatedReleases);

	await accumulateMovies(storedReleases);

	if (storedReleases) {
		logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
	}

	return storedReleases;
}
/**
 * Scrape the given sources as scenes.
 *
 * @param {Array<string|Object>} sources - Release URLs or pre-scraped releases.
 * @returns {Promise<Object[]>} Result of scrapeReleases for type 'scene'.
 */
async function scrapeScenes(sources) {
	const scenes = await scrapeReleases(sources, 'scene');

	return scenes;
}
/**
 * Scrape the given sources as movies.
 *
 * @param {Array<string|Object>} sources - Release URLs or pre-scraped releases.
 * @returns {Promise<Object[]>} Result of scrapeReleases for type 'movie'.
 */
async function scrapeMovies(sources) {
	const movies = await scrapeReleases(sources, 'movie');

	return movies;
}
/**
 * Follow the URL of every base release and merge in the scraped details.
 *
 * Releases that cannot be followed are returned unchanged; releases whose
 * scrape fails are returned with `deep: false`.
 *
 * @param {Object[]} baseReleases - Releases from an overview page.
 * @param {*} [beforeFetchLatest] - Value handed through to scrapeRelease.
 * @returns {Promise<Object[]>} One release per base release, in the same order.
 */
async function deepFetchReleases(baseReleases, beforeFetchLatest) {
	const fetchDeep = async (baseRelease) => {
		const canFollow = baseRelease.url || (baseRelease.path && baseRelease.site);

		if (!canFollow) {
			return baseRelease;
		}

		try {
			const scraped = await scrapeRelease(baseRelease.url, baseRelease, 'scene', beforeFetchLatest);

			if (!scraped) {
				logger.warn(`Release scraper returned empty result for ${baseRelease.url}`);

				return baseRelease;
			}

			return { ...baseRelease, ...scraped, deep: true };
		} catch (error) {
			logger.error(`Failed to scrape ${baseRelease.url}: ${error}`);

			return { ...baseRelease, deep: false };
		}
	};

	return Promise.map(baseReleases, fetchDeep, {
		concurrency: 2,
	});
}
// Public API of the release scraping module.
module.exports = {
deepFetchReleases,
scrapeMovies,
scrapeRelease,
scrapeReleases,
scrapeScenes,
};

View File

@@ -1,184 +0,0 @@
'use strict';
const Promise = require('bluebird');
const moment = require('moment');
const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const logger = require('./logger')(__filename);
const knex = require('./knex');
const { fetchIncludedSites } = require('./sites');
const scrapers = require('./scrapers/scrapers');
const { deepFetchReleases } = require('./scrape-releases');
const { storeReleases } = require('./releases');
/**
 * Turn the `after` command line argument into a date.
 *
 * The argument is either a date (YYYY-MM-DD or DD-MM-YYYY) or a distance in
 * time counted back from now (e.g. "1 month").
 *
 * @returns {Date} Date before which releases are considered too old.
 */
function getAfterDate() {
	const datePattern = /\d{2,4}-\d{2}-\d{2,4}/;

	if (!datePattern.test(argv.after)) {
		// using time distance (e.g. "1 month")
		const distance = argv.after.split(' ');

		return moment.utc().subtract(...distance).toDate();
	}

	// using date
	return moment.utc(argv.after, ['YYYY-MM-DD', 'DD-MM-YYYY']).toDate();
}
/**
 * Collect the entry IDs that should be treated as already scraped.
 *
 * @param {Object[]} latestReleases - Releases from the current page.
 * @param {Object[]} accReleases - Releases accumulated from previous pages.
 * @returns {Promise<Set<string>>} Entry IDs, as strings, of stored and accumulated releases.
 */
async function findDuplicateReleaseIds(latestReleases, accReleases) {
	const latestEntryIds = latestReleases.map(release => release.entryId);
	const storedReleases = await knex('releases').whereIn('entry_id', latestEntryIds);

	const storedEntryIds = storedReleases.map(release => String(release.entry_id));

	// include accumulated releases as duplicates to prevent an infinite
	// loop when the next page contains the same releases as the previous
	const accEntryIds = accReleases.map(release => String(release.entryId));

	return new Set([...storedEntryIds, ...accEntryIds]);
}
/**
 * Scrape the latest releases of a site page by page, accumulating the
 * releases that are neither duplicates nor too old.
 *
 * Recurses to the next page for as long as the current page yields unique
 * releases and either the oldest release on the page is newer than
 * `afterDate` (or, for pages without dates, the null-date limit is not yet
 * exceeded), or fewer than `argv.last` releases were accumulated.
 *
 * @param {Object} scraper - Site scraper; must provide `fetchLatest`.
 * @param {Object} site - Site to scrape, attached to every release.
 * @param {*} beforeFetchLatest - Value handed through to the scraper.
 * @param {Object[]} accSiteReleases - Handed through to the scraper.
 * @param {Date} [afterDate] - Date limit, defaults to getAfterDate().
 * @param {Object[]} [accReleases=[]] - Releases accumulated from previous pages.
 * @param {number} [page=argv.page] - Page to scrape.
 * @returns {Promise<Object[]>} Accumulated releases; empty when latest
 *   releases were not requested or the scraper cannot fetch them.
 */
async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteReleases, afterDate = getAfterDate(), accReleases = [], page = argv.page) {
if (!argv.latest || !scraper.fetchLatest) {
return [];
}
const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases, include);
if (!Array.isArray(latestReleases)) {
logger.warn(`Scraper returned ${latestReleases || 'null'} when fetching latest from '${site.name}' on '${site.network.name}'`);
return accReleases;
}
// an empty page ends the pagination
if (latestReleases.length === 0) {
return accReleases;
}
const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site }));
// the last release on the page is taken to be the oldest; its date may be missing
const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
// when redownloading, nothing counts as a duplicate
const duplicateReleaseIds = argv.redownload ? new Set() : await findDuplicateReleaseIds(latestReleases, accReleases);
const uniqueReleases = latestReleasesWithSite
.filter(release => !duplicateReleaseIds.has(String(release.entryId)) // drop releases that are already stored or accumulated
&& (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // keep undated releases and releases newer than the date limit; argv.last disables the date limit
logger.verbose(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);
if (
uniqueReleases.length > 0
// && (oldestReleaseOnPage || page < argv.pages)
&& ((oldestReleaseOnPage
? moment(oldestReleaseOnPage).isAfter(afterDate)
: accReleases.length + uniqueReleases.length <= argv.nullDateLimit)
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
) {
// oldest release on page is newer than specified date range, or latest count has not yet been met, fetch next page
return scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteReleases, afterDate, accReleases.concat(uniqueReleases), page + 1);
}
// this page alone satisfies the requested count, cut off the surplus
if (argv.last && uniqueReleases.length >= argv.last) {
return accReleases.concat(uniqueReleases).slice(0, argv.last);
}
if (oldestReleaseOnPage) {
return accReleases.concat(uniqueReleases);
}
// pages without dates are capped at the null-date limit
return accReleases.concat(uniqueReleases).slice(0, argv.nullDateLimit);
}
/**
 * Scrape the first page of upcoming releases of a site.
 *
 * @param {Object} scraper - Site scraper; `fetchUpcoming` is optional.
 * @param {Object} site - Site to scrape, attached to every release.
 * @param {*} beforeFetchLatest - Value handed through to the scraper.
 * @returns {Promise<Object[]>} Upcoming releases flagged `upcoming: true`; empty
 *   when not requested, unsupported, or the scraper returns nothing.
 */
async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) {
	if (!argv.upcoming || !scraper.fetchUpcoming) {
		return [];
	}

	const fetchedReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest, include);

	if (!fetchedReleases) {
		return [];
	}

	return fetchedReleases.map(release => ({ ...release, site, upcoming: true }));
}
/**
 * Scrape the latest and upcoming releases of a site, optionally following
 * every release URL for full details.
 *
 * @param {Object} scraper - Site scraper; `beforeFetchLatest` is optional and
 *   its result is handed to the fetch methods.
 * @param {Object} site - Site to scrape.
 * @param {Object[]} accSiteReleases - Handed through to the scraper.
 * @returns {Promise<Object[]>} Latest releases followed by upcoming releases.
 */
async function scrapeSiteReleases(scraper, site, accSiteReleases) {
	const beforeFetchLatest = await scraper.beforeFetchLatest?.(site, accSiteReleases);

	const [newReleases, upcomingReleases] = await Promise.all([
		scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteReleases), // fetch basic release info from scene overview
		scrapeUpcomingReleases(scraper, site, beforeFetchLatest), // fetch basic release info from upcoming overview
	]);

	if (argv.upcoming) {
		const latestSummary = argv.latest ? `Found ${newReleases.length}` : 'Ignoring';

		logger.info(`${site.name}: ${latestSummary} latest releases, ${upcomingReleases.length || '0'} upcoming releases`);
	}

	const baseReleases = [...newReleases, ...upcomingReleases];

	if (argv.deep) {
		// follow URL for every release
		return deepFetchReleases(baseReleases, beforeFetchLatest);
	}

	return baseReleases;
}
/**
 * Scrape a single site, never rejecting: ignored sites, sites without a
 * scraper and scrape failures all resolve to an empty list (with a log line).
 *
 * The scraper is looked up by the site's slug first and by its network's slug second.
 *
 * @param {object} site - site to scrape
 * @param {object} network - network the site belongs to (used for logging)
 * @param {object[]} [accSiteReleases=[]] - releases gathered from earlier sites
 * @returns {Promise<object[]>} scraped releases, each carrying a `site` reference
 */
async function scrapeSite(site, network, accSiteReleases = []) {
	const ignored = site.parameters?.ignore;

	if (ignored) {
		logger.warn(`Ignoring ${network.name}: ${site.name}`);
		return [];
	}

	const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];

	if (scraper) {
		try {
			const scraped = await scrapeSiteReleases(scraper, site, accSiteReleases);

			return scraped.map(release => ({ ...release, site }));
		} catch (error) {
			logger.error(`${site.name}: Failed to scrape releases: ${error.message}`);
			return [];
		}
	}

	logger.warn(`No scraper found for '${site.name}' (${site.slug})`);
	return [];
}
/**
 * Scrape every included network and site, then optionally print and store the result.
 *
 * Networks flagged `parameters.sequential` have their sites scraped one at a
 * time, each site receiving the releases gathered from the previous ones; all
 * other networks scrape their sites concurrently.
 *
 * NOTE(review): `Promise.map` / `Promise.reduce` are not native Promise methods —
 * presumably `Promise` is Bluebird here; confirm against the file's imports.
 *
 * @returns {Promise<void>}
 */
async function scrapeSites() {
	// one site after another, threading the accumulated releases through
	const scrapeSequentially = network => Promise.reduce(network.sites, async (acc, site) => {
		const accSiteReleases = await acc;
		const siteReleases = await scrapeSite(site, network, accSiteReleases);

		return accSiteReleases.concat(siteReleases);
	}, Promise.resolve([]));

	// several sites at once; the network may override the default of 2
	const scrapeConcurrently = network => Promise.map(network.sites, async site => scrapeSite(site, network), {
		concurrency: network.parameters?.concurrency || 2,
	});

	const scrapeNetwork = async (network) => {
		if (!network.parameters?.sequential) {
			return scrapeConcurrently(network);
		}

		logger.info(`Scraping '${network.name}' sequentially`);
		return scrapeSequentially(network);
	};

	const networks = await fetchIncludedSites();

	// 5 networks at a time
	const scrapedNetworks = await Promise.map(networks, scrapeNetwork, { concurrency: 5 });

	// sequential networks yield a flat list, concurrent ones a list per site; flatten both shapes
	const releases = scrapedNetworks.flat(2);

	if (argv.inspect) {
		console.log(releases);
	}

	if (argv.save) {
		await storeReleases(releases);
	}
}
module.exports = scrapeSites;

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -3,37 +3,37 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
/**
 * Strip the site prefix from release titles on sites that prepend it.
 *
 * For the listed sites the title is reduced to the segment after the first
 * `:` or `|`. Titles without such a separator (or a missing title) are left
 * untouched instead of throwing.
 *
 * @param {object} release - scraped release
 * @param {object} site - site the release belongs to; only `slug` is read
 * @returns {object} the original release, or a copy with the trimmed title
 */
function curateRelease(release, site) {
	if (!['bubblegumdungeon', 'ladygonzo'].includes(site.slug)) {
		return release;
	}

	const titleSegment = release.title?.split(/:|\|/)[1];

	if (titleSegment === undefined) {
		// no separator to split on; keep the title as scraped
		return release;
	}

	return {
		...release,
		title: titleSegment.trim(),
	};
}
async function networkFetchScene(url, site, release) {
const scene = await fetchScene(url, site, release);
const scene = await fetchScene(url, site, release);
return curateRelease(scene, site);
return curateRelease(scene, site);
}
async function fetchLatest(site, page = 1) {
const releases = await fetchApiLatest(site, page, false);
const releases = await fetchApiLatest(site, page, false);
return releases.map(release => curateRelease(release, site));
return releases.map(release => curateRelease(release, site));
}
async function fetchUpcoming(site, page = 1) {
const releases = await fetchApiUpcoming(site, page, false);
const releases = await fetchApiUpcoming(site, page, false);
return releases.map(release => curateRelease(release, site));
return releases.map(release => curateRelease(release, site));
}
module.exports = {
fetchLatest,
fetchProfile: fetchApiProfile,
fetchScene: networkFetchScene,
fetchUpcoming,
fetchLatest,
fetchProfile: fetchApiProfile,
fetchScene: networkFetchScene,
fetchUpcoming,
};

View File

@@ -3,47 +3,47 @@
const { fetchLatest, fetchScene } = require('./julesjordan');
/**
 * Fill in actors (and the `mff` tag) from a scene title when the scraper found no actors.
 *
 * The title is matched against a few "X presents Y" / "Y returns" style
 * phrasings; captured names containing common non-name words are discarded.
 * The `mff` tag is appended when more than one actor is known or the title
 * mentions a threesome.
 *
 * Returns a copy; the scene passed in is not modified.
 *
 * @param {object} scene - scraped scene with optional `title`, `actors` and `tags`
 * @returns {object} copy of the scene with `actors` and `tags` amended where applicable
 */
function extractActors(scene) {
	const release = { ...scene };
	const title = scene.title || '';

	if (!scene.actors || scene.actors.length === 0) {
		const introActorMatches = title.match(/(?:presents|introduces|features|welcomes) (\w+ \w+)/i);
		const introTwoActorMatches = title.match(/(?:presents|introduces|features|welcomes) (?:(\w+)|(\w+ \w+)) and (\w+ \w+)/i);
		const returnActorMatches = title.match(/(?:(^\w+)|(\w+ \w+))(?:,| (?:return|visit|pov|give|suck|lick|milk|love|enjoy|service|is))/i);
		const returnTwoActorMatches = title.match(/(\w+ \w+) and (?:(\w+)|(\w+ \w+)) (?:return|visit|give|suck|lick|milk|love|enjoy|service|are)/i);

		// two-actor phrasings take precedence over their single-actor counterparts
		const rawActors = (introTwoActorMatches || introActorMatches || returnTwoActorMatches || returnActorMatches)?.slice(1);

		// drop unmatched capture groups and captures that are clearly not names
		const actors = rawActors?.filter(actor => Boolean(actor)
			&& !/swallow|\bcum|fuck|suck|give|giving|take|takes|taking|head|teen|babe|cute|beaut|naughty|teacher|nanny|adorable|brunette|blonde|bust|audition|from|\band\b|\bto\b/i.test(actor));

		if (actors) {
			release.actors = actors;
		}
	}

	// case-insensitive, like every other title pattern above
	if (release.actors?.length > 1 || /threesome|threeway/i.test(title)) {
		release.tags = scene.tags ? [...scene.tags, 'mff'] : ['mff'];
	}

	return release;
}
async function fetchLatestWrap(site, page = 1) {
const latest = await fetchLatest(site, page);
const latest = await fetchLatest(site, page);
return latest.map(scene => extractActors(scene));
return latest.map(scene => extractActors(scene));
}
async function fetchSceneWrap(url, site) {
const scene = await fetchScene(url, site);
const scene = await fetchScene(url, site);
return extractActors(scene);
return extractActors(scene);
}
module.exports = {
fetchLatest: fetchLatestWrap,
fetchScene: fetchSceneWrap,
fetchLatest: fetchLatestWrap,
fetchScene: fetchSceneWrap,
};

View File

@@ -3,7 +3,7 @@
const { get, geta, ctxa } = require('../utils/q');
function extractActors(actorString) {
return actorString
return actorString
?.replace(/.*:|\(.*\)|\d+(-|\s)year(-|\s)old|nurses?|tangled/ig, '') // remove Patient:, (date) and other nonsense
.split(/\band\b|\bvs\b|\/|,|&/ig)
.map(actor => actor.trim())
@@ -12,120 +12,120 @@ function extractActors(actorString) {
}
/**
 * Select the known models whose name occurs in the actor string (case-insensitive).
 *
 * Names are matched literally: regex metacharacters in a name (the `.` in
 * "Dr. Gray", parentheses, ...) are escaped rather than interpreted.
 *
 * @param {string} actorString - free-form text naming the performers
 * @param {object[]} models - known models; only `name` is read
 * @returns {object[]} the models mentioned in the string
 */
function matchActors(actorString, models) {
	if (!actorString) {
		return [];
	}

	return models.filter((model) => {
		if (!model.name) {
			// an empty pattern would match every string
			return false;
		}

		const literalName = model.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

		return new RegExp(literalName, 'i').test(actorString);
	});
}
function scrapeLatest(scenes, site, models) {
return scenes.map(({ qu }) => {
const release = {};
return scenes.map(({ qu }) => {
const release = {};
const pathname = qu.url('a.itemimg').slice(1);
[release.entryId] = pathname.split('/').slice(-1);
release.url = `${site.url}${pathname}`;
const pathname = qu.url('a.itemimg').slice(1);
[release.entryId] = pathname.split('/').slice(-1);
release.url = `${site.url}${pathname}`;
release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true);
release.description = qu.q('.mas_longdescription', true);
release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);
release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true);
release.description = qu.q('.mas_longdescription', true);
release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);
const actorString = qu.q('.mas_description', true);
const actors = matchActors(actorString, models);
if (actors.length > 0) release.actors = actors;
else release.actors = extractActors(actorString);
const actorString = qu.q('.mas_description', true);
const actors = matchActors(actorString, models);
if (actors.length > 0) release.actors = actors;
else release.actors = extractActors(actorString);
const posterPath = qu.img('.itemimg img');
release.poster = `${site.url}/${posterPath}`;
const posterPath = qu.img('.itemimg img');
release.poster = `${site.url}/${posterPath}`;
return release;
});
return release;
});
}
/**
 * Return the substring of `html` that starts at the first `startToken` found at
 * or after `fromIndex` and ends with the first `endToken` following it.
 *
 * @returns {string|null} the substring including both tokens, or null when either token is missing
 */
function extractBetween(html, startToken, endToken, fromIndex = 0) {
	const start = html.indexOf(startToken, Math.max(fromIndex, 0));

	if (start === -1) {
		return null;
	}

	const end = html.indexOf(endToken, start);

	if (end === -1) {
		return null;
	}

	return html.slice(start, end + endToken.length);
}

/**
 * Build a release from a scene page.
 *
 * Actors are matched against the known models first; when none match, the
 * names are parsed out of the description text instead. The poster and trailer
 * paths are cut out of the raw page source, searching from the `splash:` and
 * `video/mp4` markers respectively.
 *
 * @param {object} page - `{ html, qu }`: raw page source and its query helper
 * @param {string} url - scene URL; its last path segment becomes the entry ID
 * @param {object} site - site the scene belongs to; `url` is used as the media base
 * @param {object[]} models - known models to match actor names against
 * @returns {object} scraped release
 */
function scrapeScene({ html, qu }, url, site, models) {
	const release = { url };

	[release.entryId] = url.split('/').slice(-1);
	release.title = qu.q('.mas_title', true);
	release.description = qu.q('.mas_longdescription', true);
	release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	// the description holds both the date and the performers; cut the date out before matching names
	const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, '');
	const actors = matchActors(actorString, models);

	release.actors = actors.length > 0 ? actors : extractActors(actorString);

	release.tags = qu.all('.tags a', true);
	release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`);

	// search from the 'splash:' marker; the marker string itself used to be passed as the start index,
	// which made the search start at 0 and pick up unrelated images earlier in the page
	const poster = extractBetween(html, 'faceimages/', '.jpg', html.indexOf('splash:'));
	if (poster) release.poster = `${site.url}/${poster}`;

	const trailer = extractBetween(html, '/content', '.mp4', html.indexOf('video/mp4'));
	if (trailer) release.trailer = { src: `${site.url}${trailer}` };

	return release;
}
function extractModels({ el }, site) {
const models = ctxa(el, '.item');
const models = ctxa(el, '.item');
return models.map(({ qu }) => {
const actor = { gender: 'female' };
return models.map(({ qu }) => {
const actor = { gender: 'female' };
const avatar = qu.q('.itemimg img');
actor.avatar = `${site.url}/${avatar.src}`;
actor.name = avatar.alt
.split(':').slice(-1)[0]
.replace(/xtreme girl|nurse/ig, '')
.trim();
const avatar = qu.q('.itemimg img');
actor.avatar = `${site.url}/${avatar.src}`;
actor.name = avatar.alt
.split(':').slice(-1)[0]
.replace(/xtreme girl|nurse/ig, '')
.trim();
const actorPath = qu.url('.itemimg');
actor.url = `${site.url}${actorPath.slice(1)}`;
const actorPath = qu.url('.itemimg');
actor.url = `${site.url}${actorPath.slice(1)}`;
return actor;
});
return actor;
});
}
async function fetchModels(site, page = 1, accModels = []) {
const url = `${site.url}/?models/${page}`;
const res = await get(url);
const url = `${site.url}/?models/${page}`;
const res = await get(url);
if (res.ok) {
const models = extractModels(res.item, site);
const nextPage = res.item.qa('.pagenumbers', true)
.map(pageX => Number(pageX))
.filter(Boolean) // remove << and >>
.includes(page + 1);
if (res.ok) {
const models = extractModels(res.item, site);
const nextPage = res.item.qa('.pagenumbers', true)
.map(pageX => Number(pageX))
.filter(Boolean) // remove << and >>
.includes(page + 1);
if (nextPage) {
return fetchModels(site, page + 1, accModels.concat(models));
}
if (nextPage) {
return fetchModels(site, page + 1, accModels.concat(models));
}
return accModels.concat(models, { name: 'Dr. Gray' });
}
return accModels.concat(models, { name: 'Dr. Gray' });
}
return [];
return [];
}
async function fetchLatest(site, page = 1, models) {
const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
const res = await geta(url, '.item');
const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
const res = await geta(url, '.item');
return res.ok ? scrapeLatest(res.items, site, models) : res.status;
return res.ok ? scrapeLatest(res.items, site, models) : res.status;
}
async function fetchScene(url, site, release, beforeFetchLatest) {
const models = beforeFetchLatest || await fetchModels(site);
const res = await get(url);
const models = beforeFetchLatest || await fetchModels(site);
const res = await get(url);
return res.ok ? scrapeScene(res.item, url, site, models) : res.status;
return res.ok ? scrapeScene(res.item, url, site, models) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
beforeFetchLatest: fetchModels,
fetchLatest,
fetchScene,
beforeFetchLatest: fetchModels,
};

View File

@@ -5,141 +5,141 @@ const { get, getAll, initAll, extractDate } = require('../utils/qu');
const { feetInchesToCm } = require('../utils/convert');
/**
 * List the candidate URLs for an image, highest resolution first.
 *
 * A `-1x.jpg` source yields its `-4x`, `-3x` and `-2x` variants followed by
 * the source itself. A source without that suffix yields just itself rather
 * than four identical entries, and a missing source yields an empty list.
 *
 * @param {string|null|undefined} source - image URL as found on the page
 * @returns {string[]} unique fallback URLs in order of preference
 */
function getFallbacks(source) {
	if (!source) {
		return [];
	}

	const candidates = ['-4x.jpg', '-3x.jpg', '-2x.jpg']
		.map(suffix => source.replace('-1x.jpg', suffix))
		.concat(source);

	// Set keeps first-seen order, so the preference order is preserved
	return [...new Set(candidates)];
}
function scrapeAll(scenes, site) {
return scenes.map(({ qu }) => {
const release = {};
return scenes.map(({ qu }) => {
const release = {};
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
release.url = qu.url('a');
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
release.url = qu.url('a');
release.title = qu.q('h5 a', true);
release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY');
release.title = qu.q('h5 a', true);
release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY');
release.actors = qu.q('h3', true).replace(/featuring:\s?/i, '').split(', ');
release.actors = qu.q('h3', true).replace(/featuring:\s?/i, '').split(', ');
const photoCount = qu.q('.stdimage', 'cnt');
[release.poster, ...release.photos] = Array.from({ length: Number(photoCount) }, (value, index) => {
const source = qu.img('.stdimage', `src${index}_1x`, site.url);
const photoCount = qu.q('.stdimage', 'cnt');
[release.poster, ...release.photos] = Array.from({ length: Number(photoCount) }, (value, index) => {
const source = qu.img('.stdimage', `src${index}_1x`, site.url);
return getFallbacks(source);
});
return getFallbacks(source);
});
return release;
});
return release;
});
}
function scrapeScene({ html, qu }, url) {
const release = { url };
const release = { url };
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
release.title = qu.q('h2', true);
release.description = qu.q('p', true);
release.title = qu.q('h2', true);
release.description = qu.q('p', true);
release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/);
release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/);
release.actors = qu.all('h5:not(.video_categories) a').map(actor => ({
name: qu.q(actor, null, true),
url: qu.url(actor, null),
}));
release.actors = qu.all('h5:not(.video_categories) a').map(actor => ({
name: qu.q(actor, null, true),
url: qu.url(actor, null),
}));
release.tags = qu.all('.video_categories a', true);
release.tags = qu.all('.video_categories a', true);
release.duration = qu.dur('.video_categories + p');
release.duration = qu.dur('.video_categories + p');
const poster = qu.img('a img');
const poster = qu.img('a img');
release.poster = getFallbacks(poster);
release.photos = qu.imgs('.featured-video img', 'src0_1x').map(source => getFallbacks(source));
release.poster = getFallbacks(poster);
release.photos = qu.imgs('.featured-video img', 'src0_1x').map(source => getFallbacks(source));
return release;
return release;
}
function scrapeProfile({ el, qu }) {
const profile = {};
const profile = {};
const bio = Array.from(qu.q('.widget-content').childNodes).reduce((acc, node, index, nodes) => {
const nextNode = nodes[index + 1];
const bio = Array.from(qu.q('.widget-content').childNodes).reduce((acc, node, index, nodes) => {
const nextNode = nodes[index + 1];
if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) {
acc[slugify(node.textContent, '_')] = nextNode.textContent.trim();
}
if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) {
acc[slugify(node.textContent, '_')] = nextNode.textContent.trim();
}
return acc;
}, {});
return acc;
}, {});
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
if (bio.age) profile.age = Number(bio.age);
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
if (bio.age) profile.age = Number(bio.age);
if (bio.height && /\d{3}/.test(bio.height)) profile.height = Number(bio.height.match(/\d+/)[0]);
if (bio.height && /\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height);
if (bio.height && /\d{3}/.test(bio.height)) profile.height = Number(bio.height.match(/\d+/)[0]);
if (bio.height && /\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height);
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase();
if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase();
if (bio.birth_location) profile.birthPlace = bio.birth_location;
if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single;
if (bio.birth_location) profile.birthPlace = bio.birth_location;
if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single;
if (bio.eye_color) profile.eyes = bio.eye_color;
if (bio.eye_color) profile.eyes = bio.eye_color;
const avatar = qu.img('.tac img');
profile.avatar = getFallbacks(avatar);
const avatar = qu.img('.tac img');
profile.avatar = getFallbacks(avatar);
profile.releases = scrapeAll(initAll(el, '.featured-video'));
profile.releases = scrapeAll(initAll(el, '.featured-video'));
return profile;
return profile;
}
async function fetchLatest(site, page) {
const url = `${site.url}/tour/categories/movies_${page}_d.html`;
const res = await getAll(url, '.featured-video');
const url = `${site.url}/tour/categories/movies_${page}_d.html`;
const res = await getAll(url, '.featured-video');
if (res.ok) {
return scrapeAll(res.items, site);
}
if (res.ok) {
return scrapeAll(res.items, site);
}
return res.status;
return res.status;
}
async function fetchScene(url, site) {
const res = await get(url, '.page-content .row');
const res = await get(url, '.page-content .row');
if (res.ok) {
return scrapeScene(res.item, url, site);
}
if (res.ok) {
return scrapeScene(res.item, url, site);
}
return res.status;
return res.status;
}
async function fetchProfile(actorName, scraperSlug, site) {
const actorSlug = slugify(actorName, '');
const url = `${site.url}/tour/models/${actorSlug}.html`;
const res = await get(url, '.page-content .row');
const actorSlug = slugify(actorName, '');
const url = `${site.url}/tour/models/${actorSlug}.html`;
const res = await get(url, '.page-content .row');
if (res.ok) {
return scrapeProfile(res.item);
}
if (res.ok) {
return scrapeProfile(res.item);
}
return res.status;
return res.status;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'babes');
return fetchProfile(actorName, 'babes');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -6,144 +6,144 @@ const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
function scrapeAll(scenes, site) {
return scenes.map(({ qu }) => {
const release = {};
return scenes.map(({ qu }) => {
const release = {};
release.title = qu.q('h3 a', true);
release.url = qu.url('h3 a');
release.title = qu.q('h3 a', true);
release.url = qu.url('h3 a');
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
release.duration = qu.dur('.item-meta li:nth-child(2)');
release.description = qu.q('.description', true);
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
release.duration = qu.dur('.item-meta li:nth-child(2)');
release.description = qu.q('.description', true);
release.actors = qu.all('a[href*="/models"]', true);
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
release.actors = qu.all('a[href*="/models"]', true);
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
[release.poster, ...release.photos] = qu.all('.item-thumbs img')
.map(source => [
source.getAttribute('src0_3x'),
source.getAttribute('src0_2x'),
source.getAttribute('src0_1x'),
]
.filter(Boolean)
.map(fallback => (/^http/.test(fallback) ? fallback : `${site.url}${fallback}`)));
[release.poster, ...release.photos] = qu.all('.item-thumbs img')
.map(source => [
source.getAttribute('src0_3x'),
source.getAttribute('src0_2x'),
source.getAttribute('src0_1x'),
]
.filter(Boolean)
.map(fallback => (/^http/.test(fallback) ? fallback : `${site.url}${fallback}`)));
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
return release;
});
return release;
});
}
function scrapeScene({ html, qu }, url, site) {
const release = { url };
const release = { url };
release.title = qu.q('.item-episode h4 a', true);
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
release.duration = qu.dur('.item-meta li:nth-child(2)');
release.description = qu.q('.description', true);
release.title = qu.q('.item-episode h4 a', true);
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
release.duration = qu.dur('.item-meta li:nth-child(2)');
release.description = qu.q('.description', true);
release.actors = qu.all('.item-episode a[href*="/models"]', true);
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
release.actors = qu.all('.item-episode a[href*="/models"]', true);
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
const posterPath = html.match(/poster="(.*.jpg)"/)?.[1];
const trailerPath = html.match(/video src="(.*.mp4)"/)?.[1];
const posterPath = html.match(/poster="(.*.jpg)"/)?.[1];
const trailerPath = html.match(/video src="(.*.mp4)"/)?.[1];
if (posterPath) {
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
release.poster = [
poster.replace('-1x', '-3x'),
poster.replace('-1x', '-2x'),
poster,
];
}
if (posterPath) {
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
release.poster = [
poster.replace('-1x', '-3x'),
poster.replace('-1x', '-2x'),
poster,
];
}
if (trailerPath) {
const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;
release.trailer = { src: trailer };
}
if (trailerPath) {
const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;
release.trailer = { src: trailer };
}
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
return release;
return release;
}
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
const res = await get(url);
const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
const res = await get(url);
if (!res.ok) return [];
if (!res.ok) return [];
const quReleases = initAll(res.item.el, '.item-episode');
const releases = scrapeAll(quReleases, site);
const quReleases = initAll(res.item.el, '.item-episode');
const releases = scrapeAll(quReleases, site);
const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);
const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);
if (nextPage) {
return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
}
if (nextPage) {
return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
}
return accScenes.concat(releases);
return accScenes.concat(releases);
}
/**
 * Build an actor profile from a model page.
 *
 * The `.stats li` rows are read as `Key: value` pairs; rows without a colon
 * are skipped and only the first colon separates key from value. Height,
 * measurements and avatar candidates are derived from those pairs and the
 * profile picture. With `withScenes`, the actor's releases are fetched too.
 *
 * @param {object} page - `{ qu }`: query helper for the model page
 * @param {object} site - site the profile belongs to; `url` prefixes relative image paths
 * @param {boolean} withScenes - whether to fetch the actor's releases as well
 * @returns {Promise<object>} scraped profile
 */
async function scrapeProfile({ qu }, site, withScenes) {
	const profile = {};

	const bio = qu.all('.stats li', true).reduce((acc, row) => {
		const separatorIndex = row.indexOf(':');

		if (separatorIndex === -1) {
			// not a key/value row
			return acc;
		}

		const key = row.slice(0, separatorIndex);
		const value = row.slice(separatorIndex + 1).trim();

		return { ...acc, [slugify(key, '_')]: value };
	}, {});

	if (bio.height) profile.height = feetInchesToCm(bio.height);

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	// highest resolution first; relative paths are resolved against the site URL
	profile.avatar = [
		qu.q('.profile-pic img', 'src0_3x'),
		qu.q('.profile-pic img', 'src0_2x'),
		qu.q('.profile-pic img', 'src0_1x'),
	].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));

	if (withScenes) {
		// the numeric actor ID is embedded in the profile picture's element ID
		const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

		if (actorId) {
			profile.releases = await fetchActorReleases(actorId, site);
		}
	}

	return profile;
}
async function fetchLatest(site, page = 1) {
const url = `${site.url}/categories/movies/${page}/latest/`;
const res = await geta(url, '.item-episode');
const url = `${site.url}/categories/movies/${page}/latest/`;
const res = await geta(url, '.item-episode');
return res.ok ? scrapeAll(res.items, site) : res.status;
return res.ok ? scrapeAll(res.items, site) : res.status;
}
async function fetchScene(url, site) {
const res = await get(url);
const res = await get(url);
return res.ok ? scrapeScene(res.item, url, site) : res.status;
return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
/**
 * Fetch and scrape an actor's profile page.
 *
 * The page is looked up under the actor's name slugified without a separator
 * first, and with the default separator if that request fails.
 *
 * @param {string} actorName - actor's display name
 * @param {string} scraperSlug - unused here; part of the shared scraper signature
 * @param {object} site - site to query; `url` is the base
 * @param {object} [include] - scrape options; `scenes` enables fetching the actor's releases
 * @returns {Promise<object|number>} scraped profile, or the status of the failed request
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
	const actorSlugA = slugify(actorName, '');
	const actorSlugB = slugify(actorName);

	const resA = await get(`${site.url}/models/${actorSlugA}.html`);
	const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`);

	// `include` is optional; without it no releases are fetched
	return res.ok ? scrapeProfile(res.item, site, include?.scenes) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -8,99 +8,99 @@ const clusterId = '617fb597b659459bafe6472470d9073a';
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
const genderMap = {
M: 'male',
F: 'female',
M: 'male',
F: 'female',
};
function getScreenUrl(item, scene) {
return `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${item.screenId}.jpg`;
return `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${item.screenId}.jpg`;
}
/**
 * Convert a hex ID to the base64 variant used in video URLs:
 * `+` becomes `-`, `/` becomes `_` and the `=` padding becomes `,`.
 *
 * @param {string} id - hexadecimal ID
 * @returns {string} URL-friendly base64 form of the ID
 */
function encodeId(id) {
	const substitutes = { '+': '-', '/': '_', '=': ',' };
	const base64Id = Buffer.from(id, 'hex').toString('base64');

	return base64Id.replace(/[+/=]/g, character => substitutes[character]);
}
/**
 * Convert a URL-friendly base64 ID back to hex: `-` is restored to `+`,
 * `_` to `/` and `,` to the `=` padding before decoding.
 *
 * @param {string} id - ID in the URL-friendly base64 form
 * @returns {string} hexadecimal ID
 */
function decodeId(id) {
	const restoredId = id
		.replace(/-/g, '+')
		.replace(/_/g, '/')
		.replace(/,/g, '=');

	return Buffer
		.from(restoredId, 'base64')
		.toString('hex');
}
/**
 * Convert a scene document from the search API into a release.
 *
 * The release date is truncated to midnight UTC. The screenshot flagged as
 * default becomes the poster; without one, the first remaining screenshot is
 * used as poster and left out of the photos. A scene without screenshots
 * simply gets no poster.
 *
 * @param {object} scene - scene document as returned by the API
 * @param {object} site - site the scene was requested for
 * @returns {object} scraped release
 */
function scrapeScene(scene, site) {
	const release = {
		site,
		entryId: scene.id,
		title: scene.name,
		description: scene.description,
		tags: scene.genres.concat(scene.actions).map(genre => genre.name),
		duration: scene.duration,
	};

	const slug = slugify(release.title);
	release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;

	// keep the calendar date only, dropping the time of day
	const date = new Date(scene.releaseDate);
	release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));

	release.actors = scene.actors.map(actor => ({ name: actor.name, gender: genderMap[actor.gender] }));

	if (scene.is4k) release.tags.push('4k');
	if (scene.gay) release.tags.push('gay');

	const defaultPoster = scene.screenshots.find(photo => photo.default === true);
	const photoset = scene.screenshots.filter(photo => photo.default === false);

	const photos = defaultPoster ? photoset : photoset.slice(1);
	const poster = defaultPoster || photoset[0];

	// a scene may have no screenshots at all
	if (poster) release.poster = getScreenUrl(poster, scene);
	release.photos = photos.map(photo => getScreenUrl(photo, scene));

	release.trailer = {
		src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
	};

	// condense the series name: strip '!', spaces and periods, spell out every '&'
	release.channel = scene.series.name
		.replace(/[! .]/g, '')
		.replace(/&/g, 'and');

	return release;
}
/**
 * Scrape every search hit into a release.
 *
 * @param {Object[]} scenes - search hits, each carrying the scene under `_source`
 * @param {Object} site - passed through to scrapeScene
 * @returns {Object[]} one release per hit
 */
function scrapeLatest(scenes, site) {
	return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
}
/**
 * Fetch one page (50 scenes) of the latest released scenes for a site.
 *
 * Posts a search query that requires status 'ok', a release date up to now
 * and a series ID matching `site.parameters.siteId`, excludes trailers, and
 * sorts by release date descending.
 *
 * @param {Object} site - site with `parameters.siteId`
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<Object[]>} scraped releases
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
		size: 50,
		from: (page - 1) * 50,
		query: {
			bool: {
				must: [
					{
						match: {
							status: 'ok',
						},
					},
					{
						range: {
							releaseDate: {
								lte: 'now',
							},
						},
					},
					/*
					 * global fetch
					{
						nested: {
							...
						},
					},
					*/
					{
						nested: {
							path: 'series',
							query: {
								bool: {
									must: [
										{
											match: {
												'series.id': {
													operator: 'AND',
													query: site.parameters.siteId,
												},
											},
										},
									],
								},
							},
						},
					},
				],
				must_not: [
					{
						match: {
							type: 'trailer',
						},
					},
				],
			},
		},
		sort: [
			{
				releaseDate: {
					order: 'desc',
				},
			},
		],
	}, {
		encodeJSON: true,
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	return scrapeLatest(res.body.hits.hits, site);
}
/**
 * Fetch and scrape a single scene from its public URL.
 *
 * The encoded scene ID is the second path segment of the URL; it is decoded
 * and used to request the scene document directly.
 *
 * @param {string} url - public scene URL
 * @param {Object} site - passed through to scrapeScene
 * @returns {Promise<Object>} scraped release
 */
async function fetchScene(url, site) {
	const encodedId = new URL(url).pathname.split('/')[2];
	const entryId = decodeId(encodedId);

	const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -10,44 +10,44 @@ const slugify = require('../utils/slugify');
const { ex } = require('../utils/q');
/**
 * Scrape all scene thumbnails (`.echThumb`) from a listing page.
 *
 * @param {string} html - listing page markup
 * @param {Object} [site] - stored on each release as-is
 * @returns {Object[]} one release per thumbnail
 */
function scrape(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.echThumb').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('.thmb_lnk');
		const title = sceneLinkElement.attr('title');
		const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
		const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
		// first path segment with its leading five characters removed
		const entryId = url.split('/')[3].slice(5);

		const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
		const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();

		const photoElement = $(element).find('.rollover-image');
		const poster = `https:${photoElement.attr('data-original')}`;

		// the rollover images are numbered big1.jpg up to the max index
		const photosUrl = photoElement.attr('data-rollover-url');
		const photosMaxIndex = photoElement.attr('data-rollover-max-index');
		const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);

		const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
		const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];

		return {
			url,
			entryId,
			shootId,
			title,
			actors,
			date,
			duration,
			poster,
			photos,
			rating: null,
			site,
			channel,
		};
	});
}
/* no dates available, breaks database
@@ -80,63 +80,63 @@ function scrapeUpcoming(html, site) {
*/
/**
 * Scrape a scene page into a release.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; the entry ID is the trailing digits of its first path segment
 * @param {Object} _site - unused
 * @returns {Object} scraped release
 */
function scrapeScene(html, url, _site) {
	const { qu } = ex(html, '.playerSection');
	const release = {};

	[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
	[release.entryId] = url.split('/')[3].match(/\d+$/);
	release.title = qu.q('.ps-vdoHdd h1', true);
	release.description = qu.q('.vdoDesc', true);

	release.actors = qu.all('a[href*="/model"]', true);
	release.tags = qu.all('.vdoTags a', true);

	// leading number of the like text, scaled down by 20
	release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;

	const poster = qu.img('img#player-overlay-image');
	release.poster = [
		poster,
		poster.replace('/big_trailer', '/members/450x340'), // load error fallback
	];

	release.trailer = { src: qu.trailer() };

	// all scenes seem to have 12 album photos available, not always included on the page
	const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
	release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));

	const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);

	// one chain, so the 'bangcasting' mapping is not overwritten by the final fallback
	if (channel === 'bangcasting') release.channel = 'bangbroscasting';
	else if (channel === 'remaster') release.channel = 'bangbrosremastered';
	else release.channel = channel;

	return release;
}
/**
 * Scrape an actor page into a profile with avatar (when present) and releases.
 *
 * @param {string} html - actor page markup
 * @returns {Object} profile
 */
function scrapeProfile(html) {
	const { q } = ex(html);
	const profile = {};

	// the avatar source lacks a protocol, so https: is prepended
	const avatar = q('.profilePic img', 'src');
	if (avatar) profile.avatar = `https:${avatar}`;

	profile.releases = scrape(html);

	return profile;
}
/**
 * Find the actor page link in search results.
 *
 * @param {string} html - search results markup
 * @param {string} actorName - matched case-insensitively against link titles
 * @returns {string|null} absolute actor URL, or null when no link matches
 */
function scrapeProfileSearch(html, actorName) {
	const { qu } = ex(html);
	const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);

	return actorLink ? `https://bangbros.com${actorLink}` : null;
}
/**
 * Fetch and scrape one page of the site's latest scenes.
 *
 * @param {Object} site - site with a `url` the page number is appended to
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]>} scraped releases
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/${page}`);

	return scrape(res.body.toString(), site);
}
/*
@@ -148,43 +148,43 @@ async function fetchUpcoming(site) {
*/
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL; must be on bangbros.com (optionally www.)
 * @param {Object} site - passed through to scrapeScene
 * @param {Object} [release] - base release; a warning is logged when it has no date
 * @returns {Promise<Object>} scraped release
 * @throws {Error} when the URL is not on bangbros.com
 */
async function fetchScene(url, site, release) {
	if (!release?.date) {
		logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
	}

	const { origin } = new URL(url);

	// validate before requesting, so unsupported hosts are never contacted
	if (!/^https?:\/\/(www\.)?bangbros\.com$/.test(origin)) {
		throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
	}

	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Look an actor up through the site search and scrape their profile page.
 *
 * @param {string} actorName - actor name to search for
 * @returns {Promise<Object|null>} profile, or null when the search or the
 *   profile request does not return 200, or no matching actor link is found
 */
async function fetchProfile(actorName) {
	const actorSlug = slugify(actorName);
	const url = `https://bangbros.com/search/${actorSlug}`;
	const res = await bhttp.get(url);

	if (res.statusCode !== 200) {
		return null;
	}

	const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);

	if (!actorUrl) {
		return null;
	}

	const actorRes = await bhttp.get(actorUrl);

	if (actorRes.statusCode === 200) {
		return scrapeProfile(actorRes.body.toString());
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
// fetchUpcoming, no dates available
fetchLatest,
fetchScene,
fetchProfile,
// fetchUpcoming, no dates available
};

View File

@@ -5,33 +5,33 @@
const { fetchScene, fetchLatest, fetchUpcoming, fetchProfile } = require('./gamma');
/**
 * Fetch a scene through the shared scraper, then point the release URL at the
 * channel's own domain when scraping for the network site.
 *
 * @param {string} url - scene URL on blowpass.com
 * @param {Object} site - site being scraped; `isNetwork` enables the URL rewrite
 * @param {Object} [baseRelease] - passed through to fetchScene
 * @returns {Promise<Object>} scraped release
 */
async function fetchSceneWrapper(url, site, baseRelease) {
	const release = await fetchScene(url, site, baseRelease);

	if (site.isNetwork && release.channel) {
		const channelUrl = url.replace('blowpass.com', `${release.channel}.com`);

		// these two channels use scene/ rather than video/ in their paths
		if (['onlyteenblowjobs', 'mommyblowsbest'].includes(release.channel)) {
			release.url = channelUrl.replace(/video\/\w+\//, 'scene/');
			return release;
		}

		release.url = channelUrl.replace(/video\/\w+\//, 'video/');
	}

	return release;
}
/**
 * Build the URL of a page of an actor's releases on the network site.
 *
 * @param {string} actorPath - actor path, appended directly after the category segment
 * @param {number} [page=1] - page number
 * @returns {string} releases URL
 */
function getActorReleasesUrl(actorPath, page = 1) {
	return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
}
/**
 * Fetch an actor profile through the shared scraper, supplying this network's
 * releases URL builder. The site is not forwarded; null is passed in its place.
 *
 * @param {string} actorName
 * @param {string} scraperSlug
 * @param {Object} site - unused
 * @param {*} include - passed through to fetchProfile
 * @returns {Promise<*>} whatever fetchProfile resolves to
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene: fetchSceneWrapper,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene: fetchSceneWrapper,
};

View File

@@ -5,90 +5,90 @@ const bhttp = require('bhttp');
const { ex } = require('../utils/q');
/**
 * Scrape an actor wiki page into a profile.
 *
 * The infobox rows are collected into a `bio` map keyed by the row label,
 * with its last character dropped and whitespace, '+', '|' and '/' replaced
 * by underscores.
 *
 * @param {string} html - actor page markup
 * @returns {Object} profile
 */
function scrapeProfile(html) {
	const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
	const profile = {};

	const bio = qu.all('.infobox tr[valign="top"]')
		.map(detail => qu.all(detail, 'td', true))
		.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});

	/* unreliable, see: Syren De Mer
	const catlinks = qa('#mw-normal-catlinks a', true);
	const isTrans = catlinks.some(link => link.match(/shemale|transgender/i));
	profile.gender = isTrans ? 'transsexual' : 'female';
	*/

	profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');

	profile.description = qu.q('#mw-content-text > p', true);

	// the place is whatever follows the last closing parenthesis
	if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1).trim();
	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

	if (bio.Measurements) {
		const measurements = bio.Measurements
			.match(/\d+(\w+)?-\d+-\d+/g)
			?.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in'
			.split('-');

		// account for measurements being just e.g. '32EE'
		if (measurements) {
			const [bust, waist, hip] = measurements;

			if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included

			profile.waist = Number(waist);
			profile.hip = Number(hip);
		}

		if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
	}

	if (bio.Bra_cup_size) {
		const bust = bio.Bra_cup_size.match(/^\d+\w+/);
		if (bust) [profile.bust] = bust;
	}

	if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
	if (bio.Boobs === 'Natural') profile.naturalBoobs = true;

	if (bio.Height) {
		// last decimal number is taken as metres; rounded to avoid float noise such as 1.1 * 100
		const heights = bio.Height.match(/\d+\.\d+/g);
		if (heights) profile.height = Math.round(Number(heights.slice(-1)[0]) * 100);
	}

	if (bio.Weight) {
		// the second number in the text is used; skipped when there is none
		const weights = bio.Weight.match(/\d+/g);
		if (weights && weights[1]) profile.weight = Number(weights[1]);
	}

	if (bio.Eye_color) profile.eyes = bio.Eye_color;
	if (bio.Hair) [profile.hair] = bio.Hair.split(',');

	if (bio.Blood_group) profile.blood = bio.Blood_group;
	if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');

	const avatarThumbPath = qu.img('.image img');

	if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
		// drop the thumbnail file name and the thumb/ directory to reach the full image
		const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');

		profile.avatar = {
			src: `http://www.boobpedia.com${avatarPath}`,
			copyright: null,
		};
	}

	profile.social = qu.urls('.infobox a.external');

	return profile;
}
/**
 * Fetch and scrape an actor's wiki page.
 *
 * @param {string} actorName - actor name; whitespace runs become underscores in the URL
 * @returns {Promise<Object|null>} profile, or null when the page does not return 200
 */
async function fetchProfile(actorName) {
	// global flag, so names of three or more words are fully converted
	const actorSlug = actorName.replace(/\s+/g, '_');
	const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString());
	}

	return null;
}
module.exports = {
fetchProfile,
fetchProfile,
};

View File

@@ -11,216 +11,216 @@ const slugify = require('../utils/slugify');
const { heightToCm, lbsToKg } = require('../utils/convert');
// hair color labels as shown on profile pages, mapped to the values stored on profiles
const hairMap = {
	Blonde: 'blonde',
	Brunette: 'brown',
	'Black Hair': 'black',
	Redhead: 'red',
};
/**
 * Scrape scene cards from a listing page, keeping either only the upcoming
 * or only the already released ones.
 *
 * @param {string} html - listing page markup
 * @param {Object} [site] - stored on each release as-is
 * @param {boolean} [upcoming] - truthy to keep upcoming scenes, falsy to keep released ones
 * @returns {Object[]} scraped releases
 */
function scrapeAll(html, site, upcoming) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.release-card.scene').toArray();

	return sceneElements.reduce((acc, element) => {
		const isUpcoming = $(element).find('.icon-upcoming.active').length === 1;

		// skip cards that do not match the requested kind
		if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) {
			return acc;
		}

		const sceneLinkElement = $(element).find('a');

		const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
		const title = sceneLinkElement.attr('title');
		const entryId = url.split('/').slice(-3, -2)[0];

		const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
		const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();

		const likes = Number($(element).find('.label-rating .like-amount').text());
		const dislikes = Number($(element).find('.label-rating .dislike-amount').text());

		const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
		const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();

		const channel = slugify($(element).find('.collection').attr('title'), '');

		return acc.concat({
			url,
			entryId,
			title,
			actors,
			date,
			poster,
			photos,
			rating: {
				likes,
				dislikes,
			},
			channel,
			site,
		});
	}, []);
}
/**
 * Scrape a scene page into a release.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; the entry ID is its third-to-last path segment
 * @param {Object} _site - unused
 * @returns {Promise<Object>} scraped release
 */
async function scrapeScene(html, url, _site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const release = {};

	// poster and trailer paths are embedded as JSON in an inline script; tolerate its absence
	const videoJson = $('script:contains("window.videoUiOptions")').html();
	const videoData = videoJson
		? JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1))
		: null;

	[release.entryId] = url.split('/').slice(-3, -2);
	release.title = $('.scene-title[itemprop="name"]').text();

	release.description = $('#scene-description p[itemprop="description"]')
		.contents()
		.first()
		.text()
		.trim();

	release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
	// the content attribute wraps the number of minutes in one character on each side
	release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;

	const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
		const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;

		return {
			name: $(actorElement).attr('title'),
			avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
		};
	}).toArray();

	// an empty array is truthy, so check the length to actually reach the fallback
	release.actors = actorsFromCards.length > 0
		? actorsFromCards
		: $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	release.likes = Number($('.label-rating .like').text());
	release.dislikes = Number($('.label-rating .dislike').text());

	const siteElement = $('.niche-site-logo');
	// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
	const siteName = siteElement.attr('title');
	release.channel = siteName.replace(/\s+/g, '').toLowerCase();

	release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();

	const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
	if (posterPath) release.poster = `https:${posterPath}`;

	if (videoData) {
		release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
			src: `https:${path}`,
			quality: Number(quality.match(/\d{3,}/)[0]),
		}));
	}

	return release;
}
/**
 * Find the actor link in search results.
 *
 * @param {string} html - search results markup
 * @param {string} url - unused
 * @param {string} actorName - matched case-insensitively against link titles
 * @returns {string|null} the link's href, or null when no link matches
 */
function scrapeActorSearch(html, url, actorName) {
	const { document } = new JSDOM(html).window;
	const actorLink = document.querySelector(`a[title="${actorName}" i]`);

	return actorLink ? actorLink.href : null;
}
/**
 * Scrape the releases on an actor page and recursively follow the pagination.
 *
 * @param {Object} page - query context with `qu` helpers and the page `html`
 * @param {Object[]} [accReleases=[]] - releases collected from previous pages
 * @returns {Promise<Object[]>} releases from all pages reached; stops at the
 *   first page without a next link or whose next page fails to load
 */
async function fetchActorReleases({ qu, html }, accReleases = []) {
	const releases = scrapeAll(html);
	const next = qu.url('.pagination .next a');

	if (next) {
		const url = `https://www.brazzers.com${next}`;
		const res = await get(url);

		if (res.ok) {
			return fetchActorReleases(res.item, accReleases.concat(releases));
		}
	}

	return accReleases.concat(releases);
}
/**
 * Scrape an actor page into a profile, including all paginated releases.
 *
 * @param {string} html - actor page markup
 * @param {string} url - unused
 * @param {string} actorName - stored as the profile name
 * @returns {Promise<Object>} profile
 */
async function scrapeProfile(html, url, actorName) {
	const qProfile = ex(html);
	const { q, qa } = qProfile;

	// labels and values are separate lists, paired up by index
	const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
	const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());

	const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});

	const profile = {
		name: actorName,
	};

	profile.description = q('.model-profile-specs p', true);

	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
	if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
	if (bio['Birth Location']) profile.birthPlace = bio['Birth Location'];
	// only the last comma-separated entry is used
	if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].trim().toLowerCase();

	if (bio.Height) profile.height = heightToCm(bio.Height);

	if (bio.Weight) {
		// skipped when the text contains no number
		const pounds = bio.Weight.match(/\d+/);
		if (pounds) profile.weight = lbsToKg(pounds[0]);
	}

	if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();

	if (bio['Tits Type'] && bio['Tits Type'].match('Natural')) profile.naturalBoobs = true;
	if (bio['Tits Type'] && bio['Tits Type'].match('Enhanced')) profile.naturalBoobs = false;

	if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
	if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;

	const avatarEl = q('.big-pic-model-container img');
	if (avatarEl) profile.avatar = `https:${avatarEl.src}`;

	profile.releases = await fetchActorReleases(qProfile);

	return profile;
}
/**
 * Fetch one page of the site's scenes and keep the released ones.
 *
 * @param {Object} site - site with a `url`
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]>} scraped releases
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/page/${page}/`);

	return scrapeAll(res.body.toString(), site, false);
}
/**
 * Fetch the site's front page and keep the upcoming scenes.
 *
 * @param {Object} site - site with a `url`
 * @returns {Promise<Object[]>} scraped upcoming releases
 */
async function fetchUpcoming(site) {
	const res = await bhttp.get(`${site.url}/`);

	return scrapeAll(res.body.toString(), site, true);
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {Object} site - passed through to scrapeScene
 * @returns {Promise<Object>} scraped release
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Search for an actor and scrape their profile page.
 *
 * The search term is sent in a `textSearch` cookie rather than in the URL.
 *
 * @param {string} actorName - actor name to search for
 * @returns {Promise<Object|null>} profile, or null when the search yields no matching link
 */
async function fetchProfile(actorName) {
	const searchUrl = 'https://brazzers.com/pornstars-search/';
	const searchRes = await bhttp.get(searchUrl, {
		headers: {
			Cookie: `textSearch=${encodeURIComponent(actorName)};`,
		},
	});

	const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

	if (actorLink) {
		const url = `https://brazzers.com${actorLink}`;
		const res = await bhttp.get(url);

		return scrapeProfile(res.body.toString(), url, actorName);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -4,139 +4,139 @@ const { get, geta, ctxa, ed } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Scrape a list of scene query contexts into releases.
 *
 * When `site.parameters.extract` is set, scenes whose badge text differs from
 * the site name are dropped.
 *
 * @param {Object[]} scenes - query contexts, each with `qu` helpers
 * @param {Object} [site] - site being scraped
 * @returns {Object[]} scraped releases
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const url = qu.url('.text-thumb a');
		const { pathname } = new URL(url);
		const channelUrl = qu.url('.badge');

		if (site?.parameters?.extract && qu.q('.badge', true) !== site.name) {
			return null;
		}

		const release = {};

		// prefer the scene's location on its channel when a badge link is present
		release.url = channelUrl ? `${channelUrl}${pathname}` : url;
		release.entryId = pathname.match(/\/\d+/)[0].slice(1);
		release.title = qu.q('.text-thumb a', true);

		release.date = qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
		release.duration = qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);

		release.actors = qu.all('.category a', true);

		release.poster = qu.img('img.video_placeholder, .video-images img');
		release.teaser = { src: qu.trailer() };

		return release;
	}).filter(Boolean);
}
/**
 * Build a release from a scene page query context.
 *
 * @param {Object} context - Query helpers `q`, `qd` and `qa` bound to the scene page.
 * @param {string} url - Scene URL; its first numeric path segment becomes the entry ID.
 * @param {Object} _site - Unused.
 * @param {Object} [baseRelease] - Release from the overview page. Its duration takes
 *   precedence, and when it already has a poster the scene image is stored under
 *   `photos` instead of `poster`.
 * @returns {Object} The scraped release.
 */
function scrapeScene({ q, qd, qa }, url, _site, baseRelease) {
	const release = { url };

	const { pathname } = new URL(url);
	release.entryId = pathname.match(/\/\d+/)[0].slice(1);

	release.title = q('.trailer-block_title', true);
	release.description = q('.info-block:nth-child(3) .text', true);
	release.date = qd('.info-block_data .text', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);

	// a missing info text or minute count yields NaN, which is skipped below
	const duration = baseRelease?.duration || Number(q('.info-block_data .text', true)?.match(/(\d+)\s+min/)?.[1]) * 60;
	if (duration) release.duration = duration;

	release.actors = qa('.info-block_data a[href*="/models"]', true);
	release.tags = qa('.info-block a[href*="/categories"]', true);

	// the thumbnail is optional; tried in order src0_3x, src0_2x, data-src
	const posterEl = q('.update_thumb');
	const poster = posterEl
		&& (posterEl.getAttribute('src0_3x') || posterEl.getAttribute('src0_2x') || posterEl.dataset.src);

	if (poster && baseRelease?.poster) release.photos = [poster];
	else if (poster) release.poster = poster;

	return release;
}
/**
 * Build an actor profile from a model page query context.
 *
 * Bio rows are read as key/value pairs: keys from the `span` labels (slugified
 * with `_`), values from `qtx` on each row. A field is only set on the profile
 * when the row exists and its value can be parsed.
 *
 * @param {Object} context - Query helpers `q`, `qa` and `qtx` bound to the model page.
 * @returns {Object} Profile with physical attributes, avatar and `releases`.
 */
function scrapeProfile({ q, qa, qtx }) {
	const profile = {};

	const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
	const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
	// later rows win on duplicate keys
	const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]]));

	// height and weight are taken from the parenthesised metric part, e.g. "(163cm)"
	const height = bio.height?.match(/\((\d+)cm\)/)?.[1];
	const weight = bio.weight?.match(/\((\d+)kg\)/)?.[1];

	if (height) profile.height = Number(height);
	if (weight) profile.weight = Number(weight);
	if (bio.race) profile.ethnicity = bio.race;

	if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
	if (bio.birthplace) profile.birthPlace = bio.birthplace;

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// parts containing "?" are placeholders, not measurements
		if (bust && !/\?/.test(bust)) profile.bust = bust;
		if (waist && !/\?/.test(waist)) profile.waist = waist;
		if (hip && !/\?/.test(hip)) profile.hip = hip;
	}

	if (bio.hair) profile.hair = bio.hair;
	if (bio.eyes) profile.eyes = bio.eyes;

	if (/various/i.test(bio.tattoos)) profile.hasTattoos = true;
	else if (/none/i.test(bio.tattoos)) profile.hasTattoos = false;
	else if (bio.tattoos) {
		profile.hasTattoos = true;
		profile.tattoos = bio.tattoos;
	}

	if (/various/i.test(bio.piercings)) profile.hasPiercings = true;
	else if (/none/i.test(bio.piercings)) profile.hasPiercings = false;
	else if (bio.piercings) {
		profile.hasPiercings = true;
		profile.piercings = bio.piercings;
	}

	if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());

	// the avatar image is optional; tried in order src0_3x, src0_2x, data-src
	const avatar = q('.model-img img');
	const avatarSrc = avatar
		&& (avatar.getAttribute('src0_3x') || avatar.getAttribute('src0_2x') || avatar.dataset.src);

	if (avatarSrc) profile.avatar = avatarSrc;

	const releases = qa('.video-thumb');
	profile.releases = scrapeAll(ctxa(releases));

	return profile;
}
/**
 * Fetch one page of the latest-movies listing and scrape its thumbnails.
 *
 * Sites flagged with `parameters.extract` are read from the cherrypimps.com
 * listing; all others from their own `site.url`.
 *
 * @param {Object} site - Site record (`url`, optional `parameters.extract`).
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]|*>} Scraped releases, or `res.status` when the
 *   request was not ok.
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.extract
		? `https://cherrypimps.com/categories/movies_${page}.html`
		: `${site.url}/categories/movies_${page}.html`;
	const res = await geta(url, 'div.video-thumb');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site record, passed through to `scrapeScene`.
 * @param {Object} [release] - Base release passed through to `scrapeScene`.
 * @returns {Promise<Object|*>} Scraped release, or `res.status` when the request
 *   was not ok.
 */
async function fetchScene(url, site, release) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
}
/**
 * Fetch an actor's model page, trying two slug spellings of the name.
 *
 * `cherrypimps` and `wildoncam` live on `<slug>.com`; every other scraper slug
 * has `xxx` stripped and is looked up on `<slug>.xxx`. The dash-separated slug
 * is requested first; the separator-less slug is only requested when the first
 * request is not ok.
 *
 * @param {string} actorName - Actor name.
 * @param {string} scraperSlug - Scraper slug used to derive the domain.
 * @returns {Promise<Object|*>} Scraped profile, or the status of the second
 *   request when neither page is ok.
 */
async function fetchProfile(actorName, scraperSlug) {
	const actorSlug = slugify(actorName);
	const actorSlug2 = slugify(actorName, '');

	const origin = ['cherrypimps', 'wildoncam'].includes(scraperSlug)
		? `https://${scraperSlug}.com`
		: `https://${scraperSlug.replace('xxx', '')}.xxx`;

	const res = await get(`${origin}/models/${actorSlug}.html`);
	if (res.ok) return scrapeProfile(res.item);

	const res2 = await get(`${origin}/models/${actorSlug2}.html`);
	return res2.ok ? scrapeProfile(res2.item) : res2.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -7,182 +7,182 @@ const slugify = require('../utils/slugify');
/* eslint-disable newline-per-chained-call */
/**
 * Scrape the scene cards (`.card.m-1`, excluding pornstar cards) from a listing page.
 *
 * Scene URLs are built on `site.url`, falling back to `origin` and then to
 * https://ddfnetwork.com.
 *
 * @param {string} html - Listing page HTML.
 * @param {Object|null} site - Site record; only `url` is read.
 * @param {string} [origin] - Origin to use when no site is given.
 * @returns {Object[]} One release per card.
 */
function scrapeAll(html, site, origin) {
	return exa(html, '.card.m-1:not(.pornstar-card)').map(({ q, qa, qd }) => {
		const release = {};

		release.title = q('a', 'title');
		release.url = `${site?.url || origin || 'https://ddfnetwork.com'}${q('a', 'href')}`;
		// the entry ID is the last path segment of the scene URL
		[release.entryId] = release.url.split('/').slice(-1);

		release.date = qd('small[datetime]', 'YYYY-MM-DD HH:mm:ss', null, 'datetime');
		release.actors = qa('.card-subtitle a', true).filter(Boolean);

		// the card text is read as minutes; NaN and 0 are left out
		const duration = parseInt(q('.card-info div:nth-child(2) .card-text', true), 10) * 60;
		if (duration) release.duration = duration;

		// a card without an image yields an undefined poster instead of throwing
		release.poster = q('img')?.dataset.src;

		return release;
	});
}
/**
 * Scrape a single scene page.
 *
 * @param {string} html - Scene page HTML.
 * @param {string} url - Scene URL; its last path segment becomes the entry ID.
 * @param {Object} _site - Unused.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene(html, url, _site) {
	const { qu } = ex(html);
	const release = {};

	[release.entryId] = url.split('/').slice(-1);

	release.title = qu.meta('itemprop=name');
	release.description = qu.q('.descr-box p', true);
	// prefer the uploadDate meta tag, fall back to the date printed on the page
	release.date = qu.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
		|| qu.date('.title-border:nth-child(2) p', 'MM.DD.YYYY');

	release.actors = qu.all('.pornstar-card > a', 'title');
	release.tags = qu.all('.tags-tab .tags a', true);

	// same rule as scrapeAll: leave the duration out rather than storing NaN
	const duration = parseInt(qu.q('.icon-video-red + span', true), 10) * 60;
	if (duration) release.duration = duration;

	release.likes = Number(qu.q('.icon-like-red + span', true));

	release.poster = qu.poster();
	release.photos = qu.urls('.photo-slider-guest .card a');

	release.trailer = qu.all('source[type="video/mp4"]').map(trailer => ({
		src: trailer.src,
		quality: Number(trailer.attributes.res.value),
	}));

	return release;
}
/**
 * Fetch and scrape an actor's scene listings from every linked site, then drop
 * duplicates by entry ID.
 *
 * @param {string[]} urls - Listing URLs; any URL containing `ddfnetwork` is skipped.
 * @returns {Promise<Object[]>} Unique releases, one per entry ID.
 */
async function fetchActorReleases(urls) {
	// DDF Network and DDF Network Stream list all scenes, exclude
	const sources = urls.filter(url => !/ddfnetwork/.test(url));

	const releases = await Promise.all(sources.map(async (url) => {
		const { html } = await get(url);

		return scrapeAll(html, null, new URL(url).origin);
	}));

	// DDF cross-releases scenes between sites, filter duplicates by entryId;
	// sorted newest first so that, with later entries overwriting earlier ones,
	// the earliest scene remains
	const newestFirst = releases
		.flat()
		.sort((releaseA, releaseB) => releaseB.date - releaseA.date);

	return Object.values(Object.fromEntries(newestFirst.map(release => [release.entryId, release])));
}
/**
 * Scrape an actor profile page and the actor's releases on the linked sites.
 *
 * Bio rows are paired by position: `.about-title` gives the key (slugified with
 * `_`), `.about-info` the value. Rows whose value is `-` are ignored. A field is
 * only set on the profile when its row exists and can be parsed.
 *
 * @param {string} html - Profile page HTML.
 * @param {string} _url - Unused.
 * @param {string} actorName - Name stored on the profile.
 * @returns {Promise<Object>} The scraped profile, including `releases`.
 */
async function scrapeProfile(html, _url, actorName) {
	const { qu } = ex(html);

	const keys = qu.all('.about-title', true).map(key => slugify(key, '_'));
	const values = qu.all('.about-info').map((el) => {
		// multi-part values are rendered as child elements; join their texts
		if (el.children.length > 0) {
			return Array.from(el.children, child => child.textContent.trim()).join(', ');
		}

		return el.textContent.trim();
	});

	const bio = Object.fromEntries(keys
		.map((key, index) => [key, values[index]])
		.filter(([, value]) => value !== '-'));

	const profile = {
		name: actorName,
	};

	profile.description = qu.q('.description-box', true);
	profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');

	if (bio.nationality) profile.nationality = bio.nationality;

	const bust = bio.bra_size?.match(/\d+\w+/)?.[0];
	const waist = bio.waist?.match(/\d+/)?.[0];
	const hip = bio.hips?.match(/\d+/)?.[0];
	const height = bio.height?.match(/\d{2,}/)?.[0];

	if (bust) profile.bust = bust;
	if (waist) profile.waist = Number(waist);
	if (hip) profile.hip = Number(hip);
	if (height) profile.height = Number(height);

	if (bio.tit_style && /Enhanced/.test(bio.tit_style)) profile.naturalBoobs = false;
	if (bio.tit_style && /Natural/.test(bio.tit_style)) profile.naturalBoobs = true;

	if (bio.body_art && /Tattoo/.test(bio.body_art)) profile.hasTattoos = true;
	if (bio.body_art && /Piercing/.test(bio.body_art)) profile.hasPiercings = true;

	if (bio.hair_style) profile.hair = bio.hair_style.split(',')[0].trim().toLowerCase();

	const eyes = bio.eye_color?.match(/\w+/)?.[0];
	if (eyes) profile.eyes = eyes.toLowerCase();

	// the value after the "|" separator is used; NaN is left out
	const shoes = Number(bio.shoe_size?.split('|')[1]);
	if (shoes) profile.shoes = shoes;

	// only protocol-relative avatar URLs are used; a missing data-src is skipped
	const avatarSrc = qu.q('.pornstar-details .card-img-top')?.dataset.src;
	if (avatarSrc?.startsWith('//')) profile.avatar = `https:${avatarSrc}`;

	profile.releases = await fetchActorReleases(qu.urls('.find-me-tab li a'));

	return profile;
}
/**
 * Fetch one page of the latest-videos search and scrape it.
 *
 * Sites flagged with `parameters.native` are queried on their own domain; all
 * others through ddfnetwork.com, filtered by the site's hostname.
 *
 * @param {Object} site - Site record (`url`, optional `parameters.native`).
 * @param {number} [page=1] - Result page number.
 * @returns {Promise<Object[]|number>} Scraped releases, or the HTTP status code
 *   when it is not 200.
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.native
		? `${site.url}/videos/search/latest/ever/allsite/-/${page}`
		: `https://ddfnetwork.com/videos/search/latest/ever/${new URL(site.url).hostname}/-/${page}`;

	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeAll(res.body.toString(), site);
	}

	return res.statusCode;
}
/**
 * Fetch a scene page and hand its HTML to the scene scraper.
 *
 * @param {string} url - Scene page URL, requested as given.
 * @param {Object} site - Site record, passed through to `scrapeScene`.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
	// DDF's main site moved to Porn World
	// const res = await bhttp.get(`https://ddfnetwork.com${new URL(url).pathname}`);
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Find an actor through the DDF search hints endpoint and scrape the profile
 * page of the first match.
 *
 * @param {string} actorName - Actor name to search for.
 * @returns {Promise<Object|null>} Scraped profile, or null when the search
 *   fails, has no pornstar matches, or the profile page is not a 200.
 */
async function fetchProfile(actorName) {
	const resSearch = await bhttp.post('https://ddfnetwork.com/search/ajax',
		{
			type: 'hints',
			word: actorName,
		},
		{
			decodeJSON: true,
			headers: {
				'x-requested-with': 'XMLHttpRequest',
			},
		});

	// NOTE(review): an array-valued `list` is treated as "no results" — presumably
	// the endpoint returns [] instead of an object in that case; confirm
	if (resSearch.statusCode !== 200 || Array.isArray(resSearch.body?.list)) {
		return null;
	}

	const matches = resSearch.body?.list?.pornstarsName;

	if (!matches || matches.length === 0) {
		return null;
	}

	const [actor] = matches;
	const url = `https://ddfnetwork.com${actor.href}`;

	const resActor = await bhttp.get(url);

	if (resActor.statusCode !== 200) {
		return null;
	}

	return scrapeProfile(resActor.body.toString(), url, actorName);
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared MindGeek scraper, pinned to the
 * `digitalplayground` site slug and the `modelprofile` argument.
 *
 * @param {string} actorName - Actor name to look up.
 * @returns {Promise<*>} Whatever the MindGeek `fetchProfile` returns.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'digitalplayground', 'modelprofile');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -7,136 +7,136 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Build the list of photo pages for an album.
 *
 * The last preview link on the album page names the highest-numbered photo
 * (`NNN.jpg`); every photo page from 001 up to that number is generated from it.
 *
 * @param {string} albumUrl - URL of the album page to inspect.
 * @returns {Promise<Array<{url: string, extract: Function}>>} One entry per photo
 *   page, each with an `extract` callback that reads the image source from that
 *   page's query context. Empty when the album has no usable preview link.
 */
async function getPhotos(albumUrl) {
	const res = await bhttp.get(albumUrl);
	const html = res.body.toString();
	const { document } = new JSDOM(html).window;

	const previewLinks = Array.from(document.querySelectorAll('.preview-image-container a'));
	const lastPhotoPage = previewLinks[previewLinks.length - 1]?.href;
	// the dot is escaped so only a real "<digits>.jpg" file name matches
	const lastPhotoFile = lastPhotoPage?.match(/\d+\.jpg/)?.[0];

	if (!lastPhotoFile) return [];

	const lastPhotoIndex = parseInt(lastPhotoFile, 10);

	return Array.from({ length: lastPhotoIndex }, (_value, index) => {
		const photoFile = `${(index + 1).toString().padStart(3, '0')}.jpg`;

		return {
			url: `https://blacksonblondes.com${lastPhotoPage.replace(/\d+\.jpg/, photoFile)}`,
			extract: ({ qu }) => qu.q('.scenes-module img', 'src'),
		};
	});
}
/**
 * Scrape the network-wide recent updates page, keeping only the scenes that
 * belong to the given site.
 *
 * A scene belongs to the site when `www.` plus its lowercased `.help-block`
 * text equals the host of `site.url`.
 *
 * @param {string} html - Recent updates page HTML.
 * @param {Object} site - Site record (`url`, `slug`); also attached to each release.
 * @returns {Object[]} Releases with url, entryId, title, actors, poster, teaser and site.
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const sceneElements = Array.from(document.querySelectorAll('.recent-updates'));
	const siteHost = new URL(site.url).host;

	const releases = [];

	for (const element of sceneElements) {
		const siteUrl = element.querySelector('.help-block').textContent;

		// skip scenes from a different dogfart site
		if (`www.${siteUrl.toLowerCase()}` === siteHost) {
			const sceneLinkElement = element.querySelector('.thumbnail');
			const url = `https://dogfartnetwork.com${sceneLinkElement.href}`;
			const { pathname } = new URL(url);
			const entryId = `${site.slug}_${pathname.split('/')[4]}`;

			// the title is split on ",", "&" and "and" to get the actor list
			const title = element.querySelector('.scene-title').textContent;
			const actors = title.split(/[,&]|\band\b/).map(actor => actor.trim());

			const poster = `https:${element.querySelector('img').src}`;
			const teaser = sceneLinkElement.dataset.preview_clip_url;

			releases.push({
				url,
				entryId,
				title,
				actors,
				poster,
				teaser: {
					src: teaser,
				},
				site,
			});
		}
	}

	return releases;
}
/**
 * Scrape a single scene page, including the list of its photo pages.
 *
 * @param {string} html - Scene page HTML.
 * @param {string} url - Scene URL; only origin and pathname are kept on the release.
 * @param {Object} site - Site record, attached to the release as-is.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene(html, url, site) {
	const { document } = new JSDOM(html).window;

	const title = document.querySelector('.description-title').textContent;
	const actors = Array.from(document.querySelectorAll('.more-scenes a')).map(({ textContent }) => textContent);

	// prefer the description meta tag; fall back to the visible description with
	// whitespace runs collapsed and the "...read more" link text removed
	const metaDescription = document.querySelector('meta[itemprop="description"]')?.content;
	const pageDescription = document.querySelector('.description')
		?.textContent
		.replace(/[ \t\n]{2,}/g, ' ')
		.replace('...read more', '')
		.trim();
	const description = metaDescription || pageDescription;

	const channel = document.querySelector('.site-name').textContent.split('.')[0].toLowerCase();
	const { origin, pathname } = new URL(url);
	// second-to-last path segment, prefixed with the channel
	const entryId = `${channel}_${pathname.split('/').slice(-2)[0]}`;

	const date = new Date(document.querySelector('meta[itemprop="uploadDate"]').content);
	// the page prints mm:ss; prefix hours so moment parses it as a duration
	const duration = moment
		.duration(`00:${document
			.querySelectorAll('.extra-info p')[1]
			.textContent
			.match(/\d+:\d+$/)[0]}`)
		.asSeconds();

	const trailerElement = document.querySelector('.html5-video');
	const poster = `https:${trailerElement.dataset.poster}`;
	const { trailer } = trailerElement.dataset;

	// the last pagination link points at the final photo album page
	const lastPhotosUrl = Array.from(document.querySelectorAll('.pagination a')).slice(-1)[0]?.href;
	const photos = lastPhotosUrl ? await getPhotos(`${origin}${pathname}${lastPhotosUrl}`) : [];

	// the page rating is halved and rounded down to get the star count
	const stars = Math.floor(Number(document.querySelector('span[itemprop="average"]')?.textContent || document.querySelector('span[itemprop="ratingValue"]')?.textContent) / 2);
	const tags = Array.from(document.querySelectorAll('.scene-details .categories a')).map(({ textContent }) => textContent);

	return {
		entryId,
		url: `${origin}${pathname}`,
		title,
		description,
		actors,
		date,
		duration,
		poster,
		photos,
		trailer: {
			src: trailer,
		},
		tags,
		rating: {
			stars,
		},
		site,
		channel,
	};
}
/**
 * Fetch one page of the network-wide scene listing and scrape the scenes that
 * belong to the given site.
 *
 * @param {Object} site - Site record, passed through to `scrapeLatest`.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]>} Result of `scrapeLatest`.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`https://dogfartnetwork.com/tour/scenes/?p=${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch a scene page and hand its HTML to the scene scraper.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site record, passed through to `scrapeScene`.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared MindGeek scraper, pinned to the
 * `fakehub` site slug and the `modelprofile` argument.
 *
 * @param {string} actorName - Actor name to look up.
 * @returns {Promise<*>} Whatever the MindGeek `fetchProfile` returns.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'fakehub', 'modelprofile');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -1,115 +1,115 @@
'use strict';
const {
fetchLatest,
fetchApiLatest,
fetchUpcoming,
fetchApiUpcoming,
fetchScene,
fetchProfile,
fetchApiProfile,
scrapeAll,
fetchLatest,
fetchApiLatest,
fetchUpcoming,
fetchApiUpcoming,
fetchScene,
fetchProfile,
fetchApiProfile,
scrapeAll,
} = require('./gamma');
const { get } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Derive the actor list of a release from its title.
 *
 * The first "solo" (any case) is removed, the rest is split on commas and the
 * word "and", and empty fragments are discarded.
 *
 * @param {Object} release - Release with a `title` string.
 * @returns {Object} Copy of the release with `actors` set from the title.
 */
function extractLowArtActors(release) {
	const actors = release.title
		.replace(/solo/i, '')
		.split(/,|\band\b/i)
		.map(actor => actor.trim())
		.filter(Boolean); // a title like "Solo" would otherwise yield an empty name

	return {
		...release,
		actors,
	};
}
/**
 * Fetch the latest releases for a Fame Digital site.
 *
 * Sites flagged with `parameters.api` go through the Gamma API. For the
 * `lowartfilms` site the actors are derived from each release title.
 *
 * @param {Object} site - Site record (`slug`, optional `parameters.api`).
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<*>} Releases, or whatever the underlying fetcher returned.
 */
async function networkFetchLatest(site, page = 1) {
	if (site.parameters?.api) return fetchApiLatest(site, page, false);

	const releases = await fetchLatest(site, page);

	// NOTE(review): other scrapers in this project return a status code instead of
	// a list on failure; presumably gamma's fetchLatest can too, so only map arrays
	if (site.slug === 'lowartfilms' && Array.isArray(releases)) {
		return releases.map(release => extractLowArtActors(release));
	}

	return releases;
}
/**
 * Fetch a single scene for a Fame Digital site.
 *
 * For the `lowartfilms` site the actors are derived from the release title.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site record; `slug` selects the Low Art handling.
 * @returns {Promise<*>} The release, or whatever `fetchScene` returned.
 */
async function networkFetchScene(url, site) {
	const release = await fetchScene(url, site);

	// only rewrite results that actually carry a title; a failed fetch is passed through
	if (site.slug === 'lowartfilms' && release?.title) {
		return extractLowArtActors(release);
	}

	return release;
}
/**
 * Fetch upcoming releases for a Fame Digital site, through the Gamma API when
 * the site is flagged with `parameters.api`.
 *
 * @param {Object} site - Site record.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<*>} Whatever the selected Gamma fetcher returns.
 */
async function networkFetchUpcoming(site, page = 1) {
	if (site.parameters?.api) return fetchApiUpcoming(site, page, true);

	return fetchUpcoming(site, page);
}
/**
 * Build the Peter North video listing URL for an actor.
 *
 * @param {string} actorPath - Actor path segment, inserted right after `All-Categories/0`.
 * @param {number} [page=1] - Listing page number.
 * @returns {string} The listing URL, sorted by latest.
 */
function getActorReleasesUrl(actorPath, page = 1) {
	const releasesUrl = `https://www.peternorth.com/en/videos/All-Categories/0${actorPath}/All-Dvds/0/latest/${page}`;

	return releasesUrl;
}
/**
 * Collect an actor's releases from a site that only offers a pornstar dropdown.
 *
 * The actor is located by comparing the slugified option texts on
 * `/en/pornstars` with the slugified actor name; the matching option's value is
 * the path of the actor page, which is then scraped for releases.
 *
 * @param {string} actorName - Actor name to look for.
 * @param {string} siteSlug - Site slug, used as the `.com` domain name.
 * @returns {Promise<{releases: Object[]}|null>} The actor's releases, or null
 *   when a request fails or the actor is not listed.
 */
async function fetchClassicProfile(actorName, siteSlug) {
	const actorSlug = slugify(actorName);

	const url = `https://${siteSlug}.com/en/pornstars`;
	const pornstarsRes = await get(url);

	if (!pornstarsRes.ok) return null;

	const actorPath = pornstarsRes.item.qa('option[value*="/pornstar"]')
		.find(el => slugify(el.textContent) === actorSlug)
		?.value;

	if (actorPath) {
		const actorUrl = `https://${siteSlug}.com${actorPath}`;
		const res = await get(actorUrl);

		if (res.ok) {
			const releases = scrapeAll(res.item, null, `https://www.${siteSlug}.com`, false);

			return { releases };
		}
	}

	return null;
}
/**
 * Assemble an actor profile from the Fame Digital sites.
 *
 * Devil's Film and Rocco Siffredi are queried through the Gamma API. Peter
 * North, Silvia Saint and Silverstone DVD are only queried when
 * `include.scenes` is set. Profile fields are merged with Devil's Film taking
 * precedence over Rocco Siffredi, and Rocco Siffredi over Peter North; releases
 * from all five sources are concatenated.
 *
 * @param {string} actorName - Actor name to look up.
 * @param {string} scraperSlug - Unused; kept for the shared scraper signature.
 * @param {Object} site - Unused; kept for the shared scraper signature.
 * @param {Object} [include] - Fetch options; `scenes` enables the scene-only sources.
 * @returns {Promise<Object|null>} Merged profile, or null when none of Devil's
 *   Film, Rocco Siffredi or Peter North returned anything.
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	const withScenes = Boolean(include?.scenes);

	// not all Fame Digital sites offer Gamma actors
	const [devils, rocco, peter, silvia, silverstone] = await Promise.all([
		fetchApiProfile(actorName, 'devilsfilm', true),
		fetchApiProfile(actorName, 'roccosiffredi'),
		// skipped sources resolve to null, so they never count as a found profile
		withScenes ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : null,
		withScenes ? fetchClassicProfile(actorName, 'silviasaint') : null,
		withScenes ? fetchClassicProfile(actorName, 'silverstonedvd') : null,
	]);

	if (devils || rocco || peter) {
		const releases = [].concat(
			devils?.releases || [],
			rocco?.releases || [],
			peter?.releases || [],
			silvia?.releases || [],
			silverstone?.releases || [],
		);

		return {
			...peter,
			...rocco,
			...devils,
			releases,
		};
	}

	return null;
}
module.exports = {
fetchLatest: networkFetchLatest,
fetchProfile: networkFetchProfile,
fetchScene: networkFetchScene,
fetchUpcoming: networkFetchUpcoming,
fetchLatest: networkFetchLatest,
fetchProfile: networkFetchProfile,
fetchScene: networkFetchScene,
fetchUpcoming: networkFetchUpcoming,
};

View File

@@ -4,7 +4,7 @@ const { fetchLatest, fetchUpcoming, fetchScene } = require('./gamma');
module.exports = {
fetchLatest,
fetchScene,
fetchUpcoming,
fetchLatest,
fetchScene,
fetchUpcoming,
};

View File

@@ -5,89 +5,89 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Build an actor profile from a profile page.
 *
 * Bio values are read from the page's filter links (`a[href^="/babes"]`): the bracketed
 * word in each link is used as the key and the text after the first '=' as the value.
 *
 * @param {string} html - Profile page markup.
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Object} Profile with whichever bio fields were found on the page.
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), el => decodeURI(el.href)).reduce((acc, item) => {
		const keyMatch = item.match(/\[\w+\]/);

		if (keyMatch) {
			const key = keyMatch[0].slice(1, -1);
			const [, value] = item.split('=');

			// both hip and waist link to 'waist', assume biggest value is hip
			if (key === 'waist' && acc.waist) {
				// compare the leading number; comparing the raw strings ranks '100' below '90'
				if (Number.parseFloat(acc.waist) > Number.parseFloat(value)) {
					acc.hip = acc.waist;
					acc.waist = value;
				} else {
					acc.hip = value;
				}

				return acc;
			}

			acc[key] = value;
		}

		return acc;
	}, {});

	if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();

	// the guard used to read profile.placeOfBirth, which is never set, and a missing part
	// was rendered as the literal text 'undefined'
	if (bio.placeOfBirth || bio.country) profile.birthPlace = [bio.placeOfBirth, bio.country].filter(Boolean).join(', ');

	profile.eyes = bio.eyeColor;
	profile.hair = bio.hairColor;
	profile.ethnicity = bio.ethnicity;

	profile.bust = bio.bra;
	// only the part before the first comma is used for numeric fields
	if (bio.waist) profile.waist = Number(bio.waist.split(',')[0]);
	if (bio.hip) profile.hip = Number(bio.hip.split(',')[0]);

	if (bio.height) profile.height = Number(bio.height.split(',')[0]);
	if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);

	profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);

	// the image element may be absent; skip the avatar instead of throwing
	const avatar = document.querySelector('.profile-image-large img')?.src;
	if (avatar && !avatar.match('placeholder')) profile.avatar = { src: avatar, copyright: null };

	return profile;
}
/**
 * Extract the first result link from a search results page.
 *
 * @param {string} html - Search results markup.
 * @returns {string|null} The href of the first `a.image-link`, or null when there is none.
 */
function scrapeSearch(html) {
	const { document } = new JSDOM(html).window;

	return document.querySelector('a.image-link')?.href || null;
}
/**
 * Fetch an actor profile, first by guessing the profile URL from the name and, if that
 * does not return HTTP 200, by following the first search result.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<Object|null>} Scraped profile, or null when no profile page was found.
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

	const res = await bhttp.get(`https://freeones.nl/${actorSlug}/profile`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	// encode the name so spaces, '&' and similar characters cannot break the query string
	const searchRes = await bhttp.get(`https://freeones.nl/babes?q=${encodeURIComponent(actorName)}`);
	const actorPath = scrapeSearch(searchRes.body.toString());

	if (!actorPath) {
		return null;
	}

	const actorRes = await bhttp.get(`https://freeones.nl${actorPath}/profile`);

	if (actorRes.statusCode === 200) {
		return scrapeProfile(actorRes.body.toString(), actorName);
	}

	return null;
}
module.exports = {
fetchProfile,
fetchProfile,
};

View File

@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Scrape the summary bio from an actor's front page and locate the full bio page.
 *
 * Keys and values are read from the `dt`/`dd` pairs inside `.dashboard-bio-list`.
 *
 * @param {string} html - Front page markup.
 * @param {string} url - Front page URL (unused here).
 * @param {string} name - Name stored on the resulting profile.
 * @returns {Promise<{profile: Object, url: string}>} The partial profile and the bio page URL.
 */
async function scrapeProfileFrontpage(html, url, name) {
	const { document } = new JSDOM(html).window;
	const bioEl = document.querySelector('.dashboard-bio-list');

	const bioUrl = `https:${document.querySelector('.seemore a').href}`;

	const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
	const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());

	const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

	const profile = {
		name,
		gender: 'female',
	};

	const birthdateString = bio['Date of Birth:'];
	const measurementsString = bio['Measurements:'];

	const birthCityString = bio['Place of Birth:'];
	const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;

	const birthCountryString = bio['Country of Origin:'];
	const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;

	const piercingsString = bio['Piercings:'];
	const tattoosString = bio['Tattoos:'];

	if (birthdateString && birthdateString !== 'Unknown (add)') {
		// drop a trailing ' (…)' suffix; without one, indexOf returns -1 and an unconditional
		// slice would cut off the last character of the date
		const suffixIndex = birthdateString.indexOf(' (');
		const dateString = suffixIndex === -1 ? birthdateString : birthdateString.slice(0, suffixIndex);

		profile.birthdate = moment.utc(dateString, 'MMMM D, YYYY').toDate();
	}

	if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));

	if (bio['Fake Boobs:']) profile.naturalBoobs = bio['Fake Boobs:'] === 'No';
	profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;

	// these fields may be missing from the list; skip them rather than throw on undefined
	if (bio['Hair Color:']) profile.hair = bio['Hair Color:'].toLowerCase();
	if (bio['Eye Color:']) profile.eyes = bio['Eye Color:'].toLowerCase();

	if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
	if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');

	if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
	if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;

	profile.social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);

	return {
		profile,
		url: bioUrl,
	};
}
/**
 * Scrape the full biography table and merge it over the front page profile.
 *
 * Keys and values are read from the first and second cell of each row in `#biographyTable`.
 *
 * @param {string} html - Bio page markup.
 * @param {Object} frontpageProfile - Profile scraped from the front page; used as the base.
 * @param {string} url - Bio page URL (unused here).
 * @param {string} name - Name stored on the resulting profile.
 * @returns {Promise<Object>} The merged profile.
 */
async function scrapeProfileBio(html, frontpageProfile, url, name) {
	const { document } = new JSDOM(html).window;
	const bioEl = document.querySelector('#biographyTable');

	const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
	const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());

	const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

	const profile = {
		...frontpageProfile,
		name,
		gender: 'female',
	};

	const birthdateString = bio['Date of Birth:'];
	const measurementsString = bio['Measurements:'];

	const birthCityString = bio['Place of Birth:'];
	const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;

	const birthCountryString = bio['Country of Origin:'];
	const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;

	const piercingsString = bio['Piercings:'];
	const tattoosString = bio['Tattoos:'];

	if (birthdateString && birthdateString !== 'Unknown') {
		// drop a trailing ' (…)' suffix; without one, indexOf returns -1 and an unconditional
		// slice would cut off the last character of the date
		const suffixIndex = birthdateString.indexOf(' (');
		const dateString = suffixIndex === -1 ? birthdateString : birthdateString.slice(0, suffixIndex);

		profile.birthdate = moment.utc(dateString, 'MMMM D, YYYY').toDate();
	}

	if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));

	// the guard used to test 'Fake boobs' without the colon, which never matches a scraped key
	if (bio['Fake boobs:']) profile.naturalBoobs = bio['Fake boobs:'] === 'No';
	profile.ethnicity = bio['Ethnicity:'];

	profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;

	// rows may be missing or contain no digits; keep the front page value instead of throwing
	if (bio['Hair Color:']) profile.hair = bio['Hair Color:'].toLowerCase();
	if (bio['Eye Color:']) profile.eyes = bio['Eye Color:'].toLowerCase();

	const height = bio['Height:']?.match(/\d+/)?.[0];
	const weight = bio['Weight:']?.match(/\d+/)?.[0];

	if (height) profile.height = Number(height);
	if (weight) profile.weight = Number(weight);

	if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
	if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');

	if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
	if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;

	profile.social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);

	return profile;
}
/**
 * Fetch an actor profile by loading the front page and then the linked bio page.
 * Falls back to the `<slug>_Babe` page when the primary front page does not return HTTP 200.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<Object|null>} Scraped profile, or null when neither front page exists.
 */
async function fetchProfile(actorName) {
	// replace every space; a string pattern only replaces the first occurrence
	const slug = actorName.replace(/ /g, '_');
	const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;

	const resFrontpage = await bhttp.get(frontpageUrl);

	if (resFrontpage.statusCode === 200) {
		// scrapeProfileFrontpage resolves to { profile, url }; reading `bio` always gave
		// undefined and discarded everything scraped from the front page
		const { url, profile } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
		const resBio = await bhttp.get(url);

		return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
	}

	// apparently some actors are appended 'Babe' as their surname...
	const fallbackSlug = `${slug}_Babe`;
	const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`;
	const resFallback = await bhttp.get(fallbackUrl);

	if (resFallback.statusCode === 200) {
		const { url, profile } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName);
		const resBio = await bhttp.get(url);

		return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
	}

	return null;
}
module.exports = {
fetchProfile,
fetchProfile,
};

View File

@@ -4,93 +4,93 @@ const { get, geta, ctxa } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Convert a list of scene query contexts into release objects.
 *
 * @param {Array<{el: Object, qu: Object}>} scenes - One context per scene tile; `el` is the
 *   tile element and `qu` the query helper bound to it.
 * @returns {Object[]} One release per scene.
 */
function scrapeAll(scenes) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		// prefer the data-setid attribute, fall back to the number embedded in the thumbnail ID;
		// null when neither is available instead of throwing on a failed match
		release.entryId = el.dataset.setid || qu.q('.update_thumb', 'id')?.match(/\w+-\w+-(\d+)-\d+/)?.[1] || null;
		release.url = qu.url('.title');

		release.title = qu.q('.title', true);
		release.description = qu.q('.title', 'title');

		release.date = qu.date('.video-data > span:last-child', 'YYYY-MM-DD');
		release.duration = qu.dur('.video-data > span');

		release.actors = qu.all('.update_models a', true);

		const poster = qu.q('.update_thumb', 'src0_1x');

		if (poster) {
			// try the 2x variant first, then the 1x source
			release.poster = [
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		return release;
	});
}
/**
 * Build a release from a scene page's query helpers.
 *
 * @param {Object} context - Query helpers (`q`, `qa`, `qd`, `qtx`) bound to the scene page.
 * @param {string} url - Scene URL, stored on the release.
 * @param {Object} _site - Unused.
 * @returns {Object} The scraped release.
 */
function scrapeScene({ q, qa, qd, qtx }, url, _site) {
	const release = { url };

	// null when the image ID is missing or has an unexpected shape, instead of throwing
	release.entryId = q('#image_parent img', 'id')?.match(/\w+-\w+-(\d+)-\d+/)?.[1] || null;

	release.title = q('.trailer_title', true);
	release.description = qtx('.text p');
	release.date = qd('span[data-dateadded]', 'YYYY-MM-DD', null, 'data-dateadded');

	release.actors = qa('.update_models a', true);
	release.tags = qa('.video-info a[href*="/categories"]', true);

	const poster = q('#image_parent img', 'src0_1x');

	if (poster) {
		// try the 2x variant first, then the 1x source
		release.poster = [
			poster.replace('-1x', '-2x'),
			poster,
		];
	}

	return release;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {Object} context - `el` is the page element; `q` and `qtx` are query helpers bound to it.
 * @returns {Object} Profile with avatar sources, releases and, when present, a description.
 */
function scrapeProfile({ el, q, qtx }) {
	const profile = {};

	const description = qtx('.model-bio');
	if (description) profile.description = description;

	// 2x source first, then the 1x source
	profile.avatar = [
		q('.model-image img', 'src0_2x'),
		q('.model-image img', 'src0_1x'),
	];

	profile.releases = scrapeAll(ctxa(el, '.update'));

	return profile;
}
/**
 * Fetch one page of the site's latest releases.
 *
 * @param {Object} site - Site record; `site.url` is the base URL.
 * @param {number} [page=1] - Page number used in the listing URL.
 * @returns {Promise<Object[]|number>} Scraped releases, or the response status on failure.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/categories/movies_${page}_d.html`;
	const res = await geta(url, '.latest-updates .update');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site record, passed through to the scraper.
 * @returns {Promise<Object|number>} Scraped release, or the response status on failure.
 */
async function fetchScene(url, site) {
	const res = await get(url, '.content-wrapper');

	return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
/**
 * Fetch and scrape an actor's model page.
 *
 * @param {string} actorName - Name of the actor; slugified to form the page name.
 * @param {string} scraperSlug - Scraper slug, used as the domain name.
 * @returns {Promise<Object|number>} Scraped profile, or the response status on failure.
 */
async function fetchProfile(actorName, scraperSlug) {
	const actorSlug = slugify(actorName, '');

	// 'povperverts' is served from .net; every other slug is assumed to be a .com domain
	const url = scraperSlug === 'povperverts'
		? `https://povperverts.net/models/${actorSlug}.html`
		: `https://${scraperSlug}.com/models/${actorSlug}.html`;

	const res = await get(url);

	return res.ok ? scrapeProfile(res.item, actorName) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -8,404 +8,403 @@ const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
/**
 * Build a case-insensitive pattern matching any channel of the site's network.
 * Only applies to the 'hushpass' and 'interracialpass' networks.
 *
 * @param {Object} site - Site record with `network.slug` and `network.id`.
 * @returns {Promise<RegExp|null>} The channel pattern, or null when the network is not
 *   covered or has no usable channels.
 */
async function getChannelRegExp(site) {
	if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;

	const sites = await knex('sites').where('network_id', site.network.id);

	const patterns = sites
		// parameters.match is used as a pattern as-is; a plain name is escaped so characters
		// such as '(' or '+' are matched literally
		.map(channel => channel.parameters?.match || channel.name?.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
		.filter(Boolean);

	// an empty alternation would match every string
	if (patterns.length === 0) return null;

	return new RegExp(patterns.join('|'), 'i');
}
/**
 * Derive an entry ID from a release's date and title.
 *
 * @param {Object} release - Release with `date` and `title`.
 * @returns {string|null} `<slugified YYYY-MM-DD date>-<slugified title>`, or null when
 *   either field is missing.
 */
function deriveEntryId(release) {
	if (release.date && release.title) {
		return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
	}

	return null;
}
/**
 * Resolve the poster sources and extra photos for a release.
 *
 * @param {string|null|undefined} posterPath - Poster path relative to the media base URL.
 * @param {Object} site - Site record; `parameters.media` takes precedence over `url` as base.
 * @param {Object} [baseRelease] - Previously scraped release whose poster may be reused.
 * @returns {Array} `[poster, photos]`: the poster is a list of source URLs when a usable
 *   path was given, otherwise the base release's poster or null; photos holds the base
 *   release's poster when it was displaced by the new one.
 */
function extractPoster(posterPath, site, baseRelease) {
	// paths containing '400.jpg' are skipped — presumably a placeholder image, TODO confirm;
	// the dot is escaped so it no longer matches an arbitrary character
	if (posterPath && !/400\.jpg/.test(posterPath)) {
		const poster = `${site.parameters?.media || site.url}${posterPath}`;
		const posterSources = [
			poster,
			// upscaled
			poster.replace('-1x', '-2x'),
			poster.replace('-1x', '-3x'),
		];

		// keep the base release's poster as an additional photo
		return [posterSources, baseRelease?.poster ? [baseRelease.poster] : []];
	}

	return [baseRelease?.poster || null, []];
}
/**
 * Collect an image's available sources from largest to smallest.
 *
 * @param {Function} q - Query helper; called as `q(el, selector, attr)` when `el` is given,
 *   otherwise as `q(selector, attr)`.
 * @param {string} selector - Selector of the image element.
 * @param {Object} site - Site record; `parameters.media` takes precedence over `url` as base.
 * @param {Object} [el] - Optional element to query within.
 * @returns {string[]} Absolute source URLs for the 3x, 2x and 1x variants that are present.
 */
function getImageWithFallbacks(q, selector, site, el) {
	const baseUrl = site.parameters?.media || site.url;

	return ['src0_3x', 'src0_2x', 'src0_1x']
		.map(attribute => (el ? q(el, selector, attribute) : q(selector, attribute)))
		.filter(Boolean)
		.map(src => `${baseUrl}${src}`);
}
/**
 * Convert a list of scene query contexts into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - One query context per scene tile.
 * @param {Object} site - Site record, used to resolve image URLs.
 * @returns {Object[]} One release per scene.
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h3 a', 'title') || qu.q('h3 a', true);
		release.url = qu.url('h3 a');

		release.date = qu.date('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
		release.duration = qu.dur('.modeldata p');

		if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];

		release.poster = getImageWithFallbacks(qu.q, '.modelimg img', site);

		// the entry ID is derived from date and title rather than from the image element's ID
		release.entryId = deriveEntryId(release);

		return release;
	});
}
/**
 * Convert a list of scene query contexts (T1 layout) into release objects, optionally
 * dropping releases that were already scraped elsewhere.
 *
 * @param {Array<{qu: Object}>} scenes - One query context per scene tile.
 * @param {Object} site - Site record; `parameters.accFilter` enables the duplicate filter.
 * @param {Object[]} [accSiteReleases] - Releases accumulated so far; matched by `entryId`.
 * @returns {Object[]} The releases that were not filtered out.
 */
function scrapeAllT1(scenes, site, accSiteReleases) {
	// collect the known entry IDs once instead of rebuilding the list for every scene
	const accEntryIds = site.parameters?.accFilter && accSiteReleases
		? new Set(accSiteReleases.map(accRelease => accRelease.entryId))
		: null;

	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h4 a', 'title') || qu.q('h4 a', true);
		release.url = qu.url('h4 a');

		release.date = qu.date('.more-info-div', 'MMM D, YYYY');
		release.duration = qu.dur('.more-info-div');

		if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];

		const posterPath = qu.q('.img-div img', 'src0_1x') || qu.img('img.video_placeholder');

		if (posterPath) {
			// absolute URLs are used as-is, relative paths are resolved against the media base
			const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`;

			release.poster = [
				poster.replace('-1x', '-3x'),
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		// the entry ID is derived from date and title rather than from the image element's ID
		release.entryId = deriveEntryId(release);

		if (accEntryIds?.has(release.entryId)) {
			// filter out releases that were already scraped from a categorized site
			return null;
		}

		return release;
	}).filter(Boolean);
}
/**
 * Convert a list of scene query contexts (tour layout) into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - One query context per scene tile.
 * @returns {Object[]} One release per scene.
 */
function scrapeAllTour(scenes) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h4 a', true);
		release.url = qu.url('a');
		release.date = qu.date('.tour_update_models + span', 'YYYY-MM-DD');

		release.actors = qu.all('.tour_update_models a', true);

		release.poster = qu.img('a img');

		release.entryId = deriveEntryId(release);

		return release;
	});
}
/**
 * Build a release from a scene page.
 *
 * @param {Object} context - `html` is the raw page markup, `qu` the query helper for the page.
 * @param {Object} site - Site record, used to resolve media URLs.
 * @param {string} url - Scene URL, stored on the release.
 * @param {Object} [baseRelease] - Previously scraped release whose poster may be reused.
 * @returns {Object} The scraped release.
 */
function scrapeScene({ html, qu }, site, url, baseRelease) {
	const release = { url };

	release.title = qu.q('.centerwrap h2', true);
	release.description = qu.q('.videocontent p', true);

	release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
	release.duration = qu.dur('.videodetails .date');

	release.actors = qu.all('.modelname a', true);

	const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	// take the matched text explicitly instead of coercing the match array into the URL
	const trailerPath = html.match(/\/trailers\/.*\.mp4/)?.[0];
	if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };

	// the rating element may be absent; the dot is escaped so only a decimal point matches
	const stars = qu.q('.modelrates + p', true)?.match(/\d\.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	// the entry ID is derived from date and title rather than from an ID in the markup
	release.entryId = deriveEntryId(release);

	return release;
}
/**
 * Build a release from a scene page (T1 layout).
 *
 * @param {Object} context - `html` is the raw page markup, `qu` the query helper for the page.
 * @param {Object} site - Site record, used to resolve media URLs.
 * @param {string} url - Scene URL; stored on the release and used as the trailer referer.
 * @param {Object} [baseRelease] - Previously scraped release whose poster may be reused.
 * @param {RegExp} [channelRegExp] - When given, the first matching tag selects the channel.
 * @returns {Object} The scraped release.
 */
function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
	const release = { url };

	release.title = qu.q('.trailer-section-head .section-title', true);
	release.description = qu.text('.row .update-info-block');

	release.date = qu.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = qu.dur('.update-info-row:nth-child(2)');

	release.actors = qu.all('.models-list-thumbs a').map(el => ({
		name: qu.q(el, 'span', true),
		avatar: getImageWithFallbacks(qu.q, 'img', site, el),
	}));

	release.tags = qu.all('.tags a', true);

	const posterPath = qu.q('.player-thumb img', 'src0_1x');
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	// absolute trailer URLs are used as-is, relative ones are resolved against the media base
	const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];
	if (trailer && /^http/.test(trailer)) release.trailer = { src: trailer, referer: url };
	else if (trailer) release.trailer = { src: `${site.parameters?.media || site.url}${trailer}`, referer: url };

	// the rating element may be absent; the dot is escaped so only a decimal point matches
	const stars = qu.q('.update-rating', true)?.match(/\d\.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	if (channelRegExp) {
		const channel = release.tags.find(tag => channelRegExp.test(tag));

		if (channel) {
			release.channel = {
				force: true,
				slug: slugify(channel, ''),
			};
		}
	}

	// the entry ID is derived from date and title rather than from the image element's ID
	release.entryId = deriveEntryId(release);

	return release;
}
/**
 * Build a release from a scene page or tile (tour layout).
 *
 * @param {Object} context - `html` is the raw markup, `qu` the query helper for it.
 * @param {Object} site - Site record, used to resolve relative trailer paths.
 * @param {string} [url] - Scene URL; stored on the release when given.
 * @returns {Object} The scraped release.
 */
function scrapeSceneTour({ html, qu }, site, url) {
	const release = {};

	if (url) release.url = url;
	release.title = qu.q('.update_title, .video-title', true);
	release.description = qu.q('.latest_update_description, .video-summary', true);

	const date = qu.date('.availdate, .update_date', 'YYYY-MM-DD');
	if (date) release.date = date;

	release.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
	release.tags = qu.all('.update_tags a, .tour_update_tags a', true);

	// the second image is the preferred poster; the first is the fallback
	const [photo, poster, ...photos] = qu.imgs('.update_image img:not(.play_icon_overlay)');
	if (poster || photo) release.poster = poster || photo;

	// don't use first photo when already used as fallback poster; the former
	// `(photo && poster) || photos` guard was always true because a rest element is always an array
	release.photos = poster ? [photo, ...photos] : photos;

	if (release.date) release.entryId = deriveEntryId(release);

	// the trailer path comes from the onclick handler, or failing that from the raw markup
	const trailerCode = qu.q('.update_image a', 'onclick');
	const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];
	if (trailerPath && /^http/.test(trailerPath)) release.trailer = { src: trailerPath };
	else if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };

	return release;
}
/**
 * Scrape an actor profile from the default layout.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} context - query context; `el` is the root element, `qu` the query helpers
 * @param {object} site - site entry, passed on to the avatar and release helpers
 * @returns {object} profile with avatar and releases, plus bust/waist/hip, age and height when present in the bio
 */
function scrapeProfile({ el, qu }, site) {
const profile = {};
const profile = {};
// Build a key/value map from the "Label: value" stat paragraphs; keys are slugified labels.
// NOTE(review): `value.trim()` throws when a paragraph contains no ':' (value is undefined).
const bio = qu.texts('.stats p').reduce((acc, info) => {
const [key, value] = info.split(':');
const bio = qu.texts('.stats p').reduce((acc, info) => {
const [key, value] = info.split(':');
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
// Measurements are split on '-'; bust stays a string, waist and hip become numbers.
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (bio.age) profile.age = Number(bio.age);
if (bio.height) profile.height = feetInchesToCm(bio.height);
if (bio.age) profile.age = Number(bio.age);
if (bio.height) profile.height = feetInchesToCm(bio.height);
profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);
profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);
// Scenes listed on the profile page are scraped with the same scraper as the overview.
const qReleases = ctxa(el, '.modelFeatures .modelfeature');
profile.releases = scrapeAll(qReleases, site);
const qReleases = ctxa(el, '.modelFeatures .modelfeature');
profile.releases = scrapeAll(qReleases, site);
return profile;
return profile;
}
/**
 * Scrape an actor profile from the "t1" layout.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} context - query context; `el` is the root element, `qu` the query helpers
 * @param {object} site - site entry, passed on to the avatar and release helpers
 * @returns {object} profile with avatar and releases, plus measurements, description, age and height when present
 */
function scrapeProfileT1({ el, qu }, site) {
const profile = {};
const profile = {};
// Build a key/value map from "Label: value" paragraphs; entries without a value are skipped.
const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
const [key, value] = info.split(':');
const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
const [key, value] = info.split(':');
if (!value) return acc;
if (!value) return acc;
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
// Measurements are split on '-'; bust stays a string, waist and hip become numbers.
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (bio.fun_fact) profile.description = bio.fun_fact;
if (bio.age) profile.age = Number(bio.age);
if (bio.fun_fact) profile.description = bio.fun_fact;
if (bio.age) profile.age = Number(bio.age);
// Height: a three-digit number is taken as the metric value; the first two
// single-digit groups are passed to feetInchesToCm as feet and inches.
// NOTE(review): the imperial pattern matches any digit, so it also matches a metric
// value; the second assignment is not an `else` and overwrites the metric height — confirm.
const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);
const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g);
if (heightMetric) profile.height = Number(heightMetric[1]);
if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);
const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g);
if (heightMetric) profile.height = Number(heightMetric[1]);
if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site);
profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site);
const qReleases = ctxa(el, '.item-video');
profile.releases = scrapeAllT1(qReleases, site);
const qReleases = ctxa(el, '.item-video');
profile.releases = scrapeAllT1(qReleases, site);
return profile;
return profile;
}
/**
 * Scrape an actor profile from the "tour" layout.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} context - query context; `el` is the root element, `qu` the query helpers
 * @param {object} site - site entry; attached to every scraped release
 * @returns {object} profile with avatar and releases, plus whichever bio fields were present
 */
function scrapeProfileTour({ el, qu }, site) {
const profile = {};
const profile = {};
// Build a key/value map from "Label: value" bio texts; keys are slugified labels.
// NOTE(review): `value.trim()` throws when an entry contains no ':' (value is undefined).
const bio = qu.texts('.model_bio').reduce((acc, info) => {
const [key, value] = info.split(':');
const bio = qu.texts('.model_bio').reduce((acc, info) => {
const [key, value] = info.split(':');
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.birthplace) profile.birthPlace = bio.birthplace;
if (bio.fun_fact) profile.description = bio.fun_fact;
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.birthplace) profile.birthPlace = bio.birthplace;
if (bio.fun_fact) profile.description = bio.fun_fact;
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
// Height and weight take the leading 2-3 digit number of the field.
// NOTE(review): when the field does not start with digits the result is Number(undefined), i.e. NaN.
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
// Measurements are split on '-'; bust stays a string, waist and hip become numbers.
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
// Yes/no fields become booleans; the field is left unset when neither word is found.
// NOTE(review): /no/i is unanchored and also matches inside longer words (e.g. "none", "unknown").
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);
profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);
// Each update block is scraped as a scene without a URL; the block's link is
// attached afterwards unless it points to a signup/join page, and the entry ID
// is then derived regardless of whether a date was found.
const qReleases = ctxa(el, '.update_block');
profile.releases = qReleases.map((qRelease) => {
const url = qRelease.qu.url('.update_image a[href]');
const release = scrapeSceneTour(qRelease, site);
const qReleases = ctxa(el, '.update_block');
profile.releases = qReleases.map((qRelease) => {
const url = qRelease.qu.url('.update_image a[href]');
const release = scrapeSceneTour(qRelease, site);
if (!/\/(signup|join)/i.test(url)) release.url = url;
release.entryId = deriveEntryId(release);
release.site = site;
if (!/\/(signup|join)/i.test(url)) release.url = url;
release.entryId = deriveEntryId(release);
release.site = site;
return release;
});
return release;
});
return profile;
return profile;
}
/**
 * Fetch one page of the latest releases and scrape it with the layout-specific scraper.
 *
 * The page URL is `site.parameters.latest` formatted with the page number when set,
 * else the t1 category path when `site.parameters.t1` is set, else the default category path.
 *
 * NOTE(review): rendered diff output; most body lines are listed twice (old/new side).
 *
 * @param {object} site - site entry (`url`, optional `parameters.latest`, `parameters.t1`, `parameters.tour`)
 * @param {number} [page=1] - page number inserted into the URL
 * @param {*} _beforeFetchLatest - unused in this function
 * @param {*} accSiteReleases - passed through unchanged to the scraper
 * @returns {Promise<*>} result of the selected scraper, or `res.status` when the request failed
 */
async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases) {
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|| `${site.url}/categories/movies_${page}_d.html`;
// The selector covers the item elements of all three layouts.
const res = await geta(url, '.modelfeature, .item-video, .updateItem');
const res = await geta(url, '.modelfeature, .item-video, .updateItem');
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeAllT1(res.items, site, accSiteReleases);
if (site.parameters?.tour) return scrapeAllTour(res.items, site, accSiteReleases);
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeAllT1(res.items, site, accSiteReleases);
if (site.parameters?.tour) return scrapeAllTour(res.items, site, accSiteReleases);
return scrapeAll(res.items, site, accSiteReleases);
return scrapeAll(res.items, site, accSiteReleases);
}
/**
 * Fetch a scene page and scrape it with the layout-specific scene scraper.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string} url - scene page URL
 * @param {object} site - site entry; `parameters.t1` / `parameters.tour` select the scraper
 * @param {object} baseRelease - passed through to the scraper
 * @param {*} beforeFetchLatest - used as the channel RegExp when truthy; otherwise getChannelRegExp(site) is awaited
 * @returns {Promise<*>} scraped release, or `res.status` when the request failed
 */
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
// The channel RegExp is resolved before the layout is known, although only the
// t1 scraper receives it.
const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
const res = await get(url);
const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
const res = await get(url);
// NOTE(review): scrapeSceneTour is declared with three parameters in this file,
// so the `baseRelease` argument passed to it below is ignored.
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
return scrapeScene(res.item, site, url, baseRelease);
return scrapeScene(res.item, site, url, baseRelease);
}
/**
 * Fetch and scrape an actor profile, trying two slug variants of the actor name.
 *
 * The first request uses slugify(actorName, ''); when it is not ok, a second
 * request is made with slugify(actorName). The profile URL is
 * `site.parameters.profile` formatted with the slug when set, else the site's
 * models path (under `t1/` for t1 sites).
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side),
 * and the two `const res` statements below are the old and new form of the same
 * fallback and are NOT textually identical.
 *
 * @param {string} actorName - actor name to look up
 * @param {string} scraperSlug - not used in this function body
 * @param {object} site - site entry
 * @returns {Promise<*>} scraped profile, or `res.status` when the last request failed
 */
async function fetchProfile(actorName, scraperSlug, site) {
const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName);
const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName);
const t1 = site.parameters?.t1 ? 't1/' : '';
const t1 = site.parameters?.t1 ? 't1/' : '';
const res1 = site.parameters?.profile
? await get(util.format(site.parameters.profile, actorSlugA))
: await get(`${site.url}/${t1}models/${actorSlugA}.html`);
const res1 = site.parameters?.profile
? await get(util.format(site.parameters.profile, actorSlugA))
: await get(`${site.url}/${t1}models/${actorSlugA}.html`);
// Old form: a ternary picks exactly one fallback URL for the second slug.
const res = (res1.ok && res1)
|| (site.parameters?.profile
? await get(util.format(site.parameters.profile, actorSlugB))
: await get(`${site.url}/${t1}models/${actorSlugB}.html`));
// New form: the default models URL is only requested when the custom-profile
// branch yields a falsy value.
// NOTE(review): equivalent to the old form only if get() always resolves to a
// truthy result object, also for failed requests — confirm.
const res = (res1.ok && res1)
|| (site.parameters?.profile && await get(util.format(site.parameters.profile, actorSlugB)))
|| await get(`${site.url}/${t1}models/${actorSlugB}.html`);
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
if (site.parameters?.tour) return scrapeProfileTour(res.item, site);
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
if (site.parameters?.tour) return scrapeProfileTour(res.item, site);
return scrapeProfile(res.item, site);
return scrapeProfile(res.item, site);
}
// Scraper interface of this module; `beforeFetchLatest` is wired to getChannelRegExp.
// NOTE(review): rendered diff output; each key is listed twice (old/new side).
module.exports = {
beforeFetchLatest: getChannelRegExp,
fetchLatest,
fetchScene,
fetchProfile,
beforeFetchLatest: getChannelRegExp,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,9 +3,9 @@
const { fetchProfile } = require('./mindgeek');
/**
 * Look up an actor profile through the shared './mindgeek' fetchProfile,
 * passing 'iconmale' as its second argument.
 *
 * NOTE(review): rendered diff output; the return line is listed twice (old/new side).
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} whatever the mindgeek fetchProfile resolves to
 */
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'iconmale');
return fetchProfile(actorName, 'iconmale');
}
// Exposes the network-specific wrapper under the generic `fetchProfile` name.
module.exports = {
fetchProfile: networkFetchProfile,
fetchProfile: networkFetchProfile,
};

View File

@@ -4,104 +4,104 @@ const bhttp = require('bhttp');
const { get, exa, ed } = require('../utils/q');
/**
 * Scrape the releases listed in an overview page.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string} html - overview page markup
 * @param {object} site - site entry; `slug` selects the scene selector, `url` prefixes scene paths
 * @returns {object[]} one release per scene table found
 */
function scrapeLatest(html, site) {
// The 'paintoy' site uses a different table structure than the other sites.
const scenes = site.slug === 'paintoy'
? exa(html, '#articleTable table[cellspacing="2"]')
: exa(html, 'body > table');
const scenes = site.slug === 'paintoy'
? exa(html, '#articleTable table[cellspacing="2"]')
: exa(html, 'body > table');
return scenes.map(({ qu }) => {
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
const release = {};
return scenes.map(({ qu }) => {
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
const release = {};
// The title text is '|'-separated: the first part is the title, the rest are actor names.
// NOTE(review): `titleEl.textContent` throws when no title element is found.
const titleEl = qu.q('.galleryTitleText, .articleTitleText');
const [title, ...actors] = titleEl.textContent.split('|');
const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');
const titleEl = qu.q('.galleryTitleText, .articleTitleText');
const [title, ...actors] = titleEl.textContent.split('|');
const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');
// The entry ID is the second-to-last '/'-separated segment of the scene path.
const url = qu.url(titleEl, 'a');
[release.entryId] = url.split('/').slice(-2);
release.url = `${site.url}${url}`;
const url = qu.url(titleEl, 'a');
[release.entryId] = url.split('/').slice(-2);
release.url = `${site.url}${url}`;
if (date) {
release.title = title.trim();
release.date = date;
} else {
// title should contain date instead, not applicable in brief mode
release.title = title.slice(title.indexOf(':') + 1).trim();
release.date = ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY');
}
if (date) {
release.title = title.trim();
release.date = date;
} else {
// title should contain date instead, not applicable in brief mode
release.title = title.slice(title.indexOf(':') + 1).trim();
release.date = ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY');
}
release.actors = actors.map(actor => actor.trim());
release.actors = actors.map(actor => actor.trim());
// The description is cut off at its last '('.
// NOTE(review): without any '(' lastIndexOf returns -1, so slice(0, -1) drops the final character.
const description = qu.q('.articleCopyText', true);
if (description) release.description = description.slice(0, description.lastIndexOf('('));
const description = qu.q('.articleCopyText', true);
if (description) release.description = description.slice(0, description.lastIndexOf('('));
const duration = qu.dur('.articleCopyText a:nth-child(2)');
if (duration) release.duration = duration;
const duration = qu.dur('.articleCopyText a:nth-child(2)');
if (duration) release.duration = duration;
release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
// One cover with two candidate sources: the path without '_thumbnail', then the
// original path (presumably tried in that order — confirm against the media handling).
// NOTE(review): `cover.replace` throws when no image is found.
const cover = qu.img('a img');
release.covers = [[
cover.replace('_thumbnail', ''),
cover,
]];
const cover = qu.img('a img');
release.covers = [[
cover.replace('_thumbnail', ''),
cover,
]];
return release;
});
return release;
});
}
/**
 * Scrape a single scene page.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} context - query context; `qu` provides the query helpers
 * @param {object} site - site entry; `url` prefixes the scene path
 * @returns {object} release with entryId, url, title, description, actors, date, duration, covers, photos, poster and optional trailer
 */
function scrapeScene({ qu }, site) {
const release = {};
const release = {};
// The title text is '|'-separated: the first part is the title, the rest are actor names.
// NOTE(review): `titleEl.textContent` throws when no title element is found.
const titleEl = qu.q('.articleTitleText');
const [title, ...actors] = titleEl.textContent.split('|');
const titleEl = qu.q('.articleTitleText');
const [title, ...actors] = titleEl.textContent.split('|');
// The entry ID is the second-to-last '/'-separated segment of the scene path.
const url = qu.url(titleEl, 'a');
[release.entryId] = url.split('/').slice(-2);
release.url = `${site.url}${url}`;
const url = qu.url(titleEl, 'a');
[release.entryId] = url.split('/').slice(-2);
release.url = `${site.url}${url}`;
release.title = title.trim();
release.description = qu.q('.articleCopyText', true);
release.title = title.trim();
release.description = qu.q('.articleCopyText', true);
release.actors = actors.map(actor => actor.trim());
release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
release.duration = qu.dur('.articlePostDateText a:nth-child(2)');
release.actors = actors.map(actor => actor.trim());
release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
release.duration = qu.dur('.articlePostDateText a:nth-child(2)');
// The first matching image becomes the cover, the remaining ones the photos.
const [cover, ...photos] = qu.imgs('img[src*="images"]');
release.covers = [cover];
release.photos = photos;
const [cover, ...photos] = qu.imgs('img[src*="images"]');
release.covers = [cover];
release.photos = photos;
release.poster = qu.poster();
release.poster = qu.poster();
const trailer = qu.trailer();
if (trailer) release.trailer = { src: trailer };
const trailer = qu.trailer();
if (trailer) release.trailer = { src: trailer };
return release;
return release;
}
/**
 * Fetch one overview page in "brief" mode and scrape its releases.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} site - site entry (`slug`, `url`)
 * @param {number} [page=1] - page number
 * @returns {Promise<object[]|null>} scraped releases, or null when the status code is not 200
 */
async function fetchLatest(site, page = 1) {
const url = site.slug === 'paintoy' // paintoy's site is partially broken, use front page
? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
: `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
const url = site.slug === 'paintoy' // paintoy's site is partially broken, use front page
? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
: `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
// NOTE(review): `type` and `page` are already in the query string; confirm that
// passing them again as the second argument of bhttp.get has any effect.
const res = await bhttp.get(url, {
type: 'brief',
page,
});
const res = await bhttp.get(url, {
type: 'brief',
page,
});
// paintoy's body is converted to a string; for the other sites the markup is read
// from `res.body.html` (presumably a parsed JSON payload — confirm).
if (res.statusCode === 200) {
return scrapeLatest(site.slug === 'paintoy' ? res.body.toString() : res.body.html, site);
}
if (res.statusCode === 200) {
return scrapeLatest(site.slug === 'paintoy' ? res.body.toString() : res.body.html, site);
}
return null;
return null;
}
/**
 * Fetch a scene page and scrape it.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string} url - scene page URL
 * @param {object} site - site entry, passed on to scrapeScene
 * @returns {Promise<*>} scraped release when the request is ok, otherwise `res.status`
 */
async function fetchScene(url, site) {
const res = await get(url);
const res = await get(url);
return res.ok ? scrapeScene(res.item, site) : res.status;
return res.ok ? scrapeScene(res.item, site) : res.status;
}
// Scraper interface of this module.
// NOTE(review): rendered diff output; each key is listed twice (old/new side).
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -9,116 +9,116 @@ const slugify = require('../utils/slugify');
const { fetchApiLatest, fetchScene } = require('./gamma');
/**
 * Extract the values needed to build API URLs from the site's front page.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} site - site entry; `url` is the page that is fetched
 * @returns {Promise<{time: string, token: string}>} the "aet" number (as a string) and the reversed "ah" value
 */
async function fetchToken(site) {
const res = await bhttp.get(site.url);
const html = res.body.toString();
const res = await bhttp.get(site.url);
const html = res.body.toString();
// `time` is the number after "aet":, `ah` the quoted value after "ah": with the
// quotes stripped; the token is `ah` with its characters reversed.
// NOTE(review): `match(...)[0]` throws a TypeError when either field is missing from the page.
const time = html.match(/"aet":\d+/)[0].split(':')[1];
const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
const token = ah.split('').reverse().join('');
const time = html.match(/"aet":\d+/)[0].split(':')[1];
const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
const token = ah.split('').reverse().join('');
return { time, token };
return { time, token };
}
/**
 * Fetch the stage names of the models attached to a piece of content.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string|number} entryId - content ID, sent as transitParameters[contentId]
 * @param {object} site - site entry; `url` is the API base
 * @param {{token: string, time: string}} tokens - values placed in the API path
 * @returns {Promise<string[]>} stage names, or an empty array when the request did not succeed
 */
async function fetchActors(entryId, site, { token, time }) {
const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
const res = await bhttp.get(url);
const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
const res = await bhttp.get(url);
// Each collection entry nests a `modelId.collection` object; the stage name is
// taken from that nested collection's first value.
if (res.statusCode === 200 && res.body.status === true) {
return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
}
if (res.statusCode === 200 && res.body.status === true) {
return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
}
return [];
return [];
}
/**
 * Resolve the trailer location for an entry from the download endpoint's redirect.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string|number} entryId - content ID placed in the download URL
 * @param {object} site - site entry (`url`, and `name` for the log message)
 * @returns {Promise<string|null>} the Location header of a 302 response, otherwise null
 */
async function fetchTrailerLocation(entryId, site) {
const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
// Redirects are not followed so that the redirect target itself can be returned.
// Request errors are logged as warnings and result in null.
try {
const res = await bhttp.get(url, {
followRedirects: false,
});
try {
const res = await bhttp.get(url, {
followRedirects: false,
});
if (res.statusCode === 302) {
return res.headers.location;
}
} catch (error) {
logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
}
if (res.statusCode === 302) {
return res.headers.location;
}
} catch (error) {
logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
}
return null;
return null;
}
/**
 * Turn one API scene object into a release, fetching actors and trailer alongside.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} scene - scene object from the API collection
 * @param {object} site - site entry; attached to the release and used to build URLs
 * @param {{token: string, time: string}} tokens - API tokens; also stored on `release.meta`
 * @returns {Promise<object>} release
 */
async function scrapeScene(scene, site, tokens) {
const release = {
entryId: scene.id,
title: scene.title,
duration: scene.length,
site,
meta: {
tokens, // attach tokens to reduce number of requests required for deep fetching
},
};
const release = {
entryId: scene.id,
title: scene.title,
duration: scene.length,
site,
meta: {
tokens, // attach tokens to reduce number of requests required for deep fetching
},
};
// The publish date is looked up in `scene.sites.collection` under the scene's own ID.
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
release.date = new Date(scene.sites.collection[scene.id].publishDate);
release.poster = scene._resources.primary[0].url;
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
release.date = new Date(scene.sites.collection[scene.id].publishDate);
release.poster = scene._resources.primary[0].url;
if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
// Actors and trailer location are requested concurrently.
const [actors, trailer] = await Promise.all([
fetchActors(release.entryId, site, tokens),
fetchTrailerLocation(release.entryId, site),
]);
const [actors, trailer] = await Promise.all([
fetchActors(release.entryId, site, tokens),
fetchTrailerLocation(release.entryId, site),
]);
release.actors = actors;
if (trailer) release.trailer = { src: trailer, quality: 1080 };
release.actors = actors;
if (trailer) release.trailer = { src: trailer, quality: 1080 };
return release;
return release;
}
/**
 * Scrape every scene of an API collection with scrapeScene, at most 10 at a time.
 *
 * NOTE(review): `Promise.map` is not a native Promise method; presumably `Promise`
 * is rebound to Bluebird at the top of the file — confirm.
 * NOTE(review): rendered diff output; the return line is listed twice (old/new side).
 *
 * @param {object[]} scenes - scene objects from the API
 * @param {object} site - site entry
 * @param {{token: string, time: string}} tokens - API tokens passed to scrapeScene
 * @returns {Promise<object[]>} scraped releases
 */
function scrapeLatest(scenes, site, tokens) {
return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
}
/**
 * Fetch one page (50 items) of the latest scenes.
 *
 * Sites with `parameters.useGamma` are delegated to the gamma scraper's
 * fetchApiLatest; all others use the site's own API with freshly fetched tokens.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} site - site entry
 * @param {number} [page=1] - page number; converted to an offset of (page - 1) * 50
 * @returns {Promise<object[]|null>} scraped releases, or null when the API request did not succeed
 */
async function fetchLatest(site, page = 1) {
if (site.parameters?.useGamma) {
return fetchApiLatest(site, page);
}
if (site.parameters?.useGamma) {
return fetchApiLatest(site, page);
}
const { time, token } = await fetchToken(site);
const { time, token } = await fetchToken(site);
// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
const res = await bhttp.get(url);
// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
const res = await bhttp.get(url);
if (res.statusCode === 200 && res.body.status) {
return scrapeLatest(res.body.response.collection, site, { time, token });
}
if (res.statusCode === 200 && res.body.status) {
return scrapeLatest(res.body.response.collection, site, { time, token });
}
return null;
return null;
}
/**
 * Fetch a single scene by URL.
 *
 * Sites with `parameters.useGamma` are delegated to the gamma scraper's fetchScene;
 * all others load the scene through the site's own API.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string} url - scene URL; the third '/'-separated part of its pathname is used as the entry ID
 * @param {object} site - site entry
 * @param {object} [release] - previously scraped release; its `meta.tokens` are reused when present
 * @returns {Promise<object|null>} scraped release, or null when the API request did not succeed
 */
async function fetchNetworkScene(url, site, release) {
if (site.parameters?.useGamma) {
return fetchScene(url, site, release);
}
if (site.parameters?.useGamma) {
return fetchScene(url, site, release);
}
// NOTE(review): `release?.meta.tokens` only guards `release`; a release without
// a `meta` property makes this line throw.
const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
const { pathname } = new URL(url);
const entryId = pathname.split('/')[2];
const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
const { pathname } = new URL(url);
const entryId = pathname.split('/')[2];
// The API is filtered on the entry ID, so the first collection item is the scene.
const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
const res = await bhttp.get(apiUrl);
const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
const res = await bhttp.get(apiUrl);
if (res.statusCode === 200 && res.body.status) {
return scrapeScene(res.body.response.collection[0], site, { time, token });
}
if (res.statusCode === 200 && res.body.status) {
return scrapeScene(res.body.response.collection[0], site, { time, token });
}
return null;
return null;
}
// Scraper interface of this module; the network-aware scene fetcher is exposed as `fetchScene`.
// NOTE(review): rendered diff output; each key is listed twice (old/new side).
module.exports = {
fetchLatest,
fetchScene: fetchNetworkScene,
fetchLatest,
fetchScene: fetchNetworkScene,
};

View File

@@ -3,83 +3,83 @@
const { get, initAll } = require('../utils/qu');
/**
 * Scrape the scenes of a tour page; scenes and dates are matched by index.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object[]} scenes - query contexts, one per scene table
 * @param {object[]} dates - query contexts of the date elements, in the same order as `scenes`
 * @param {object} site - site entry; `url` prefixes all relative paths
 * @returns {object[]} releases
 */
function scrapeLatest(scenes, dates, site) {
return scenes.map(({ qu }, index) => {
const release = {};
return scenes.map(({ qu }, index) => {
const release = {};
// The entry ID is the alphanumeric part of the trailer file name before
// "_trailer" or "_hd_trailer"; it stays undefined when the pattern does not match.
// NOTE(review): `path.match` throws when the scene has no link.
const path = qu.url('a');
release.url = `${site.url}/visitors/${path}`;
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
const path = qu.url('a');
release.url = `${site.url}/visitors/${path}`;
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
if (dates && dates[index]) {
release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
}
if (dates && dates[index]) {
release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
}
release.description = qu.q('tbody tr:nth-child(3) font', true);
release.description = qu.q('tbody tr:nth-child(3) font', true);
// The duration is read as whole minutes from the info line and stored in seconds.
// NOTE(review): `match(...)[1]` throws when the info line contains no "<n> min".
const infoLine = qu.q('font[color="#663366"]', true);
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
const infoLine = qu.q('font[color="#663366"]', true);
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
// The 400px-wide image is the poster; all other images under photos/ are photos.
const poster = qu.img('img[src*="photos/"][width="400"]');
release.poster = `${site.url}/visitors/${poster}`;
release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);
const poster = qu.img('img[src*="photos/"][width="400"]');
release.poster = `${site.url}/visitors/${poster}`;
release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);
return release;
});
return release;
});
}
/**
 * Scrape a single scene page: entry ID, actor and trailer sources.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} context - query context; `qu` provides the query helpers
 * @param {string} url - scene URL; stored on the release and parsed for the entry ID
 * @param {object} site - site entry; `url` prefixes the trailer paths
 * @returns {object} release with url, entryId, actors and two trailer sources
 */
function scrapeScene({ qu }, url, site) {
const release = { url };
const release = { url };
// NOTE(review): unlike the overview scraper in this file, this pattern requires
// "_hd_trailer", and `match(...)[1]` throws when the URL does not contain it.
const { pathname } = new URL(url);
release.entryId = pathname.match(/videos\/(\w+)_hd_trailer/)[1];
const { pathname } = new URL(url);
release.entryId = pathname.match(/videos\/(\w+)_hd_trailer/)[1];
// A single actor name is read from the page.
const actor = qu.q('font[color="#990033"] strong', true);
release.actors = [actor];
const actor = qu.q('font[color="#990033"] strong', true);
release.actors = [actor];
const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');
const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');
// Both sources are always emitted, whether or not the links were found.
// NOTE(review): if qu.url returns a nullish value for a missing link, the
// template string produces a bogus ".../videos/null" style URL — confirm.
release.trailer = [
{
src: `${site.url}/visitors/videos/${hdTrailer}`,
quality: 1080,
},
{
src: `${site.url}/visitors/videos/${sdTrailer}`,
quality: 270,
},
];
release.trailer = [
{
src: `${site.url}/visitors/videos/${hdTrailer}`,
quality: 1080,
},
{
src: `${site.url}/visitors/videos/${sdTrailer}`,
quality: 270,
},
];
return release;
return release;
}
/**
 * Fetch one tour page and scrape its scenes.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {object} site - site entry, passed on to scrapeLatest
 * @param {number} [page=1] - page number; zero-padded to two digits in the URL
 * @returns {Promise<*>} scraped releases, or `res.status` when the request failed
 */
async function fetchLatest(site, page = 1) {
// NOTE(review): the host is hard-coded here instead of using `site.url`.
const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
const res = await get(url);
const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
const res = await get(url);
if (!res.ok) {
return res.status;
}
if (!res.ok) {
return res.status;
}
const { el } = res.item;
const { el } = res.item;
// Scenes and dates are collected as two separate lists and paired by index in scrapeLatest.
const scenes = initAll(el, 'table[width="880"]');
const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');
const scenes = initAll(el, 'table[width="880"]');
const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');
return scrapeLatest(scenes, dates, site);
return scrapeLatest(scenes, dates, site);
}
/**
 * Fetch a scene page and scrape it.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string} url - scene page URL
 * @param {object} site - site entry, passed on to scrapeScene
 * @returns {Promise<*>} scraped release when the request is ok, otherwise `res.status`
 */
async function fetchScene(url, site) {
const res = await get(url);
const res = await get(url);
if (res.ok) {
return scrapeScene(res.item, url, site);
}
if (res.ok) {
return scrapeScene(res.item, url, site);
}
return res.status;
return res.status;
}
// Scraper interface of this module.
// NOTE(review): rendered diff output; each key is listed twice (old/new side).
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -13,406 +13,406 @@ const { heightToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
/**
 * Fetch a gallery page and return its body as a string.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string} url - gallery page URL
 * @returns {Promise<string>} response body converted with toString()
 */
async function fetchPhotos(url) {
const res = await bhttp.get(url);
const res = await bhttp.get(url);
return res.body.toString();
return res.body.toString();
}
/**
 * Extract photo sources from a gallery page.
 *
 * Each photo is returned as an array of candidate URLs derived from the thumbnail
 * path, ending with the thumbnail itself as the last fallback.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string} html - gallery page markup
 * @param {string} type - 'caps' selects the screencap directory mapping; any other value the photo mapping
 * @returns {string[][]} one array of candidate URLs per thumbnail
 */
function scrapePhotos(html, type) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
.toArray()
.map((photoElement) => {
const src = $(photoElement).attr('src');
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
.toArray()
.map((photoElement) => {
const src = $(photoElement).attr('src');
// high res often available in alternative directories, but not always, provide original as fallback
if (type === 'caps') {
return [
src.replace('capthumbs/', 'caps/'),
src,
];
}
// high res often available in alternative directories, but not always, provide original as fallback
if (type === 'caps') {
return [
src.replace('capthumbs/', 'caps/'),
src,
];
}
return [
src.replace('thumbs/', 'photos/'),
src.replace('thumbs/', '1600watermarked/'),
src.replace('thumbs/', '1280watermarked/'),
src.replace('thumbs/', '1024watermarked/'),
src,
];
});
return [
src.replace('thumbs/', 'photos/'),
src.replace('thumbs/', '1600watermarked/'),
src.replace('thumbs/', '1280watermarked/'),
src.replace('thumbs/', '1024watermarked/'),
src,
];
});
return photos;
return photos;
}
/**
 * Collect all photos of an entry from the legacy trial gallery, following its page links.
 *
 * When no high-res photos are found, the function retries once with type 'caps'.
 *
 * NOTE(review): rendered diff output; body lines are listed twice (old/new side).
 *
 * @param {string|number} entryId - gallery ID used in the album URL
 * @param {object} site - site entry; `url` is the base of the first album request
 * @param {string} [type='highres'] - gallery type: 'highres' or 'caps'
 * @param {number} [page=1] - page number of the first request
 * @returns {Promise<string[][]>} one array of candidate URLs per photo
 */
async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
logger.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);
logger.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);
const html = await fetchPhotos(albumUrl);
const $ = cheerio.load(html, { normalizeWhitespace: true });
const html = await fetchPhotos(albumUrl);
const $ = cheerio.load(html, { normalizeWhitespace: true });
// don't add first URL to pages to prevent unnecessary duplicate request
const photos = scrapePhotos(html, type);
const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));
// don't add first URL to pages to prevent unnecessary duplicate request
const photos = scrapePhotos(html, type);
const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));
// The remaining pages are fetched two at a time.
// NOTE(review): `pages` is always an array and therefore truthy, so the `: []`
// branch is never taken; the page URLs also use a hard-coded host instead of `site.url`.
const otherPhotos = pages
? await Promise.map(pages, async (pageX) => {
const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
const pageHtml = await fetchPhotos(pageUrl);
const otherPhotos = pages
? await Promise.map(pages, async (pageX) => {
const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
const pageHtml = await fetchPhotos(pageUrl);
return scrapePhotos(pageHtml, type);
}, {
concurrency: 2,
})
: [];
return scrapePhotos(pageHtml, type);
}, {
concurrency: 2,
})
: [];
const allPhotos = photos.concat(otherPhotos.flat());
const allPhotos = photos.concat(otherPhotos.flat());
if (allPhotos.length === 0 && type === 'highres') {
// photos not available, try for screencaps instead
return getPhotosLegacy(entryId, site, 'caps', 1);
}
if (allPhotos.length === 0 && type === 'highres') {
// photos not available, try for screencaps instead
return getPhotosLegacy(entryId, site, 'caps', 1);
}
return allPhotos;
return allPhotos;
}
/**
 * Collect photo URLs for a scene from the site's gallery page.
 *
 * Requests the gallery URL (`site.parameters.photos` when set, otherwise
 * `${site.url}/gallery.php`), picks out the lines of the response that
 * reference `ptx["<quality>"]`, and groups the image paths on those lines
 * by quality key.
 *
 * @param {string|number} entryId - value sent as the `id` query parameter
 * @param {Object} site - site record; `url` and the optional `parameters.photos` are read
 * @param {string} [type='highres'] - gallery type; 'highres' falls back to 'caps' when empty
 * @param {number} [page=1] - value sent as the `page` query parameter
 * @returns {Promise<Array>} photo URLs of the best available quality, or the
 *   result of getPhotosLegacy when neither photos nor screencaps were found
 */
async function getPhotos(entryId, site, type = 'highres', page = 1) {
	const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;

	const res = await bhttp.get(albumUrl);
	const html = res.body.toString();

	const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
	const sources = sourceLines.reduce((acc, sourceLine) => {
		// strip the surrounding [" and "] to get the bare quality key
		const quality = sourceLine.match(/\["\w+"\]/)[0].slice(2, -2);
		const sourceStart = sourceLine.match(/\/trial|\/tour|\/content/);

		if (!sourceStart) return acc;

		// path runs from the first known directory up to and including '.jpg'
		const source = sourceLine.slice(sourceStart.index, sourceLine.indexOf('.jpg') + 4);

		if (!source) return acc;
		if (!acc[quality]) acc[quality] = [];

		acc[quality].push(`${site.url}${source}`);

		return acc;
	}, {});

	if (type === 'highres') {
		// prefer the largest resolution that has any photos
		if (sources['1600'] && sources['1600'].length > 0) return sources['1600'];
		if (sources['1280'] && sources['1280'].length > 0) return sources['1280'];
		if (sources['1024'] && sources['1024'].length > 0) return sources['1024'];
		if (sources.Thumbs && sources.Thumbs.length > 0) return sources.Thumbs;

		// no photos available, try for screencaps instead
		return getPhotos(entryId, site, 'caps', 1);
	}

	if (sources.jpg && sources.jpg.length > 0) return sources.jpg;
	if (sources['Video Cap Thumbs'] && sources['Video Cap Thumbs'].length > 0) return sources['Video Cap Thumbs'];

	// no screencaps available either, try legacy scraper just in case
	return getPhotosLegacy(entryId, site, 'highres', 1);
}
/**
 * Extract the numeric scene ID from a scene page's HTML.
 *
 * Looks for a `showtagform(<id>)` call first, then falls back to the first
 * run of digits in the `setid:"..."` field.
 *
 * @param {string} html - raw page HTML
 * @returns {string|null} the ID as a string, or null when neither marker yields one
 */
function getEntryId(html) {
	const entryId = html.match(/showtagform\((\d+)\)/);

	if (entryId) {
		return entryId[1];
	}

	const setIdIndex = html.indexOf('setid:"');

	// indexOf reports "not found" as -1 (truthy) and a match at the very start as 0 (falsy),
	// so the index has to be compared explicitly rather than tested for truthiness
	if (setIdIndex !== -1) {
		const commaIndex = html.indexOf(',', setIdIndex);
		const setIdField = commaIndex === -1
			? html.slice(setIdIndex)
			: html.slice(setIdIndex, commaIndex);

		const setId = setIdField.match(/\d+/);

		if (setId) {
			return setId[0];
		}
	}

	return null;
}
/**
 * Turn a list of scene tiles into release objects.
 *
 * @param {Array<{el: Object, qu: Object}>} scenes - per-tile element and query helper;
 *   the helper's `q`, `url`, `date`, `all`, `imgs`, `img` and `html` methods are used
 * @param {Object} site - site record; `url` is used to absolutise image paths and as referer
 * @returns {Object[]} one release per tile with entryId, url, title, date, actors,
 *   poster, photos and, when a teaser source could be sliced out of a script, teaser
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;

		release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
		release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
		release.date = qu.date('.update_date', 'MM/DD/YYYY');

		release.actors = qu.all('.update_models a', true);

		const dvdPhotos = qu.imgs('.dvd_preview_thumb');
		const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;

		// first image becomes the poster, the rest are photos
		[release.poster, ...release.photos] = dvdPhotos.length
			? dvdPhotos
			: Array.from({ length: photoCount }).map((value, index) => {
				// try the indexed attributes first, then the default image source
				const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');

				return src ? {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					referer: site.url,
				} : null;
			}).filter(Boolean);

		const teaserScript = qu.html('script');

		if (teaserScript) {
			const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
			if (src) release.teaser = { src };
		}

		return release;
	});
}
/**
 * Parse the "coming soon" carousel into upcoming release objects.
 *
 * @param {string} html - page HTML containing `#coming_soon_carousel`
 * @param {Object} site - site record; `url` prefixes relative poster paths
 * @returns {Object[]} one release per carousel entry; `url` and `rating` are always null
 */
function scrapeUpcoming(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const scenesElements = $('#coming_soon_carousel').find('.table').toArray();

	return scenesElements.map((element) => {
		const entryId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];

		// detach the child elements of the second details block and keep a handle on them
		const details = $(element).find('.update_details_comingsoon')
			.eq(1)
			.children()
			.remove();

		// .end() steps back to the details block itself, read here for the title
		const title = details
			.end()
			.text()
			.trim();

		// the detached children's text is split into the actor names
		const actors = details
			.text()
			.trim()
			.split(', ');

		// the first 7 characters of the date text are dropped before parsing
		const date = moment
			.utc($(element).find('.update_date_comingsoon').text().slice(7), 'MM/DD/YYYY')
			.toDate();

		const photoElement = $(element).find('a img.thumbs');
		const posterPath = photoElement.attr('src');
		const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;

		// the teaser URL sits in the script that mentions the thumbnail div's class
		const videoClass = $(element).find('.update_thumbnail div').attr('class');
		const videoScript = $(element).find(`script:contains(${videoClass})`).html();
		const teaser = videoScript.slice(videoScript.indexOf('https://'), videoScript.indexOf('.mp4') + 4);

		return {
			url: null,
			entryId,
			title,
			date,
			actors,
			poster,
			teaser: {
				src: teaser,
			},
			rating: null,
			site,
		};
	});
}
/**
 * Build a release object from a scene page.
 *
 * @param {{html: string, qu: Object}} item - raw page HTML plus its query helper
 * @param {string} url - scene URL, stored on the release
 * @param {Object} site - site record; `url` and `slug` are read
 * @param {Object} include - flags; `trailer` and `photos` enable the respective lookups
 * @returns {Promise<Object>} release with entryId, title, description, date, actors, tags
 *   and, when available, poster, trailer, photos, movie and stars
 */
async function scrapeScene({ html, qu }, url, site, include) {
	const release = { url, site };

	release.entryId = getEntryId(html);
	release.title = qu.q('.title_bar_hilite', true);
	release.description = qu.q('.update_description', true);

	release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');

	release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
	release.tags = qu.all('.update_tags a', true);

	const posterPath = html.match(/useimage = "(.*)"/)?.[1];

	if (posterPath) {
		release.poster = {
			src: /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`,
			referer: site.url,
		};
	}

	if (include.trailer && site.slug !== 'manuelferrara') {
		const trailerLines = html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));

		if (trailerLines.length) {
			release.trailer = trailerLines.map((trailerLine) => {
				const src = trailerLine.match(/path:"([\w:/.&=?%]+)"/)?.[1];
				const quality = trailerLine.match(/movie_height:'(\d+)/)?.[1];

				return src && {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					// a reported height of 558 is recorded as 540
					quality: quality && Number(quality.replace('558', '540')),
				};
			}).filter(Boolean);
		}
	}

	if (include.photos) release.photos = await getPhotos(release.entryId, site);

	if (qu.exists('.update_dvds a')) {
		release.movie = {
			url: qu.url('.update_dvds a'),
			title: qu.q('.update_dvds a', true),
		};
	}

	// the character class strips whitespace, '|' and every letter of the "Avg Rating:" label
	const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
	if (stars) release.stars = stars;

	return release;
}
/**
 * Build a movie object, including its scenes, from a DVD page.
 *
 * @param {{el: Object, qu: Object}} item - page root element and its query helper
 * @param {string} url - movie URL, stored on the movie
 * @param {Object} site - site record, stored on the movie and passed to scrapeAll
 * @returns {Object} movie with entryId, title, covers, channel, date and, when scenes
 *   were scraped, a `scenes` array sorted by ascending date
 */
function scrapeMovie({ el, qu }, url, site) {
	const movie = { url, site };

	movie.entryId = qu.q('.dvd_details_overview .rating_box').dataset.id;
	movie.title = qu.q('.title_bar span', true);
	movie.covers = qu.urls('#dvd-cover-flip > a');
	movie.channel = slugify(qu.q('.update_date a', true), '');

	const sceneQus = ctxa(el, '.dvd_details');
	const scenes = scrapeAll(sceneQus, site);

	const curatedScenes = scenes
		?.map(scene => ({ ...scene, movie }))
		.sort((sceneA, sceneB) => sceneA.date - sceneB.date);

	// earliest scene date doubles as the movie date; stays undefined when there are no scenes
	movie.date = curatedScenes?.[0]?.date;

	return {
		...movie,
		...(curatedScenes && { scenes: curatedScenes }),
	};
}
/**
 * Parse a model page into an actor profile.
 *
 * @param {string} html - model page HTML; must contain a `.model_bio` element
 * @param {string} url - page URL (not used by the parser itself)
 * @param {string} actorName - name stored on the profile
 * @returns {Object} profile with name, releases and, where the bio text matches,
 *   birthdate, age, height, bust, waist, hip and avatar
 */
function scrapeProfile(html, url, actorName) {
	const { document } = new JSDOM(html).window;

	const bio = document.querySelector('.model_bio').textContent;
	const avatarEl = document.querySelector('.model_bio_pic img');

	const profile = {
		name: actorName,
	};

	const heightString = bio.match(/\d+ feet \d+ inches/);
	// the "Age:" label is followed by either a two-digit age or a full birth date
	const ageString = bio.match(/Age:\s*(\d{2})/);
	const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
	const measurementsString = bio.match(/\w+-\d+-\d+/);

	if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
	if (ageString) profile.age = Number(ageString[1]);

	if (heightString) profile.height = heightToCm(heightString[0]);

	if (measurementsString) {
		const [bust, waist, hip] = measurementsString[0].split('-');

		// bust stays a string because the pattern allows non-digit characters in it
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (avatarEl) {
		// candidate sources in the order they are tried, empty attributes dropped
		const avatarSources = [
			avatarEl.getAttribute('src0_3x'),
			avatarEl.getAttribute('src0_2x'),
			avatarEl.getAttribute('src0_1x'),
			avatarEl.getAttribute('src0'),
			avatarEl.getAttribute('src'),
		].filter(Boolean);

		if (avatarSources.length) profile.avatar = avatarSources;
	}

	profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);

	// NOTE(review): looks like leftover debug output — confirm whether it should be removed
	console.log(profile);

	return profile;
}
/**
 * Fetch and scrape one page of the latest scenes.
 *
 * @param {Object} site - site record; `parameters.latest` is used as a util.format
 *   template for the page number, otherwise the default listing under `site.url`
 * @param {number} [page=1] - listing page number
 * @returns {Promise<Object[]|*>} scraped releases, or `res.status` when the request was not ok
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.latest
		? util.format(site.parameters.latest, page)
		: `${site.url}/trial/categories/movies_${page}_d.html`;

	const res = await geta(url, '.update_details');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch and scrape the upcoming scenes.
 *
 * @param {Object} site - site record; `parameters.upcoming === false` disables the
 *   lookup, a string value overrides the default index URL
 * @returns {Promise<Object[]|number|null>} upcoming releases, null when disabled,
 *   or the HTTP status code when the response was not 200
 */
async function fetchUpcoming(site) {
	if (site.parameters?.upcoming === false) return null;

	const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeUpcoming(res.body.toString(), site);
	}

	return res.statusCode;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site record passed through to scrapeScene
 * @param {Object} baseRelease - accepted for interface parity, not used here
 * @param {*} preflight - accepted for interface parity, not used here
 * @param {Object} include - flags passed through to scrapeScene
 * @returns {Promise<Object|*>} the scraped release, or `res.status` when the request was not ok
 */
async function fetchScene(url, site, baseRelease, preflight, include) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
}
/**
 * Fetch a movie page and scrape it.
 *
 * @param {string} url - movie URL
 * @param {Object} site - site record passed through to scrapeMovie
 * @returns {Promise<Object|*>} the scraped movie, or `res.status` when the request was not ok
 */
async function fetchMovie(url, site) {
	const res = await get(url);

	return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}
/**
 * Look up an actor's profile page, trying two slug variants in turn.
 *
 * The hyphen-separated slug is requested first; the slug without a separator
 * is only requested when the first response is not a 200.
 *
 * @param {string} actorName - actor name to slugify
 * @returns {Promise<Object|null>} the scraped profile, or null when neither URL returned 200
 */
async function fetchProfile(actorName) {
	const actorSlugA = slugify(actorName, '-');
	const actorSlugB = slugify(actorName, '');

	const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
	const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;

	const resA = await bhttp.get(urlA);

	if (resA.statusCode === 200) {
		const profile = scrapeProfile(resA.body.toString(), urlA, actorName);

		return profile;
	}

	const resB = await bhttp.get(urlB);

	if (resB.statusCode === 200) {
		const profile = scrapeProfile(resB.body.toString(), urlB, actorName);

		return profile;
	}

	return null;
}
module.exports = {
fetchLatest,
fetchMovie,
fetchProfile,
fetchUpcoming,
fetchScene,
fetchLatest,
fetchMovie,
fetchProfile,
fetchUpcoming,
fetchScene,
};

View File

@@ -7,184 +7,184 @@ const moment = require('moment');
const { feetInchesToCm } = require('../utils/convert');
// two-letter shoot ID prefix -> site slug
const siteMapByKey = {
	PF: 'pornfidelity',
	TF: 'teenfidelity',
	KM: 'kellymadison',
};

// inverse lookup: site slug -> two-letter shoot ID prefix
const siteMapBySlug = Object.fromEntries(Object.entries(siteMapByKey).map(([key, value]) => [value, key]));
/**
 * Concatenate the trimmed text of the text nodes in a node list.
 *
 * Only nodes with `nodeType === 3` (text nodes) contribute; other nodes,
 * and therefore any text nested inside them, are skipped.
 *
 * @param {Iterable<Object>} parentEl - node collection, e.g. an element's `childNodes`
 * @returns {string} the trimmed text node contents joined without a separator
 */
function extractTextNode(parentEl) {
	return Array.from(parentEl).reduce((acc, el) => (el.nodeType === 3 ? `${acc}${el.textContent.trim()}` : acc), '');
}
/**
 * Parse the network-wide episode overview and keep the episodes of one site.
 *
 * @param {string} html - overview HTML containing `.episode` cards
 * @param {Object} site - site record; `slug` selects the episodes, `url` builds release URLs
 * @returns {Object[]} releases whose shoot ID prefix maps to `site.slug`
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	return Array.from(document.querySelectorAll('.episode'), (scene) => {
		const release = { site };

		release.shootId = scene.querySelector('.card-meta .text-right').textContent.trim();

		// the first two word characters of the shoot ID identify the site
		const siteId = release.shootId.match(/\w{2}/)[0];
		const siteSlug = siteMapByKey[siteId];

		if (site.slug !== siteSlug) {
			// using generic network overview, scene is not from the site we want
			return null;
		}

		const durationEl = scene.querySelector('.content a');

		[release.entryId] = durationEl.href.match(/\d+$/);
		release.url = `${site.url}/episodes/${release.entryId}`;

		release.title = scene.querySelector('h5 a').textContent.trim();

		const dateEl = scene.querySelector('.card-meta .text-left').childNodes;
		const dateString = extractTextNode(dateEl);

		release.date = moment.utc(dateString, ['MMM D', 'MMM D, YYYY']).toDate();
		release.actors = Array.from(scene.querySelectorAll('.models a'), el => el.textContent);

		// duration is listed in whole minutes, stored in seconds
		const durationString = durationEl.textContent.match(/\d+ min/);
		if (durationString) release.duration = Number(durationString[0].match(/\d+/)[0]) * 60;

		release.poster = scene.querySelector('.card-img-top').dataset.src;
		release.teaser = {
			src: scene.querySelector('video').src,
		};

		return release;
	}).filter(scene => scene);
}
/**
 * Build a release object from an episode page.
 *
 * @param {string} html - episode page HTML
 * @param {string} url - episode URL; its first run of digits becomes the entry ID
 *   when no base release is given
 * @param {Object} site - site record, stored on the release
 * @param {Object} [baseRelease] - previously scraped release; when it already has a
 *   poster, the page's poster is stored as a photo instead
 * @returns {Object} release with title, channel, shootId, description, date, actors,
 *   duration, trailer and poster or photos
 */
function scrapeScene(html, url, site, baseRelease) {
	const { document } = new JSDOM(html).window;
	const release = { url, site };

	const titleEl = document.querySelector('.card-header.row h4').childNodes;
	const titleString = extractTextNode(titleEl);

	if (!baseRelease) [release.entryId] = url.match(/\d+/);

	// the title text ends in "- <channel> #<episode>", which is cut off here
	release.title = titleString
		.replace('Trailer: ', '')
		.replace(/- \w+ #\d+$/, '')
		.trim();

	release.channel = titleString.match(/\w+ #\d+$/)[0].match(/\w+/)[0].toLowerCase();

	const episode = titleString.match(/#\d+$/)[0];
	const siteKey = siteMapBySlug[release.channel];

	release.shootId = `${siteKey} ${episode}`;
	release.description = document.querySelector('p.card-text').textContent.trim();

	const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes;
	const dateString = extractTextNode(dateEl);

	release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate();
	release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent);

	const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent;
	const durationString = durationRaw.match(/\d+:\d+/)[0];

	// prefix an hours field so moment reads the value as mm:ss
	release.duration = moment.duration(`00:${durationString}`).asSeconds();

	// the trailer sources are the argument of the player.updateSrc(...) call
	const bodyHtml = document.body.innerHTML;
	const trailerStart = bodyHtml.indexOf('player.updateSrc');
	const trailerString = bodyHtml.slice(trailerStart, bodyHtml.indexOf(');', trailerStart));

	const trailers = trailerString.match(/https:\/\/.*.mp4/g);
	const resolutions = trailerString.match(/res: '\d+'/g).map((res) => {
		const resolution = Number(res.match(/\d+/)[0]);

		return resolution === 4000 ? 2160 : resolution; // 4k is not 4000 pixels high
	});

	// sources and resolutions are paired up by their order of appearance
	release.trailer = trailers.map((trailer, index) => ({
		src: trailer,
		quality: resolutions[index],
	}));

	const posterPrefix = html.indexOf('poster:');
	const poster = html.slice(html.indexOf('http', posterPrefix), html.indexOf('.jpg', posterPrefix) + 4);

	if (baseRelease?.poster) release.photos = [poster];
	else release.poster = poster;

	return release;
}
/**
 * Parse a model page's bio table into an actor profile.
 *
 * @param {string} html - model page HTML
 * @param {string} actorName - name stored on the profile
 * @returns {Object} profile with name and, where present in the table, bust, waist,
 *   hip, birthPlace, height, ethnicity and avatar
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// first column holds the labels (last character dropped), second column the values
	const bioKeys = Array.from(document.querySelectorAll('table.table td:nth-child(1)'), el => el.textContent.slice(0, -1));
	const bioValues = Array.from(document.querySelectorAll('table.table td:nth-child(2)'), el => el.textContent);
	const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});

	if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio.Birthplace) profile.birthPlace = bio.Birthplace;

	if (bio.Height) {
		const [feet, inches] = bio.Height.match(/\d+/g);
		profile.height = feetInchesToCm(feet, inches);
	}

	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

	// the avatar is the first image whose source contains 'model'
	const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.match('model'));

	if (avatarEl) profile.avatar = avatarEl.src;

	return profile;
}
/**
 * Fetch one page of the shared episode overview and scrape this site's episodes.
 *
 * @param {Object} site - site record passed through to scrapeLatest
 * @param {number} [page=1] - overview page number
 * @returns {Promise<Object[]|null>} scraped releases, or null unless the response is a
 *   200 whose body reports `status: 'success'`
 */
async function fetchLatest(site, page = 1) {
	const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
	const res = await bhttp.get(url, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	if (res.statusCode === 200 && res.body.status === 'success') {
		return scrapeLatest(res.body.html, site);
	}

	return null;
}
/**
 * Fetch an episode page and scrape it.
 *
 * Only the path of `url` is kept; the request always goes to www.kellymadison.com.
 *
 * @param {string} url - episode URL
 * @param {Object} site - site record passed through to scrapeScene
 * @param {Object} [baseRelease] - previously scraped release passed through to scrapeScene
 * @returns {Promise<Object>} the scraped release
 */
async function fetchScene(url, site, baseRelease) {
	const { pathname } = new URL(url);

	const res = await bhttp.get(`https://www.kellymadison.com${pathname}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	return scrapeScene(res.body.toString(), url, site, baseRelease);
}
/**
 * Fetch and scrape an actor's model page.
 *
 * @param {string} actorName - actor name; lowercased with whitespace runs replaced
 *   by hyphens to form the URL slug
 * @returns {Promise<Object|null>} the scraped profile, or null when the response is not a 200
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
	const res = await bhttp.get(`https://www.kellymadison.com/models/${actorSlug}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -5,116 +5,116 @@ const cheerio = require('cheerio');
const moment = require('moment');
/**
 * Parse a shoot listing page into release objects.
 *
 * @param {string} html - listing HTML containing `.shoot-list .shoot` elements
 * @param {Object} site - site record, stored on every release
 * @returns {Object[]} one release per shoot with url, shootId, entryId, title, actors,
 *   date, photos, poster, rating, duration and site
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.shoot-list .shoot').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('.shoot-thumb-title a');
		const href = sceneLinkElement.attr('href');
		const url = `https://kink.com${href}`;
		// the shoot ID is the third '/'-separated segment of the link
		const shootId = href.split('/')[2];
		const title = sceneLinkElement.text().trim();

		const poster = $(element).find('.adimage').attr('src');
		const photos = $(element).find('.rollover .roll-image').map((photoIndex, photoElement) => $(photoElement).attr('data-imagesrc')).toArray();

		const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
		const actors = $(element).find('.shoot-thumb-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
		// the data-rating attribute is divided by 10 to get the star value
		const stars = $(element).find('.average-rating').attr('data-rating') / 10;

		const timestamp = $(element).find('.video span').text();
		const timestampComponents = timestamp.split(':'); // fix mixed hh:mm:ss and mm:ss format
		const duration = moment.duration(timestampComponents.length > 2 ? timestamp : `0:${timestamp}`).asSeconds();

		return {
			url,
			shootId,
			entryId: shootId,
			title,
			actors,
			date,
			photos,
			poster,
			rating: {
				stars,
			},
			duration,
			site,
		};
	});
}
/**
 * Extract release details from the markup of a single scene page.
 *
 * @param {string} html - markup of the scene page
 * @param {string} url - scene URL, copied into the result
 * @param {string} shootId - shoot identifier, used for both `shootId` and `entryId`
 * @param {object} ratingRes - ratings response; `body.average` is used as the star rating
 * @param {object} site - passed through unchanged as `site`
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, shootId, ratingRes, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
	const title = $('h1.shoot-title span.favorite-button').attr('data-title');
	const actorsRaw = $('.shoot-info p.starring');

	const photos = $('.gallery .thumb img').map((photoIndex, photoElement) => $(photoElement).attr('data-image-file')).toArray();
	const trailerVideo = $('.player span[data-type="trailer-src"]').attr('data-url');
	const trailerPoster = $('.player video#kink-player').attr('poster');

	// the date is read from the element directly preceding the 'starring' paragraph
	const date = moment.utc($(actorsRaw)
		.prev()
		.text()
		.trim()
		.replace('Date: ', ''),
	'MMMM DD, YYYY')
		.toDate();

	const actors = $(actorsRaw).find('span.names a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
	const description = $('.shoot-info .description').text().trim();

	const { average: stars } = ratingRes.body;

	// the channel slug is the third '/'-separated component of the logo link
	const siteName = $('.shoot-logo a').attr('href').split('/')[2];
	const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();

	const tags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	const channel = siteSlug;

	return {
		url,
		shootId,
		entryId: shootId,
		title,
		date,
		actors,
		description,
		photos,
		poster: trailerPoster,
		trailer: {
			src: trailerVideo,
			quality: 480,
		},
		rating: {
			stars,
		},
		tags,
		site,
		channel,
	};
}
/**
 * Fetch one page of the site's latest releases and scrape it.
 *
 * @param {object} site - site record; `site.url` is the base of the request URL
 * @param {number} [page=1] - page number inserted into the URL
 * @returns {Promise<*>} whatever `scrapeLatest` returns for the page
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/latest/page/${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch a scene page together with its rating and scrape both.
 *
 * @param {string} url - absolute scene URL; the third path component is used as the shoot ID
 * @param {object} site - passed through to `scrapeScene`
 * @returns {Promise<*>} whatever `scrapeScene` returns
 */
async function fetchScene(url, site) {
	const shootId = new URL(url).pathname.split('/')[2];

	// the page and the rating are independent, so request them in parallel
	const [res, ratingRes] = await Promise.all([
		bhttp.get(url),
		bhttp.get(`https://kink.com/api/ratings/${shootId}`),
	]);

	return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -8,201 +8,201 @@ const moment = require('moment');
const slugify = require('../utils/slugify');
/**
 * Split a scene title into its shoot ID and the remaining title.
 *
 * The last space-separated word is treated as the shoot ID when it contains
 * one of the known studio prefixes followed by digits; in that case the word
 * is removed from the title.
 *
 * @param {string} originalTitle - title as found on the page
 * @returns {{ shootId: (string|null), title: string }} the matched ID (or null) and the title
 */
function extractTitle(originalTitle) {
	const titleComponents = originalTitle.split(' ');
	const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
	const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
	const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;

	return { shootId, title };
}
/**
 * Resolve a poster URL for a scene thumbnail.
 *
 * Uses the URL between parentheses in the element's `style` attribute when
 * present. Otherwise picks a random entry from the JSON list in the
 * `data-casting` attribute; an entry is either a single number or a string
 * range of two numbers separated by '-', from which a random value is drawn.
 *
 * @param {object} posterElement - element wrapper exposing `attr(name)`
 * @param {string} sceneId - scene identifier used in the generated URL
 * @returns {(string|null)} poster URL, or null when no poster can be derived
 */
function getPoster(posterElement, sceneId) {
	const posterStyle = posterElement.attr('style');

	if (posterStyle) {
		return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
	}

	const posterRange = posterElement.attr('data-casting');
	const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

	if (!posterRangeData) {
		// no style and no casting data, nothing to build a poster from
		return null;
	}

	const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

	if (!posterTimeRange) {
		return null;
	}

	if (typeof posterTimeRange === 'number') {
		// poster time is already a single time value
		return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
	}

	// accept the bounds in either order, and a lone value as a one-point range
	const [boundA, boundB = boundA] = posterTimeRange.split('-').map(Number);
	const min = Math.min(boundA, boundB);
	const max = Math.max(boundA, boundB);
	const posterTime = Math.floor(Math.random() * (max - min + 1)) + min;

	return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
/**
 * Scrape the release thumbnails from a listing page.
 *
 * @param {string} html - markup of the listing page
 * @param {object} site - attached unchanged to every release as `site`
 * @returns {object[]} one release per `.thumbnails > div` element
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const scenesElements = $('.thumbnails > div').toArray();

	return scenesElements.map((element) => {
		const sceneLinkElement = $(element).find('.thumbnail-title a');
		const url = sceneLinkElement.attr('href');

		const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
		const { shootId, title } = extractTitle(originalTitle);
		const entryId = new URL(url).pathname.split('/')[2];

		const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();

		const sceneId = $(element).attr('data-content');
		const posterElement = $(element).find('.thumbnail-avatar');

		const poster = getPoster(posterElement, sceneId);

		return {
			url,
			shootId,
			entryId,
			title,
			date,
			poster,
			site,
		};
	});
}
/**
 * Scrape a release from a scene page.
 *
 * @param {string} html - markup of the scene page
 * @param {string} url - scene URL; its third path component becomes `entryId`
 * @param {object} site - accepted for signature parity, not read here
 * @param {boolean} useGallery - read photos from `.gallery` when true, from `.screenshots` otherwise
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, site, useGallery) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// the player configuration is a JSON object embedded in the WatchPage script
	const playerObject = $('script:contains("new WatchPage")').html();
	const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1);
	const data = playerData && JSON.parse(playerData);

	const release = { url };

	const originalTitle = $('h1.watchpage-title').text().trim();
	const { shootId, title } = extractTitle(originalTitle);

	release.shootId = shootId;
	release.entryId = new URL(url).pathname.split('/')[2];

	release.title = title;
	release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();

	const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();

	// prefer the meta description, fall back on the description row
	release.description = $('meta[name="description"]')?.attr('content')?.trim()
		|| (descriptionElement && $(descriptionElement).find('dd').text().trim());

	release.actors = $(actorsElement)
		.find('a[href*="com/model"]')
		.map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
	release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	const photos = useGallery
		? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
		: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();

	release.photos = photos.map((source) => {
		// source without parameters sometimes serves larger preview photo;
		// the thumbnail itself is not kept as fallback, usually enough high res photos are available
		const { origin, pathname } = new URL(source);

		return `${origin}${pathname}`;
	});

	// the player may lack a style attribute; treat that as 'no poster' so the photo fallback applies
	const posterStyle = $('#player').attr('style');
	const poster = posterStyle ? posterStyle.slice(posterStyle.indexOf('(') + 1, -1) : null;

	release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback

	if (data) {
		const qualityMap = {
			web: 240,
			vga: 480,
			hd: 720,
			'1080p': 1080,
		};

		release.trailer = data.clip.qualities.map(trailer => ({
			src: trailer.src,
			type: trailer.type,
			quality: qualityMap[trailer.quality] || trailer.quality,
		}));
	}

	const studioName = $('.watchpage-studioname').first().text().trim();
	release.studio = slugify(studioName, '');

	return release;
}
/**
 * Scrape an actor profile from a model page.
 *
 * @param {string} html - markup of the model page
 * @param {string} _url - unused
 * @param {string} actorName - stored as the profile name
 * @returns {Promise<object>} profile with `name`, `birthPlace` and, when found, `age` and `avatar`
 */
async function scrapeProfile(html, _url, actorName) {
	const { document } = new JSDOM(html).window;

	const profile = {
		name: actorName,
	};

	const avatarEl = document.querySelector('.model--avatar img[src^="http"]');
	const entries = Array.from(document.querySelectorAll('.model--description tr'), el => el.textContent.replace(/\n/g, '').split(':'));

	// build a { key: value } map from the 'key: value' table rows
	const bio = entries
		.filter(entry => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
		.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {});

	profile.birthPlace = bio.Nationality;

	if (bio.Age) profile.age = bio.Age;
	if (avatarEl) profile.avatar = avatarEl.src;

	return profile;
}
/**
 * Fetch one page of new videos and scrape it.
 *
 * @param {object} site - site record; `site.url` is the base of the request URL
 * @param {number} [page=1] - page number appended to the URL
 * @returns {Promise<*>} whatever `scrapeLatest` returns for the page
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/new-videos/${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL; the gallery (or screenshots) view of it is requested
 * @param {object} site - passed through to `scrapeScene`
 * @returns {Promise<*>} whatever `scrapeScene` returns
 */
async function fetchScene(url, site) {
	const useGallery = true;

	// TODO: fall back on screenshots when gallery is not available
	const res = useGallery
		? await bhttp.get(`${url}/gallery#gallery`)
		: await bhttp.get(`${url}/screenshots#screenshots`);

	return scrapeScene(res.body.toString(), url, site, useGallery);
}
/**
 * Look an actor up through the autocomplete search and scrape the profile page.
 *
 * @param {string} actorName - name to search for
 * @returns {Promise<(object|null)>} result of `scrapeProfile` for the first term of type 'model', or null when there is none
 */
async function fetchProfile(actorName) {
	// URLSearchParams encodes every space as '+' and escapes other reserved characters
	const query = new URLSearchParams({ q: actorName });
	const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?${query}`);
	const data = res.body;

	const result = data.terms.find(item => item.type === 'model');

	if (result) {
		const bioRes = await bhttp.get(result.url);
		const html = bioRes.body.toString();

		return scrapeProfile(html, result.url, actorName);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared mindgeek scraper, passing this
 * network's fixed 'men' and 'modelprofile' arguments.
 *
 * @param {string} actorName - name of the actor to look up
 * @returns {Promise<*>} whatever the mindgeek `fetchProfile` returns
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'men', 'modelprofile');
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared mindgeek scraper, passing this
 * network's fixed 'devianthardcore' argument.
 *
 * @param {string} actorName - name of the actor to look up
 * @returns {Promise<*>} whatever the mindgeek `fetchProfile` returns
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'devianthardcore');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -8,232 +8,232 @@ const moment = require('moment');
const { get } = require('../utils/http');
// Maps phrases looked for in a (lowercased) description to the tag they imply.
// The keys are joined in this order into a regex alternation, so longer
// variants ('gaped', 'gapes') must stay ahead of their prefix ('gape').
const descriptionTags = {
	'anal cream pie': 'anal creampie',
	'ass to mouth': 'ass to mouth',
	'cream pie in her ass': 'anal creampie',
	'eats ass': 'ass eating',
	facial: 'facial',
	gaped: 'gaping',
	gapes: 'gaping',
	gape: 'gaping',
	'rectal cream pie': 'anal creampie',
	rimming: 'ass eating',
};
/**
 * Derive tags from the phrases of `descriptionTags` found in a description.
 *
 * @param {(string|null|undefined)} description - free text; a falsy value is treated as empty
 * @returns {string[]} one tag per matched phrase, in order of appearance (may contain repeats)
 */
function deriveTagsFromDescription(description) {
	const matches = (description || '').toLowerCase().match(new RegExp(Object.keys(descriptionTags).join('|'), 'g'));

	return matches
		? matches.map(match => descriptionTags[match])
		: [];
}
/**
 * Scrape releases from a listing page using the `.content-item` layout.
 *
 * @param {string} html - markup of the listing page
 * @param {object} site - attached unchanged to every scene as `site`
 * @returns {Promise<object[]>} one scene per `.content-item-large` / `.content-item` element
 */
async function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;
	const sceneElements = document.querySelectorAll('.content-item-large, .content-item');

	return Promise.all(Array.from(sceneElements, async (element) => {
		// cheerio is only used for reading inline CSS of the hover thumbnails
		const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });

		const titleElement = element.querySelector('h3.title a');
		const title = titleElement.textContent;
		const url = titleElement.href;
		const entryId = url.split('/').slice(-2)[0];

		const descriptionElement = element.querySelector('.desc');
		const description = descriptionElement && descriptionElement.textContent.trim();
		const date = moment(element.querySelector('.date, time').textContent, 'Do MMM YYYY').toDate();

		const actors = Array.from(element.querySelectorAll('h4.models a'), actorElement => actorElement.textContent);

		const durationString = element.querySelector('.total-time').textContent.trim();
		// timestamp is sometimes 00:00, sometimes 0:00:00
		const duration = durationString.split(':').length === 3
			? moment.duration(durationString).asSeconds()
			: moment.duration(`00:${durationString}`).asSeconds();

		const ratingElement = element.querySelector('.rating');
		const stars = ratingElement && ratingElement.dataset.rating;

		// first image is used as the poster, the remaining ones as photos
		const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('img'), imageElement => imageElement.src);
		const secondaryPhotos = $('.thumb-top, .thumb-bottom')
			.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
			.toArray()
			.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));

		const photos = [...primaryPhotos, ...secondaryPhotos];
		const tags = deriveTagsFromDescription(description);

		const scene = {
			url,
			entryId,
			title,
			description,
			actors,
			director: 'Mike Adriano',
			date,
			duration,
			tags,
			poster,
			photos,
			rating: {
				stars,
			},
			site,
		};

		return scene;
	}));
}
/**
 * Scrape releases from a listing page using the `.content-border` layout.
 *
 * @param {string} html - markup of the listing page
 * @returns {Promise<object[]>} one release per `.content-border` element
 */
async function scrapeLatestB(html) {
	const { document } = new JSDOM(html).window;
	const sceneElements = document.querySelectorAll('.content-border');

	return Promise.all(Array.from(sceneElements, async (element) => {
		// cheerio is only used for reading inline CSS of the hover thumbnails
		const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });
		const release = {
			director: 'Mike Adriano',
		};

		const titleElement = element.querySelector('.content-title-wrap a');
		release.title = titleElement.title || titleElement.textContent.trim();
		release.url = titleElement.href;
		release.entryId = release.url.split('/').slice(-2)[0];

		release.description = element.querySelector('.content-description').textContent.trim();

		// a moment object is always truthy, so the date source is chosen by
		// element presence rather than with `||`
		const mobileDateElement = element.querySelector('.mobile-date');
		release.date = mobileDateElement
			? moment(mobileDateElement.textContent, 'MM/DD/YYYY').toDate()
			: moment(element.querySelector('.date').textContent, 'Do MMM YYYY').toDate();

		release.actors = Array.from(element.querySelectorAll('.content-models a'), actorElement => actorElement.textContent);

		const durationString = element.querySelector('.total-time').textContent.trim();
		// timestamp is sometimes 00:00, sometimes 0:00:00
		release.duration = durationString.split(':').length === 3
			? moment.duration(durationString).asSeconds()
			: moment.duration(`00:${durationString}`).asSeconds();

		// first image is used as the poster, the remaining ones as photos
		const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('a img'), imageElement => imageElement.src);
		const secondaryPhotos = $('.thumb-mouseover')
			.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
			.toArray()
			.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));

		release.poster = poster;
		release.photos = [...primaryPhotos, ...secondaryPhotos];

		release.tags = deriveTagsFromDescription(release.description);

		return release;
	}));
}
/**
 * Scrape a release from a scene page whose player sits in `.content-page-header`.
 *
 * @param {string} html - markup of the scene page
 * @param {string} url - scene URL; its second-to-last path segment becomes `entryId`
 * @returns {Promise<object>} the scraped release
 */
async function scrapeSceneA(html, url) {
	const { document } = new JSDOM(html).window;
	const element = document.querySelector('.content-page-info');
	const release = {
		url,
		director: 'Mike Adriano',
	};

	release.entryId = url.split('/').slice(-2)[0];
	release.title = element.querySelector('.title').textContent.trim();
	release.description = element.querySelector('.desc').textContent.trim();
	release.date = moment(element.querySelector('.post-date').textContent.trim(), 'Do MMM YYYY').toDate();

	release.actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);

	const durationString = element.querySelector('.total-time').textContent.trim();
	// timestamp is sometimes 00:00, sometimes 0:00:00
	release.duration = durationString.split(':').length === 3
		? moment.duration(durationString).asSeconds()
		: moment.duration(`00:${durationString}`).asSeconds();

	const { poster } = document.querySelector('.content-page-header video');
	const { src, type } = document.querySelector('.content-page-header source');

	release.poster = poster;
	release.trailer = { src, type };

	release.tags = deriveTagsFromDescription(release.description);

	return release;
}
/**
 * Scrape a release from a scene page whose player sits in `.content-page-header-inner`.
 *
 * @param {string} html - markup of the scene page
 * @param {string} url - scene URL; its second-to-last path segment becomes `entryId`
 * @param {object} site - attached unchanged to the scene as `site`
 * @returns {Promise<object>} the scraped scene
 */
async function scrapeSceneB(html, url, site) {
	const { document } = new JSDOM(html).window;
	const element = document.querySelector('.content-page-info');

	const entryId = url.split('/').slice(-2)[0];
	const title = element.querySelector('.title').textContent.trim();
	const description = element.querySelector('.desc').textContent.trim();
	const date = moment(element.querySelector('.date').textContent.trim(), 'Do MMM YYYY').toDate();

	const actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);

	const durationString = element.querySelector('.total-time').textContent.trim();
	// timestamp is sometimes 00:00, sometimes 0:00:00
	const duration = durationString.split(':').length === 3
		? moment.duration(durationString).asSeconds()
		: moment.duration(`00:${durationString}`).asSeconds();

	const { poster } = document.querySelector('.content-page-header-inner video');
	const { src, type } = document.querySelector('.content-page-header-inner source');

	const tags = deriveTagsFromDescription(description);

	const scene = {
		url,
		entryId,
		title,
		description,
		actors,
		director: 'Mike Adriano',
		date,
		duration,
		tags,
		poster,
		trailer: {
			src,
			type,
		},
		site,
	};

	return scene;
}
/**
 * Fetch a page of videos from the site's tour subdomain and scrape it with
 * the layout-specific scraper.
 *
 * @param {object} site - site record; the host of `site.url` selects the scraper
 * @param {number} [page=1] - page number used in the query string
 * @returns {Promise<*>} scraped releases, or the response code when it is not 200
 */
async function fetchLatest(site, page = 1) {
	const { host } = new URL(site.url);
	const url = `https://tour.${host}/videos?page=${page}`;

	const res = await get(url);

	if (res.code === 200) {
		if (host === 'trueanal.com' || host === 'swallowed.com') {
			return scrapeLatestA(res.html, site);
		}

		return scrapeLatestB(res.html, site);
	}

	return res.code;
}
/**
 * Fetch a scene page and scrape it with the layout-specific scraper.
 *
 * @param {string} url - scene URL to request
 * @param {object} site - site record; the host of `site.url` selects the scraper
 * @returns {Promise<*>} scraped release, or the response code when it is not 200
 */
async function fetchScene(url, site) {
	const { host } = new URL(site.url);
	const res = await get(url);

	if (res.code === 200) {
		if (host === 'trueanal.com' || host === 'swallowed.com') {
			return scrapeSceneA(res.body.toString(), url, site);
		}

		return scrapeSceneB(res.body.toString(), url, site);
	}

	return res.code;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared mindgeek scraper, passing this
 * network's fixed 'milehighmedia' argument.
 *
 * @param {string} actorName - name of the actor to look up
 * @returns {Promise<*>} whatever the mindgeek `fetchProfile` returns
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'milehighmedia');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -13,257 +13,257 @@ const { inchesToCm, lbsToKg } = require('../utils/convert');
const { cookieToData } = require('../utils/cookies');
/**
 * Collect the image URLs of a scene.
 *
 * Uses the `poster` images when present; otherwise the main and (optional)
 * secondary card images, with '.thumb' removed from their URLs.
 *
 * @param {object} scene - scene data with an `images` object
 * @returns {string[]} `xl` image URLs, or an empty array when neither image set exists
 */
function getThumbs(scene) {
	if (scene.images.poster) {
		return scene.images.poster.map(image => image.xl.url);
	}

	if (scene.images.card_main_rect) {
		return scene.images.card_main_rect
			.concat(scene.images.card_secondary_rect || [])
			.map(image => image.xl.url.replace('.thumb', ''));
	}

	return [];
}
/**
 * Convert one API release item into a release, honouring the site's
 * `parameters.extract` filter.
 *
 * @param {object} data - release item from the API
 * @param {object} site - site record; `parameters` and `url` / `network.url` are read
 * @returns {(object|null)} the release, or null when the item is filtered out
 */
function scrapeLatestX(data, site) {
	if (site.parameters?.extract === true && data.collections.length > 0) {
		// release should not belong to any channel
		return null;
	}

	if (typeof site.parameters?.extract === 'string' && !data.collections.some(collection => collection.shortName === site.parameters.extract)) {
		// release should belong to specific channel
		return null;
	}

	const release = {
		entryId: data.id,
		title: data.title,
		description: data.description,
	};

	// scene URLs live on the site itself when `native` is set, on the network otherwise
	const hostname = site.parameters?.native ? site.url : site.network.url;

	release.url = `${hostname}/scene/${release.entryId}/`;
	release.date = new Date(data.dateReleased);
	release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender }));

	release.tags = data.tags.map(tag => tag.name);

	release.duration = data.videos.mediabook?.length;
	// first thumb is used as the poster, the remaining ones as photos
	[release.poster, ...release.photos] = getThumbs(data);

	const teaserSources = data.videos.mediabook?.files;

	if (teaserSources) {
		release.teaser = Object.values(teaserSources).map(teaser => ({
			src: teaser.urls.view,
			quality: parseInt(teaser.format, 10),
		}));
	}

	return release;
}
/**
 * Scrape every API release item and drop the ones scrapeLatestX rejected.
 *
 * @param {Object[]} items - Release objects from the API.
 * @param {Object} site - Site record forwarded to scrapeLatestX.
 * @returns {Promise<Object[]>} The scraped releases, without null entries.
 */
async function scrapeLatest(items, site) {
	const latestReleases = await Promise.all(items.map(async data => scrapeLatestX(data, site)));

	return latestReleases.filter(Boolean);
}
/**
 * Convert a single API release into a scraped scene.
 *
 * @param {Object} data - API release; reads id, title, description, dateReleased,
 *   actors, tags, videos, collections, brand and (through getThumbs) images.
 * @param {?string} url - Scene URL; when falsy one is built from the network name
 *   (or `data.brand`) and the entry ID.
 * @param {*} _site - Unused.
 * @param {string} [networkName] - Domain name used when building the fallback URL.
 * @returns {Object} The scraped release.
 */
function scrapeScene(data, url, _site, networkName) {
	const release = {};

	const { id: entryId, title, description } = data;

	release.entryId = entryId;
	release.title = title;
	release.description = description;

	release.date = new Date(data.dateReleased);
	release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender }));

	release.tags = data.tags.map(tag => tag.name);

	[release.poster, ...release.photos] = getThumbs(data);

	const teaserSources = data.videos.mediabook?.files;

	if (teaserSources) {
		release.teaser = Object.values(teaserSources).map(teaser => ({
			src: teaser.urls.view,
			quality: parseInt(teaser.format, 10),
		}));
	}

	// the first collection names the channel; fall back to the brand
	const siteName = data.collections[0]?.name || data.brand;
	release.channel = slugify(siteName, '');

	release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;

	return release;
}
/**
 * Work out the scene listing URL for a site.
 *
 * @param {Object} site - Site record; reads url, name, parameters and network.url.
 * @returns {string} `site.url` itself when it already carries a `?site=<id>` query,
 *   `<site.url>/scenes` for native or extract sites, or the network listing filtered
 *   by `parameters.siteId`.
 * @throws {Error} When none of these apply to the site.
 */
function getUrl(site) {
	const { search } = new URL(site.url);

	if (/\?site=\d+/.test(search)) {
		return site.url;
	}

	// native and extract sites share the same listing path
	if (site.parameters?.native || site.parameters?.extract) {
		return `${site.url}/scenes`;
	}

	if (site.parameters?.siteId) {
		return `${site.network.url}/scenes?site=${site.parameters.siteId}`;
	}

	throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`);
}
/**
 * Open an HTTP session on the given URL and read the instance token from its cookies.
 *
 * @param {string} url - Page to request; also the URL the cookies are read for.
 * @returns {Promise<{session: Object, instanceToken: string}>} The session and the
 *   value of the `instance_token` cookie (undefined when that cookie is absent).
 */
async function getSession(url) {
	const cookieJar = new CookieJar();
	const session = bhttp.session({ cookieJar });

	// this request populates the cookie jar
	await session.get(url);

	const cookieString = await cookieJar.getCookieStringAsync(url);
	const { instance_token: instanceToken } = cookieToData(cookieString);

	return { session, instanceToken };
}
/**
 * Build an actor profile from the API actor record and the actor's HTML page.
 *
 * @param {Object} data - API actor record; reads bio, aliases, measurements, gender,
 *   birthPlace, height, weight and images.card_main_rect.
 * @param {string} html - Actor page markup; only the date of birth is read from it.
 * @param {?Object[]} [releases=[]] - API releases featuring the actor; null is
 *   treated like an empty list.
 * @param {string} [networkName] - Forwarded to scrapeScene for building scene URLs.
 * @returns {Object} The scraped profile.
 */
function scrapeProfile(data, html, releases = [], networkName) {
	const { qa, qd } = ex(html);

	const profile = {
		description: data.bio,
		aliases: data.aliases,
	};

	// tolerate a record without measurements instead of throwing on split
	const [bust, waist, hip] = (data.measurements || '').split('-');

	profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;

	if (profile.gender === 'female') {
		if (bust) profile.bust = bust.toUpperCase();
		if (waist) profile.waist = waist;
		if (hip) profile.hip = hip;
	}

	if (data.birthPlace) profile.birthPlace = data.birthPlace;
	if (data.height) profile.height = inchesToCm(data.height);
	if (data.weight) profile.weight = lbsToKg(data.weight);

	const avatar = data.images.card_main_rect?.[0];

	if (avatar) {
		// largest available size first
		profile.avatar = avatar.xl?.url
			|| avatar.lg?.url
			|| avatar.md?.url
			|| avatar.sm?.url
			|| avatar.xs?.url;
	}

	const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
	if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');

	// callers may pass null, which the default parameter does not cover
	profile.releases = (releases || []).map(release => scrapeScene(release, null, null, networkName));

	return profile;
}
/**
 * Fetch one page of the latest releases for a site from the releases API.
 *
 * @param {Object} site - Site record; reads url and parameters, and is passed to getUrl.
 * @param {number} [page=1] - 1-based page number; each page holds 10 releases.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the API did not
 *   answer 200 with a `result`.
 */
async function fetchLatest(site, page = 1) {
	const url = getUrl(site);
	const { search } = new URL(url);
	const siteId = new URLSearchParams(search).get('site');

	const { session, instanceToken } = await getSession(url);

	// ask for everything dated before tomorrow
	const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
	const limit = 10;

	// native and extract sites are requested without a collection filter
	const collectionFilter = site.parameters?.native || site.parameters?.extract
		? ''
		: `collectionId=${siteId}&`;
	const apiUrl = `https://site-api.project1service.com/v2/releases?${collectionFilter}dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

	const res = await session.get(apiUrl, {
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: url,
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		return scrapeLatest(res.body.result, site);
	}

	return null;
}
/**
 * Fetch and scrape a single release by its URL.
 *
 * @param {string} url - Scene URL; the first run of digits in it is used as the
 *   release ID.
 * @param {Object} site - Site record, forwarded to scrapeScene.
 * @returns {Promise<Object|null>} The scraped scene, or null when the URL holds no
 *   ID or the API did not answer 200 with a `result`.
 */
async function fetchScene(url, site) {
	const entryIdMatch = url.match(/\d+/);

	if (!entryIdMatch) {
		// nothing to look up; fail like any other unsuccessful fetch
		return null;
	}

	const entryId = entryIdMatch[0];
	const { session, instanceToken } = await getSession(url);

	const res = await session.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
		headers: {
			Instance: instanceToken,
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		return scrapeScene(res.body.result, url, site);
	}

	return null;
}
/**
 * Find an actor by name through the actors API and scrape their profile.
 *
 * @param {string} actorName - Name to search for; matched case-insensitively
 *   against the names in the search result.
 * @param {string} networkName - Domain name (without 'www.' and '.com') of the
 *   network site to open the session and actor page on.
 * @param {string} [actorPath='model'] - Path segment of actor pages on that site.
 * @returns {Promise<Object|null>} The scraped profile, or null when the search
 *   failed, no actor matched, or the actor page could not be fetched.
 */
async function fetchProfile(actorName, networkName, actorPath = 'model') {
	const url = `https://www.${networkName}.com`;
	const { session, instanceToken } = await getSession(url);

	// encodeURIComponent also escapes &, =, + and ?, which encodeURI leaves in place
	const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURIComponent(actorName)}`, {
		headers: {
			Instance: instanceToken,
		},
	});

	if (res.statusCode !== 200 || !Array.isArray(res.body.result)) {
		return null;
	}

	const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());

	if (!actorData) {
		return null;
	}

	const actorUrl = `https://www.${networkName}.com/${actorPath}/${actorData.id}/`;
	const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;

	const [actorRes, actorReleasesRes] = await Promise.all([
		bhttp.get(actorUrl),
		session.get(actorReleasesUrl, {
			headers: {
				Instance: instanceToken,
			},
		}),
	]);

	if (actorRes.statusCode !== 200) {
		return null;
	}

	// the profile is still useful without releases; pass an empty list rather than null
	const releases = actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result
		? actorReleasesRes.body.result
		: [];

	return scrapeProfile(actorData, actorRes.body.toString(), releases, networkName);
}
module.exports = {
scrapeLatestX,
fetchLatest,
fetchScene,
fetchProfile,
scrapeLatestX,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile with the network slug fixed to 'mofos'.
 *
 * @param {string} actorName - Actor name, passed through unchanged.
 * @returns {Promise<*>} Whatever the imported `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	const networkSlug = 'mofos';

	return fetchProfile(actorName, networkSlug);
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
};

View File

@@ -9,149 +9,149 @@ const slugify = require('../utils/slugify');
const { ex, get } = require('../utils/q');
/**
 * Derive a scene title and entry ID from a scene URL path.
 *
 * The third '/'-separated segment of the path is split on '-': the last part is
 * the entry ID, the preceding parts are capitalised and joined with spaces.
 *
 * @param {string} pathname - URL path such as '/scene/some-title-12345'.
 * @returns {{title: string, entryId: string}}
 */
function titleExtractor(pathname) {
	const components = pathname.split('/')[2].split('-');
	const entryId = components[components.length - 1];

	const title = components
		.slice(0, -1)
		.map(word => `${word.slice(0, 1).toUpperCase()}${word.slice(1)}`)
		.join(' ');

	return { title, entryId };
}
/**
 * Scrape the scene list of a latest-releases page.
 *
 * @param {string} html - Page markup.
 * @param {Object} site - Site record, attached to every release as `site`.
 * @returns {Object[]} One release per `.site-list .scene-item` element.
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.site-list .scene-item').toArray();

	return sceneElements.map((item) => {
		const element = $(item);

		const sceneLinkElement = element.find('a').first();
		// rebuild the URL without query string or fragment
		const { protocol, hostname, pathname } = new URL(sceneLinkElement.attr('href'));
		const url = `${protocol}//${hostname}${pathname}`;
		const { title, entryId } = titleExtractor(pathname);

		const date = moment.utc(element.find('.entry-date').text(), 'MMM D, YYYY').toDate();
		const actors = element.find('.contain-actors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

		// drops the last four characters of the runtime text, presumably a ' min' suffix
		const duration = Number(element.find('.scene-runtime').text().slice(0, -4)) * 60;

		const posterString = sceneLinkElement.find('img[data-srcset]').attr('data-srcset') || sceneLinkElement.find('img[data-src]').attr('data-src');
		// a scene without a usable image should not abort scraping the whole page
		const posterMatch = posterString && posterString.match(/[\w/.]+$/);
		const poster = posterMatch ? `https:${posterMatch[0]}` : null;

		return {
			url,
			entryId,
			title,
			actors,
			date,
			duration,
			poster,
			rating: null,
			site,
		};
	});
}
/**
 * Scrape a scene page.
 *
 * @param {string} html - Scene page markup.
 * @param {string} url - Scene URL; returned unchanged as `url`, and its last
 *   '-'-separated part (query string excluded) is used as the entry ID.
 * @param {Object} site - Site record, attached to the release as `site`.
 * @returns {Object} The scraped release.
 */
function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElement = $('.scene-info');

	const { protocol, hostname, pathname } = new URL(url);
	const originalUrl = `${protocol}//${hostname}${pathname}`;

	const entryId = originalUrl.split('-').slice(-1)[0];
	const title = sceneElement.find('h1.scene-title.grey-text').text();
	// the first two child nodes of the synopsis are skipped
	const description = sceneElement.find('.synopsis').contents().slice(2).text().replace(/[\s\n]+/g, ' ').trim();

	const date = moment.utc(sceneElement.find('span.entry-date').text(), 'MMM D, YYYY').toDate();
	const actors = $('a.scene-title.grey-text.link').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	const duration = Number(sceneElement.find('.duration-ratings .duration').text().slice(10, -4)) * 60;

	// without this guard a missing attribute produced the string 'https:undefined'
	const posterPath = $('video, dl8-video').attr('poster');
	const poster = posterPath ? `https:${posterPath}` : null;
	const photos = $('.contain-scene-images.desktop-only a').map((index, el) => `https:${$(el).attr('href')}`).toArray();

	const trailerEl = $('source');
	const trailerSrc = trailerEl.attr('src');
	const trailerType = trailerEl.attr('type');

	const siteName = sceneElement.find('a.site-title').text();
	const channel = siteName.replace(/[\s']+/g, '').toLowerCase();

	const tags = $('.categories a.cat-tag').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	return {
		url,
		entryId,
		title,
		description,
		actors,
		date,
		duration,
		tags,
		photos,
		poster,
		trailer: {
			src: trailerSrc,
			type: trailerType,
		},
		rating: null,
		site,
		channel,
	};
}
/**
 * Fetch one page of an actor's scenes and return the scene URLs on it.
 *
 * @param {string} url - Actor page URL.
 * @returns {Promise<string[]>} Scene URLs, or an empty array when the request failed.
 */
async function fetchActorReleases(url) {
	const res = await get(url);

	return res.ok
		? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
		: [];
}
/**
 * Scrape an actor page, following its pagination to collect all scene URLs.
 *
 * @param {string} html - Markup of the first actor page.
 * @returns {Promise<Object>} Profile with description, avatar (only when an image
 *   is found) and releases (scene URLs from this page and all other pages).
 */
async function scrapeProfile(html) {
	const { qu } = ex(html);
	const profile = {};

	profile.description = qu.q('.bio_about_text', true);

	const avatar = qu.q('img.performer-pic', 'src');
	if (avatar) profile.avatar = `https:${avatar}`;

	const releases = qu.urls('.scene-item > a:first-child');
	// next/prev links duplicate numbered pages, so only the numbered ones are followed
	const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
	const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));

	profile.releases = releases.concat(olderReleases.flat());

	return profile;
}
/**
 * Fetch and scrape one page of a site's latest releases.
 *
 * @param {Object} site - Site record; `site.url` is requested with a `page` query.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<Object[]>} The scraped releases.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}?page=${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site record, forwarded to scrapeScene.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Fetch and scrape an actor's page by slugified name.
 *
 * @param {string} actorName - Actor name; slugified to form the page URL.
 * @returns {Promise<Object|null>} The scraped profile, or null when the page did
 *   not answer with status 200.
 */
async function fetchProfile(actorName) {
	const actorSlug = slugify(actorName);

	const res = await bhttp.get(`https://www.naughtyamerica.com/pornstar/${actorSlug}`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString());
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,75 +3,75 @@
const { geta, ed } = require('../utils/q');
/**
 * Scrape releases from the block-style update listing.
 *
 * @param {Object[]} scenes - Query contexts, one per scene element; each provides
 *   `html` and `qu`.
 * @returns {Object[]} The scraped releases.
 */
function scrapeBlockLatest(scenes) {
	return scenes.map(({ html, qu }) => {
		const release = {};

		// the entry ID sits in either the class or the id attribute; query both up
		// front so a missing class attribute cannot throw before the id fallback
		const thumbClass = qu.q('div[class*="videothumb"]', 'class');
		const thumbId = qu.q('div[id*="videothumb"]', 'id');
		const entryId = (thumbClass && thumbClass.match(/videothumb_(\d+)/))
			|| (thumbId && thumbId.match(/videothumb_(\d+)/));

		release.entryId = entryId[1];

		release.title = qu.q('h4 a', true);
		release.url = qu.url('h4 a');
		release.date = ed(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

		release.actors = qu.all('.tour_update_models a', true);

		release.poster = qu.q('div img').dataset.src;
		// highest available resolution of the first image
		release.photos = [qu.q('div img', 'src0_4x') || qu.q('div img', 'src0_3x') || qu.q('div img', 'src0_2x')];

		release.teaser = qu.video();

		return release;
	});
}
/**
 * Scrape releases from the classic-style update listing.
 *
 * @param {Object[]} scenes - Query contexts, one per scene element; each provides
 *   `el` and `qu`.
 * @returns {Object[]} The scraped releases; description, date and duration are only
 *   set when found.
 */
function scrapeClassicLatest(scenes) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		release.entryId = el.dataset.setid;
		release.url = qu.url('a');

		release.title = qu.q('.update_title_small', true) || qu.q('a:nth-child(2)', true);

		const description = qu.q('a', 'title');
		if (description) release.description = description;

		const date = qu.date('.date_small, .update_date', 'MM/DD/YYYY');
		if (date) release.date = date;

		// the counts line does not necessarily mention minutes
		const durationLine = qu.q('.update_counts', true);
		const durationMatch = durationLine && durationLine.match(/(\d+) min/i);
		if (durationMatch) release.duration = Number(durationMatch[1]) * 60;

		// actors are either linked individually or listed as comma-separated text
		const actors = qu.all('.update_models a', true);
		const actorsLine = actors.length > 0 ? null : qu.q('.update_models', true);

		if (actors.length > 0) release.actors = actors;
		else release.actors = actorsLine ? actorsLine.split(/,\s*/) : [];

		const photoCount = Number(qu.q('.update_thumb', 'cnt')) || 0;
		[release.poster, ...release.photos] = Array.from({ length: photoCount })
			.map((value, index) => qu.q('.update_thumb', `src${index}_3x`)
				|| qu.q('.update_thumb', `src${index}_2x`)
				|| qu.q('.update_thumb', `src${index}_1x`));

		return release;
	});
}
/**
 * Fetch one page of a site's movie listing and scrape it with the block or classic
 * scraper, depending on `site.parameters.block`.
 *
 * @param {Object} site - Site record; reads url, parameters.siteId and parameters.block.
 * @param {number} [page=1] - Page number used in the listing URL.
 * @returns {Promise<Object[]|number|null>} Scraped releases, the response status
 *   when the request failed, or null when the site has no parameters.
 */
async function fetchLatest(site, page = 1) {
	if (!site.parameters) {
		return null;
	}

	const url = `${site.url}/tour_${site.parameters.siteId}/categories/movies_${page}_d.html`;
	const res = await geta(url, '.updatesBlock .movieBlock, .updatesBlock .videoBlock, .latest_updates_block .update_details, .category_listing_block .update_details');

	if (res.ok && site.parameters.block) {
		return scrapeBlockLatest(res.items, site);
	}

	return res.ok ? scrapeClassicLatest(res.items, site) : res.status;
}
module.exports = {
fetchLatest,
fetchLatest,
};

View File

@@ -5,161 +5,161 @@ const slugify = require('../utils/slugify');
const { heightToCm } = require('../utils/convert');
// origins for site slugs that do not follow the https://www.<slug>.com pattern
const slugUrlMap = {
	nubiles: 'https://www.nubiles.net',
	nubilesporn: 'https://www.nubiles-porn.com',
};
/**
 * Fetch a photo album page and collect the `srcset` of each thumbnail's source element.
 *
 * @param {string} albumUrl - Album page URL.
 * @returns {Promise<string[]>} The srcset values, or an empty array when the
 *   request failed.
 */
async function getPhotos(albumUrl) {
	const res = await geta(albumUrl, '.photo-thumb');

	return res.ok
		? res.items.map(({ q }) => q('source').srcset)
		: [];
}
/**
 * Scrape a list of scene tiles.
 *
 * @param {Object[]} scenes - Query contexts, one per tile; each provides `qu`.
 * @param {?Object} site - Site record; `site.url` prefixes relative scene links.
 * @param {string} [origin] - Fallback prefix for relative links when no channel
 *   link or site URL is available.
 * @returns {Object[]} The scraped releases.
 */
function scrapeAll(scenes, site, origin) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('.title a', true);

		const url = qu.url('.title a').split('?')[0];
		const channelUrl = qu.url('.site-link');

		if (/^http/.test(url)) {
			// absolute link: keep the path, but point it at the channel when one is linked
			const { pathname } = new URL(url);
			release.entryId = pathname.split('/')[3];

			if (channelUrl) release.url = `${channelUrl}${pathname}`;
			else release.url = url;
		} else if (!/\/join/.test(url)) {
			// relative link: prefix with the channel, the site or the given origin
			release.entryId = url.split('/')[3];

			if (channelUrl) release.url = `${channelUrl}${url}`;
			else if (site?.url) release.url = `${site.url}${url}`;
			else if (origin) release.url = `${origin}${url}`;
		} else {
			// join link: no scene URL, the ID is read from the thumbnail instead
			release.entryId = qu.q('a img', 'tube_tour_thumb_id');
		}

		release.date = qu.date('.date', 'MMM D, YYYY');
		release.actors = qu.all('.models a.model', true);

		const poster = qu.q('img').dataset.original;
		// candidate list: the '_1280' variant first, then the original URL
		release.poster = [
			poster.replace('_640', '_1280'),
			poster,
		];

		release.stars = Number(qu.q('.rating', true));
		release.likes = Number(qu.q('.likes', true));

		return release;
	});
}
/**
 * Scrape a scene page, including its photo album when one is linked.
 *
 * @param {Object} context - Query context of the scene page; provides `qu`.
 * @param {string} url - Scene URL; its query string is dropped from `release.url`
 *   and the fourth path segment is used as the entry ID.
 * @param {Object} site - Site record; `site.url` prefixes the album link.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene({ qu }, url, site) {
	const release = {};

	const { origin, pathname } = new URL(url);
	release.url = `${origin}${pathname}`;

	release.entryId = pathname.split('/')[3];
	release.title = qu.q('.content-pane-title h2', true);
	release.description = qu.q('.content-pane-column div', true);

	// qu.date parses the element text with the format, as scrapeAll does; qu.q
	// would treat the format string as an attribute name
	release.date = qu.date('.date', 'MMM D, YYYY');

	release.actors = qu.all('.content-pane-performers .model', true);
	release.tags = qu.all('.categories a', true);

	release.poster = qu.poster() || qu.img('.fake-video-player img');
	release.trailer = qu.all('source').map(source => ({
		src: source.src,
		quality: Number(source.getAttribute('res')),
	}));

	release.stars = Number(qu.q('.score', true));
	release.likes = Number(qu.q('#likecount', true));

	const albumLink = qu.url('.content-pane-related-links a[href*="gallery"]');
	if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`);

	return release;
}
/**
 * Scrape a model page into a profile.
 *
 * @param {Object} context - Query context of the model page; provides `qu`.
 * @param {string} _actorName - Unused.
 * @param {string} origin - Site origin, forwarded to scrapeAll for release URLs.
 * @returns {Object} The scraped profile; bust, waist and hip are only set when the
 *   page lists a figure.
 */
function scrapeProfile({ qu }, _actorName, origin) {
	const profile = {};

	// each h5 heading labels the paragraph that follows it
	const keys = qu.all('.model-profile h5', true);
	const values = qu.all('.model-profile h5 + p', true);

	const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]]));

	profile.age = Number(bio.age);
	profile.description = qu.q('.model-bio', true);

	profile.residencePlace = bio.location;

	profile.height = heightToCm(bio.height);

	if (bio.figure) {
		// numeric parts become numbers, anything else (e.g. a cup size) stays a string
		[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v);
	}

	profile.avatar = qu.img('.model-profile img');

	const releases = qu.all('.content-grid-item').filter(el => /video\//.test(qu.url(el, '.img-wrapper a'))); // filter out photos
	profile.releases = scrapeAll(ctxa(releases), null, origin);

	return profile;
}
/**
 * Fetch and scrape one page of a site's video gallery.
 *
 * @param {Object} site - Site record; reads `site.url`.
 * @param {number} [page=1] - 1-based page number; the gallery is offset in steps of 12.
 * @returns {Promise<Object[]|number>} Scraped releases, or the response status when
 *   the request failed.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/video/gallery/${(page - 1) * 12}`;
	const res = await geta(url, '.content-grid-item');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch and scrape a site's upcoming videos, for sites that have the `upcoming`
 * parameter set.
 *
 * @param {Object} site - Site record; reads url and parameters.upcoming.
 * @returns {Promise<Object[]|number>} Scraped releases, the response status when the
 *   request failed, or an empty array when the parameter is not set.
 */
async function fetchUpcoming(site) {
	if (site.parameters?.upcoming) {
		const url = `${site.url}/video/upcoming`;
		const res = await geta(url, '.content-grid-item');

		return res.ok ? scrapeAll(res.items, site) : res.status;
	}

	return [];
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site record, forwarded to scrapeScene.
 * @returns {Promise<Object|number>} The scraped release, or the response status
 *   when the request failed.
 */
async function fetchScene(url, site) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
/**
 * Find a model in the site's alphabetical index and scrape their page.
 *
 * @param {string} actorName - Model name; its first letter selects the index page
 *   and its slug is compared against the slugs of the listed names.
 * @param {string} siteSlug - Site slug; looked up in slugUrlMap, falling back to
 *   https://www.<slug>.com.
 * @returns {Promise<Object|number|null>} The scraped profile, the response status
 *   of a failed request, or null when the model is not listed.
 */
async function fetchProfile(actorName, siteSlug) {
	const firstLetter = actorName.charAt(0).toLowerCase();
	const origin = slugUrlMap[siteSlug] || `https://www.${siteSlug}.com`;

	const url = `${origin}/model/alpha/${firstLetter}`;
	const resModels = await get(url);

	if (!resModels.ok) return resModels.status;

	// NOTE(review): this is the matched link element, not a string; the URL below
	// relies on its string conversion yielding the link path - confirm
	const modelPath = resModels.item.qu.all('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));

	if (modelPath) {
		const modelUrl = `${origin}${modelPath}`;
		const resModel = await get(modelUrl);

		return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status;
	}

	return null;
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};

View File

@@ -7,143 +7,143 @@ const knex = require('../knex');
const { ex, ctxa } = require('../utils/q');
/**
 * Query the slugs of all sites that belong to the 'perfectgonzo' network.
 *
 * @returns {Promise<*>} result of the knex `pluck('sites.slug')` query
 */
async function getSiteSlugs() {
	return knex('sites')
		.pluck('sites.slug')
		.join('networks', 'networks.id', 'sites.network_id')
		.where('networks.slug', 'perfectgonzo');
}
/**
 * Derive a short hex identifier from a string using blake2b with an 8-byte digest.
 *
 * @param {string} identifier - text to hash
 * @returns {string} hex-encoded digest
 */
function getHash(identifier) {
	const hash = blake2.createHash('blake2b', { digestLength: 8 });

	hash.update(Buffer.from(identifier));

	return hash.digest('hex');
}
/**
 * Collect the names listed under the 'Male Models' label of a tag container.
 *
 * The container's child nodes are read in order; text nodes (nodeType 3) act as
 * section labels and the non-empty nodes following 'Male Models' up to the next
 * text node are the model names.
 *
 * @param {?{childNodes: Iterable}} tagContainer - tag container node, may be null
 * @returns {string[]} model names, empty when there is no container or no 'Male Models' label
 */
function extractMaleModelsFromTags(tagContainer) {
	if (!tagContainer) {
		return [];
	}

	const tagEls = Array.from(tagContainer.childNodes, node => ({ type: node.nodeType, text: node.textContent.trim() }))
		.filter(node => node.text.length > 0);

	const modelLabelIndex = tagEls.findIndex(node => node.text === 'Male Models');

	if (modelLabelIndex === -1) {
		return [];
	}

	const nextLabelIndex = tagEls.findIndex((node, index) => index > modelLabelIndex && node.type === 3);
	// when 'Male Models' is the last section there is no following label; slicing to -1 would drop the last model
	const sectionEnd = nextLabelIndex === -1 ? tagEls.length : nextLabelIndex;

	return tagEls.slice(modelLabelIndex + 1, sectionEnd).map(model => model.text);
}
/**
 * Find which site slug occurs in a photo URL.
 *
 * @param {string} photo - photo URL to search
 * @param {string[]} [metaSiteSlugs] - known site slugs; loaded via `getSiteSlugs` when omitted
 * @returns {Promise<?string>} the first slug found in the URL, or `null` when none matches
 */
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
	const siteSlugs = metaSiteSlugs || await getSiteSlugs();

	// an empty alternation would match the empty string at position 0
	if (!siteSlugs || siteSlugs.length === 0) {
		return null;
	}

	// escape regex metacharacters so slugs are matched literally
	const pattern = siteSlugs.map(slug => slug.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|');
	const channelMatch = photo.match(new RegExp(pattern));

	return channelMatch ? channelMatch[0] : null;
}
/**
 * Scrape the release tiles from a 'latest movies' page.
 *
 * @param {string} html - page markup
 * @param {Object} site - site entry; `site.url` and `site.slug` are read, and the entry is attached to every release
 * @returns {Promise<Object[]>} one release per `#content-main .itemm` tile
 */
async function scrapeLatest(html, site) {
	// fetched once and attached to every release as `meta.siteSlugs`
	const siteSlugs = await getSiteSlugs();
	const { element } = ex(html);

	return ctxa(element, '#content-main .itemm').map(({
		q, qa, qlength, qdate, qimages,
	}) => {
		const release = {
			site,
			meta: {
				siteSlugs,
			},
		};

		const sceneLink = q('a');

		release.title = sceneLink.title;
		release.url = `${site.url}${sceneLink.href}`;
		release.date = qdate('.nm-date', 'MM/DD/YYYY');

		// entry ID is a hash of site slug, URL slug (third path segment) and release date
		const slug = new URL(release.url).pathname.split('/')[2];
		release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`);

		// the tile title is the '&'-separated actor list
		release.actors = release.title.split('&').map(actor => actor.trim());

		[release.poster, ...release.photos] = qimages('.bloc-link img');

		// the first dropdown link is skipped
		release.tags = qa('.dropdown ul a', true).slice(1);
		release.duration = qlength('.dropdown p:first-child');

		return release;
	});
}
/**
 * Scrape a scene page into a release object.
 *
 * @param {string} html - scene page markup
 * @param {Object} site - site entry, attached to the release
 * @param {string} url - URL the page was fetched from
 * @param {string[]} [metaSiteSlugs] - known site slugs, forwarded to `extractChannelFromPhoto`
 * @returns {Promise<Object>} the release; `url` and `entryId` are rewritten for the detected channel, if any
 */
async function scrapeScene(html, site, url, metaSiteSlugs) {
	const {
		q, qa, qlength, qdate, qposter, qtrailer,
	} = ex(html);

	const release = { url, site };

	release.title = q('#movie-header h2', true);
	release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	release.description = q('.container .mg-md', true);
	release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)');

	// male models are only listed inside the tag container, not among the #video-info links
	release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container')));
	release.tags = qa('.tag-container a', true);

	const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true);
	if (/4K/.test(uhd)) release.tags = release.tags.concat('4k');

	release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src);
	release.poster = qposter();

	const trailer = qtrailer();
	if (trailer) release.trailer = { src: trailer };

	// the channel is derived from the first photo's URL
	if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs);

	if (release.channel) {
		const { pathname } = new URL(url);
		const slug = pathname.split('/')[2];

		release.url = `https://${release.channel}.com${pathname}`;
		release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);
	}

	return release;
}
/**
 * Fetch one page of the site's movie listing and scrape it.
 *
 * @param {Object} site - site entry; `site.url` is the listing base
 * @param {number} [page=1] - page number used in the `/movies/page-N` path
 * @returns {Promise<Object[]>} scraped releases, or an empty array on a non-200 response
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/movies/page-${page}`);

	if (res.statusCode !== 200) {
		return [];
	}

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site entry, passed through to `scrapeScene`
 * @param {Object} [release] - previously scraped release; its `meta.siteSlugs`, when present, are reused
 * @returns {Promise<*>} the scraped release, or an empty array on a non-200 response
 */
async function fetchScene(url, site, release) {
	const res = await bhttp.get(url);

	if (res.statusCode !== 200) {
		return [];
	}

	// a release without `meta` must not throw; scrapeScene then receives undefined slugs
	return scrapeScene(res.body.toString(), site, url, release?.meta?.siteSlugs);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Request the trailer and poster for a set from the pervcity token endpoint.
 *
 * @param {string} entryId - set ID, posted as `setId`
 * @returns {Promise<?{poster: *, trailer: *}>} poster and trailer from the response body
 *   (trailer falls back to `Trailerfallback`), or `null` on a non-200 response
 */
async function getTrailer(entryId) {
	const trailerRes = await bhttp.post('https://www.pervcity.com/gettoken.php', {
		setId: entryId,
	});

	if (trailerRes.statusCode !== 200) {
		return null;
	}

	const { TrailerImg, TrailerPath, Trailerfallback } = trailerRes.body;

	return {
		poster: TrailerImg,
		trailer: TrailerPath || Trailerfallback,
	};
}
/**
 * Scrape one scene tile from the latest-updates response.
 *
 * @param {string} html - markup of a single tile
 * @param {Object} site - site entry; `site.url` prefixes the scene link, and the entry is attached to the result
 * @returns {Object} release with url, entryId, title, actors, date, poster, photos, rating and site
 */
function scrapeLatestScene(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	const entryId = $('li').attr('id');
	const sceneLinkElement = $('#scene_title_border a');
	const url = `${site.url}/${sceneLinkElement.attr('href')}`;
	const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes

	const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas
	const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();

	const poster = $('a:nth-child(2) > img').attr('src');
	const photos = $('.sample-picker img').map((index, element) => $(element).attr('src').replace('tourpics', 'trailer')).toArray();

	// the rating is the number of star images in the tile
	const stars = $('img[src*="/star.png"]').length;

	return {
		url,
		entryId,
		title,
		actors,
		date,
		poster,
		photos,
		rating: {
			stars,
		},
		site,
	};
}
/**
 * Scrape a scene page into a release object.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL, attached to the release
 * @param {Object} site - site entry, attached to the release
 * @returns {Promise<Object>} release with entryId, title, description, duration (seconds), tags, poster and,
 *   when one is available, trailer
 */
async function scrapeScene(html, url, site) {
	const { document } = new JSDOM(html).window;
	const release = { url, site };

	release.entryId = document.querySelector('input#set_ID').value;

	release.title = document.querySelector('title').textContent;
	release.description = document.querySelector('.player_data').textContent.trim();

	// the first two numbers in the duration text are read as minutes and seconds
	const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
	const [minutes, seconds] = durationString.match(/\d+/g);

	release.duration = Number(minutes) * 60 + Number(seconds);
	release.tags = document.querySelector('meta[name="keywords"]').content.split(',');

	// getTrailer resolves to null when the token request fails; don't let that abort the whole scrape
	const { poster, trailer } = (await getTrailer(release.entryId)) || {};

	release.poster = poster;
	if (trailer) release.trailer = { src: trailer };

	return release;
}
/**
 * Read the set ID from a landing page.
 *
 * @param {string} html - landing page markup
 * @returns {string} value of the `input#set_ID` element
 */
function scrapeFallbackLanding(html) {
	const { document } = new JSDOM(html).window;

	return document.querySelector('input#set_ID').value;
}
/**
 * Scrape the popup markup returned by the fallback endpoint into a release object.
 *
 * @param {string} html - popup markup
 * @param {string} entryId - set ID, attached to the release and used for the trailer lookup
 * @param {string} url - scene URL, attached to the release
 * @param {Object} site - site entry, attached to the release
 * @returns {Promise<Object>} release with title, description, date, actors, poster, channel and,
 *   when one is available, trailer
 */
async function scrapeFallbackScene(html, entryId, url, site) {
	const { document } = new JSDOM(html).window;
	const release = { url, entryId, site };

	release.title = document.querySelector('.popup_data_set_head label').textContent;
	release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
	release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
	release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);

	// getTrailer resolves to null when the token request fails; don't let that abort the whole scrape
	const { poster, trailer } = (await getTrailer(release.entryId)) || {};

	release.poster = poster;
	if (trailer) release.trailer = { src: trailer };

	// the channel name is taken from the alt text of the image in the popup header
	release.channel = document.querySelector('.popup_left_top div img').alt;

	return release;
}
/**
 * Fetch the latest scenes for a site and scrape every tile in the response.
 *
 * Page 1 uses the `final_latestupdateview.php` endpoint; later pages use the grid-view
 * endpoint, whose `limitend` grows with the page number.
 *
 * @param {Object} site - site entry; `site.url` and `site.parameters.tourId` are read
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<Object[]>} releases scraped from each entry of the response's `total_arr`
 */
async function fetchLatest(site, page = 1) {
	const { tourId } = site.parameters;

	const url = page === 1
		? `${site.url}/final_latestupdateview.php?limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${tourId}`
		: `${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${tourId}`;

	const res = await bhttp.get(url);
	const elements = JSON.parse(res.body.toString());

	// total_arr is a key-value object for final_load_latestupdate_grid_view.php
	return Object.values(elements.total_arr).map(html => scrapeLatestScene(html, site));
}
/**
 * Fetch a scene page and scrape it, going through the popup endpoint for network-level sites.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site entry; a truthy `site.isNetwork` selects the fallback route
 * @returns {Promise<?Object>} the scraped release, or `null` on a non-200 response
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode !== 200) {
		return null;
	}

	const html = res.body.toString();

	if (!site.isNetwork) {
		return scrapeScene(html, url, site);
	}

	// the landing page only provides the set ID; the scene details come from the popup endpoint
	const entryId = scrapeFallbackLanding(html, url);

	const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
		setId: entryId,
	});

	return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -5,56 +5,56 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
// Maps ethnicity labels as shown on the profile page to the value stored on the profile.
const ethnicityMap = {
	White: 'Caucasian',
};
// Maps hair color labels as shown on the profile page to the value stored on the profile.
const hairMap = {
	Brunette: 'brown',
};
/**
 * Scrape a pornstar page into an actor profile.
 *
 * The `.infoPiece` elements are read as 'Key: value' pairs and mapped onto profile fields.
 *
 * @param {string} html - profile page markup
 * @param {string} _url - page URL (unused)
 * @param {string} actorName - name stored on the profile
 * @returns {Promise<Object>} the profile
 */
async function scrapeProfile(html, _url, actorName) {
	const { document } = new JSDOM(html).window;

	const entries = Array.from(document.querySelectorAll('.infoPiece'), el => el.textContent.replace(/\n|\t/g, '').split(':'));
	// entries without a colon have no value and are skipped instead of throwing on value.trim()
	const bio = entries.reduce((acc, [key, value]) => (key && value !== undefined ? { ...acc, [key.trim()]: value.trim() } : acc), {});

	const profile = {
		name: actorName,
	};

	const descriptionEl = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
	const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');

	if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
	// bio keys keep the page's capitalization, so the key is 'Ethnicity', not 'ethnicity'
	if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;

	if (descriptionEl) profile.description = descriptionEl.textContent;

	if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
	if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();

	profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
	profile.residencePlace = bio['City and Country'];

	if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';

	// height and weight are taken from the number that follows the opening parenthesis
	const height = bio.Height?.match(/\((\d+)/);
	const weight = bio.Weight?.match(/\((\d+)/);

	if (height) profile.height = Number(height[1]);
	if (weight) profile.weight = Number(weight[1]);

	if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
	if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
	if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';

	if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;
	profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason

	return profile;
}
async function fetchProfile(actorName) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
/* Model pages are not reliably associated with actual porn stars
/* Model pages are not reliably associated with actual porn stars
const modelUrl = `https://pornhub.com/model/${actorSlug}`;
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
@@ -74,12 +74,12 @@ async function fetchProfile(actorName) {
}
*/
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
const pornstarRes = await bhttp.get(pornstarUrl);
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
const pornstarRes = await bhttp.get(pornstarUrl);
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
}
module.exports = {
fetchProfile,
fetchProfile,
};

View File

@@ -9,193 +9,193 @@ const { get, geta } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Fetch the high-resolution gallery of a scene and collect its photo URLs.
 *
 * @param {string} entryId - scene ID used in the gallery query
 * @param {Object} site - site entry; only the hostname of `site.url` is used
 * @returns {Promise<string[]>} `data-src` (or, failing that, `href`) of every `a.fakethumb`
 */
async function getPhotos(entryId, site) {
	const { hostname } = new URL(site.url);

	const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
	const $ = cheerio.load(res.body.toString(), { normalizeWhitespace: true });

	return $('a.fakethumb')
		.map((photoIndex, photoElement) => $(photoElement).attr('data-src') || $(photoElement).attr('href'))
		.toArray();
}
/**
 * Scrape the scene tiles from a listing page.
 *
 * @param {string} html - listing page markup
 * @param {Object} [site] - site entry, attached to every scene as-is
 * @returns {Object[]} one scene per `.content-wrapper .scene` element
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.content-wrapper .scene').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('h3 a');
		const thumbnailElement = $(element).find('a img');

		const url = sceneLinkElement.attr('href');
		// the entry ID is the last path segment of the scene URL
		const entryId = url.split('/').slice(-1)[0];

		// the title is whatever follows the first colon of the thumbnail's alt text
		const titleText = thumbnailElement.attr('alt');
		const title = titleText.slice(titleText.indexOf(':') + 1).trim();

		// NOTE(review): the cheerio selection itself is handed to moment rather than its text — confirm this parses as intended
		const date = moment.utc($(element).find('.scene-date'), ['MM/DD/YYYY', 'YYYY-MM-DD']).toDate();

		const actors = $(element).find('.scene-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
		const likes = Number($(element).find('.scene-votes').text());

		// the thumbnail carries its preview frames as src1..srcN attributes, with N in thumbs_num
		const photoCount = Number(thumbnailElement.attr('thumbs_num'));
		const poster = thumbnailElement.attr('src');
		const photos = Array.from({ length: photoCount }, (val, index) => thumbnailElement.attr(`src${index + 1}`));

		return {
			url,
			entryId,
			title,
			actors,
			date,
			poster,
			photos,
			rating: {
				likes,
			},
			site,
		};
	});
}
/**
 * Scrape a scene page into a release object.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; its last path segment becomes the entry ID
 * @param {Object} site - site entry, used to fetch the photo gallery
 * @returns {Promise<Object>} the release
 */
async function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const release = { url };

	[release.entryId] = url.split('/').slice(-1);
	release.title = $('.video-wrapper meta[itemprop="name"]').attr('content');
	release.description = $('.video-wrapper meta[itemprop="description"]').attr('content');

	release.date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
	release.actors = $('.content-wrapper .scene-models-list a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	const timestamp = $('.video-wrapper meta[itemprop="duration"]').attr('content');

	if (timestamp) {
		// the first two numbers in the duration value are read as minutes and seconds
		const [minutes, seconds] = timestamp.match(/\d+/g);
		release.duration = Number(minutes) * 60 + Number(seconds);
	}

	release.tags = $('.content-desc .scene-tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	release.likes = Number($('.content-desc #social-actions #likes').text());

	// poster fallbacks: og:image, then the first https://….jpg in a script mentioning 'poster', then the end-card image
	const posterScript = $('script:contains(poster)').html();
	const posterLink = posterScript?.slice(posterScript.indexOf('https://'), posterScript.indexOf('.jpg') + 4);
	release.poster = $('meta[property="og:image"]').attr('content') || posterLink || $('#trailer_player_finished img').attr('src');

	const trailer = $('meta[property="og:video"]').attr('content') || $('#videojs-trailer source').attr('src');

	if (trailer) release.trailer = { src: trailer };

	release.photos = await getPhotos(release.entryId, site);
	release.movie = $('a[data-track="FULL MOVIE"]').attr('href');

	// a cheerio selection is always truthy, so check that it actually matched something
	const siteElement = $('.content-wrapper .logos-sites a');
	if (siteElement.length > 0) release.channel = slugify(siteElement.text(), '');

	return release;
}
/**
 * Scrape a model page into an actor profile.
 *
 * @param {Object} context - query context of the fetched page
 * @param {string} context.html - raw page markup, re-scraped for the model's releases
 * @param {Function} context.q - single-element query helper
 * @param {Function} context.qa - multi-element query helper
 * @param {Function} context.qtx - text query helper
 * @returns {Object} the profile, including `releases` scraped from the same page
 */
function scrapeProfile({ html, q, qa, qtx }) {
	const profile = {};

	// 'Key: value' facts become { key_slug: value }; empty, '-' and colon-less facts are skipped
	const bio = qa('.model-facts li:not(.model-facts-long)', true).reduce((acc, fact) => {
		const [key, value] = fact.split(':');
		const trimmedValue = value?.trim();

		if (!trimmedValue || trimmedValue === '-') return acc;
		return { ...acc, [slugify(key, '_')]: trimmedValue };
	}, {});

	const description = q('.model-facts-long', true);
	if (description) profile.description = description;

	const aliases = qtx('.aka')?.split(/,\s*/);
	if (aliases) profile.aliases = aliases;

	if (bio.birth_place) profile.birthPlace = bio.birth_place;
	if (bio.nationality) profile.nationality = bio.nationality;

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// bust is kept as a string; waist and hip are converted to numbers
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	// only the leading number of the weight and height facts is used
	const weight = bio.weight?.match(/^\d+/);
	const height = bio.height?.match(/^\d+/);

	if (weight) profile.weight = Number(weight[0]);
	if (height) profile.height = Number(height[0]);

	if (bio.hair_color) profile.hair = bio.hair_color;
	if (bio.eye_color) profile.eye = bio.eye_color;

	if (bio.tattoos) {
		profile.hasTattoos = true;
		profile.tattoos = bio.tattoos;
	}

	if (bio.piercings) {
		profile.hasPiercings = true;
		profile.piercings = bio.piercings;
	}

	const avatarEl = q('.img-pornstar img');
	if (avatarEl) profile.avatar = avatarEl.dataset.src;

	profile.releases = scrapeLatest(html);

	return profile;
}
/**
 * Fetch one page of the site's scene listing and scrape it.
 *
 * Sites hosted on private.com list scenes at `/<page>/`; other sites use `/scenes/<page>/`.
 *
 * @param {Object} site - site entry; `site.url` is the listing base
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]>} scraped scenes
 */
async function fetchLatest(site, page = 1) {
	const { hostname } = new URL(site.url);

	// literal substring test; passing a string to match() would treat the dot as a wildcard
	const listingUrl = hostname.includes('private.com')
		? `${site.url}/${page}/`
		: `${site.url}/scenes/${page}/`;

	const res = await bhttp.get(listingUrl);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site entry, passed through to `scrapeScene`
 * @returns {Promise<Object>} the scraped release
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Search private.com for an actor and scrape the matching model page.
 *
 * @param {string} actorName - actor name; search results are matched against it by slug
 * @returns {Promise<*>} the `scrapeProfile` result, the model page's failing `status`,
 *   or `null` when the search fails or lists no matching model
 */
async function fetchProfile(actorName) {
	const actorSearchSlug = slugify(actorName, '+');
	const modelRes = await geta(`https://www.private.com/search.php?query=${actorSearchSlug}`, '.model h3 a');

	if (!modelRes.ok) {
		return null;
	}

	const actorSlug = slugify(actorName);
	const model = modelRes.items.find(({ text }) => slugify(text) === actorSlug);

	if (!model) {
		return null;
	}

	const res = await get(model.el.href);

	return res.ok ? scrapeProfile(res.item) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,7 +3,7 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -4,49 +4,49 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio');
const {
scrapeLatestX,
fetchLatest,
fetchScene,
fetchProfile,
scrapeLatestX,
fetchLatest,
fetchScene,
fetchProfile,
} = require('./mindgeek');
/**
 * Scrape releases from a classic page by parsing the `initialState` object
 * literal embedded in one of its script tags.
 *
 * @param {string} html - Page markup.
 * @param {Object} site - Site entry, passed through to scrapeLatestX.
 * @returns {Array} One scrapeLatestX result per entry in `entities.releases`.
 */
function scrapeLatestClassic(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	const stateTag = $('script:contains("initialState")').html();

	const prefix = 'initialState = {';
	const prefixIndex = stateTag.indexOf(prefix);
	const suffix = '};';

	// keep the opening brace of the prefix and the closing brace of the suffix, drop the semicolon
	const stateString = stateTag.slice(prefixIndex + prefix.length - 1, stateTag.indexOf(suffix, prefixIndex) + suffix.length - 1);
	const data = JSON.parse(stateString);

	return Object.values(data.entities.releases).map(scene => scrapeLatestX(scene, site));
}
/**
 * Fetch one page of the classic scene listing.
 *
 * @param {Object} site - Site entry; `site.url` is the base URL.
 * @param {number} page - Page number to request.
 * @returns {Promise<Array|null>} Scraped releases, or null on a non-200 response.
 */
async function fetchClassic(site, page) {
	const res = await bhttp.get(`${site.url}/scenes?page=${page}`);

	if (res.statusCode === 200) {
		return scrapeLatestClassic(res.body.toString(), site);
	}

	return null;
}
/**
 * Route to the classic scraper for sites flagged `parameters.classic`,
 * and to the shared mindgeek fetchLatest otherwise.
 *
 * @param {Object} site - Site entry.
 * @param {number} [page=1] - Page number to request.
 * @returns {Promise<*>} Result of the selected fetcher.
 */
async function fetchLatestWrap(site, page = 1) {
	if (site.parameters?.classic) {
		return fetchClassic(site, page);
	}

	return fetchLatest(site, page);
}
/**
 * Fetch an actor profile through the shared mindgeek scraper, using the
 * 'realitykings' network slug.
 *
 * @param {string} actorName - Actor name to look up.
 * @returns {Promise<*>} Whatever the mindgeek fetchProfile returns.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'realitykings');
}
module.exports = {
fetchLatest: fetchLatestWrap,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest: fetchLatestWrap,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -7,255 +7,255 @@ const slugify = require('../utils/slugify');
const { heightToCm, lbsToKg } = require('../utils/convert');
/**
 * Collect the photo sources from a photos page.
 *
 * @param {string} html - Photos page markup.
 * @returns {string[][]} One `[rewritten, original]` pair per image; the first
 *   entry has `x_800` replaced by `x_xl` and `_tn` removed.
 */
function scrapePhotos(html) {
	const { qis } = ex(html, '#photos-page');
	const photos = qis('img');

	return photos.map(photo => [
		photo
			.replace('x_800', 'x_xl')
			.replace('_tn', ''),
		photo,
	]);
}
/**
 * Fetch a photos page and scrape its images.
 *
 * @param {string} url - Photos page URL.
 * @returns {Promise<string[][]>} Photo source pairs, or an empty array on a non-200 response.
 */
async function fetchPhotos(url) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		// scrapePhotos only takes the markup
		return scrapePhotos(res.body.toString());
	}

	return [];
}
/**
 * Scrape every video tile on a listing page.
 *
 * @param {string} html - Listing page markup.
 * @param {Object} [site] - Site entry; `site.parameters.actors`, when set,
 *   takes precedence over the actors found in the tile.
 * @returns {Object[]} One release per tile, with photo-album tiles left out.
 */
function scrapeAll(html, site) {
	return exa(html, '.container .video, .container-fluid .video').map(({ q, qa, qd, ql }) => {
		const release = {};

		release.title = q('.title, .i-title', true);

		const linkEl = q('a');
		const url = new URL(linkEl.href);
		release.url = `${url.origin}${url.pathname}`;

		// this is a photo album, not a scene (used for profiles)
		if (/photos\//.test(url)) return null;

		[release.entryId] = url.pathname.split('/').slice(-2);

		release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
			|| qd('.dt-box', 'MMM.DD YYYY');

		release.actors = site?.parameters?.actors || qa('.model, .i-model', true);
		release.duration = ql('.i-amount, .amount');

		const posterEl = q('.item-img img');

		if (posterEl) {
			release.poster = `https:${posterEl.src}`;
		}

		if (posterEl?.dataset.gifPreview) {
			release.teaser = {
				src: `https:${posterEl.dataset.gifPreview}`,
			};
		}

		return release;
	}).filter(Boolean);
}
/**
 * Scrape a single scene page.
 *
 * @param {string} html - Scene page markup.
 * @param {string} url - Scene URL; the entry ID is taken from its path.
 * @param {Object} site - Site entry; `site.parameters.actors` is the last
 *   fallback when the page lists no actors.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene(html, url, site) {
	const { qu } = ex(html, '#videos-page, #content');
	const release = {};

	[release.entryId] = new URL(url).pathname.split('/').slice(-2);

	release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
		|| qu.q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();
	release.description = qu.text('.p-desc, .desc');

	release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);

	// no linked actors, fall back to the plain-text 'Featuring' stat
	if (release.actors.length === 0) {
		const actorEl = qu.all('.stat').find(stat => /Featuring/.test(stat.textContent));
		const actorString = qu.text(actorEl);

		release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
	}

	if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors;

	release.tags = qu.all('a[href*=tag]', true);

	const dateEl = qu.all('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
	release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
		|| qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
		|| qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

	// class selector, same as the date lookup above; a bare 'value' only matches <value> elements
	const durationEl = qu.all('.value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
	release.duration = qu.dur(durationEl);

	release.poster = qu.poster('video') || qu.img('.flowplayer img') || qu.img('img'); // _800.jpg is larger than _xl.jpg in landscape

	const photosUrl = qu.url('.stat a[href*=photos]');

	if (photosUrl) {
		release.photos = await fetchPhotos(photosUrl);
	} else {
		release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
			photo.replace('_tn', ''),
			photo,
		]);
	}

	const trailers = qu.all('a[href*=Trailers]');

	if (trailers) {
		release.trailer = trailers.map((trailer) => {
			const src = `https:${trailer.href}`;
			const format = trailer.textContent.trim().match(/^\w+/)[0].toLowerCase();
			const quality = parseInt(trailer.textContent.trim().match(/\d+([a-zA-Z]+)?$/)[0], 10);

			// only mp4 trailers are kept
			return format === 'mp4' ? { src, quality } : null;
		}).filter(Boolean);
	}

	// don't fail the whole scene when the rating box is missing
	const stars = qu.q('.rate-box')?.dataset.score;
	if (stars) release.rating = { stars };

	return release;
}
/**
 * Find the profile link of an actor on a model index page.
 *
 * @param {string} html - Model index markup.
 * @param {string} actorName - Name to match exactly against the link title.
 * @returns {string|null} The profile URL, or null when no link matches.
 */
function scrapeModels(html, actorName) {
	const { qa } = ex(html);
	const model = qa('.model a').find(link => link.title === actorName);

	return model?.href || null;
}
/**
 * Collect an actor's releases, following pagination recursively.
 *
 * @param {string} url - Page of the actor's scene listing to fetch.
 * @param {Object[]} [accReleases=[]] - Releases gathered from earlier pages.
 * @returns {Promise<Object[]|null>} All releases found, or null when a request fails.
 */
async function fetchActorReleases(url, accReleases = []) {
	const res = await get(url);

	if (res.ok) {
		const releases = accReleases.concat(scrapeAll(res.item.document.body.outerHTML));
		const nextPage = res.item.qu.url('.next-pg');

		if (nextPage && new URL(nextPage).searchParams.has('page')) { // last page has 'next' button linking to join page
			return fetchActorReleases(nextPage, releases);
		}

		return releases;
	}

	return null;
}
/**
 * Scrape an actor profile page.
 *
 * @param {string} html - Profile page markup.
 * @param {string} actorUrl - Profile URL, used to build the scene listing URL.
 * @param {boolean} withReleases - Also fetch the actor's releases when truthy.
 * @returns {Promise<Object>} The scraped profile; gender is always 'female'.
 */
async function scrapeProfile(html, actorUrl, withReleases) {
	const { q, qa, qi } = ex(html, '#model-page');
	const profile = { gender: 'female' };

	// map every .stat row to { label_slug: value }
	const bio = qa('.stat').reduce((acc, el) => {
		const prop = q(el, '.label', true).slice(0, -1); // drop the label's last character
		const key = slugify(prop, '_');
		const value = q(el, '.value', true);

		return {
			...acc,
			[key]: value,
		};
	}, {});

	if (bio.location) profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion

	if (bio.birthday) {
		const birthMonth = bio.birthday.match(/^\w+/)[0].toLowerCase();
		const [birthDay] = bio.birthday.match(/\d+/);

		profile.birthday = [birthMonth, birthDay]; // currently unused, not to be confused with birthdate
	}

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
	if (bio.hair_color) profile.hair = bio.hair_color;

	if (bio.height) profile.height = heightToCm(bio.height);
	if (bio.weight) profile.weight = lbsToKg(bio.weight);

	if (bio.bra_size) profile.bust = bio.bra_size;
	// the first segment of the measurements is skipped, bust comes from bra_size
	if (bio.measurements) [, profile.waist, profile.hip] = bio.measurements.split('-');

	if (bio.occupation) profile.occupation = bio.occupation;

	const avatar = qi('img');
	if (avatar) profile.avatar = avatar;

	if (withReleases) {
		const { origin, pathname } = new URL(actorUrl);
		profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`);
	}

	return profile;
}
/**
 * Fetch and scrape one page of a site's latest scenes.
 *
 * @param {Object} site - Site entry; `site.parameters.path` overrides the
 *   default '/big-boob-videos' listing path.
 * @param {number} [page=1] - Page number to request.
 * @returns {Promise<Object[]|number>} Scraped releases, or the HTTP status code on a non-200 response.
 */
async function fetchLatest(site, page = 1) {
	const latestPath = site.parameters?.path || '/big-boob-videos';
	const url = `${site.url}${latestPath}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeAll(res.body.toString(), site);
	}

	return res.statusCode;
}
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site entry, passed through to scrapeScene.
 * @returns {Promise<Object|null>} The scraped release, or null on a non-200 response.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), url, site);
	}

	return null;
}
/**
 * Find an actor in the alphabetical model indexes and scrape the profile.
 *
 * Each source index is walked page by page through recursion. A non-200
 * response (redirects are not followed) moves the search on to the next source.
 *
 * @param {string} actorName - Name to match on the index pages.
 * @param {string} scraperSlug - Not read here, only forwarded to recursive calls.
 * @param {Object} site - Not read here, only forwarded to recursive calls.
 * @param {Object} [include] - `include.scenes` enables fetching the actor's releases.
 * @param {number} [page=1] - Index page to request.
 * @param {number} [source=0] - Position in the list of source indexes.
 * @returns {Promise<Object|null>} The scraped profile, or null when the actor
 *   is not found or the profile request fails.
 */
async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
	const letter = actorName.charAt(0).toUpperCase();

	const sources = [
		`https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${page}`,
		`https://www.50plusmilfs.com/xxx-milf-models/browse/${letter}/?page=${page}`,
	];

	const url = sources[source];

	const res = await bhttp.get(url, {
		followRedirects: false,
	});

	if (res.statusCode === 200) {
		const actorUrl = scrapeModels(res.body.toString(), actorName);

		if (actorUrl) {
			const actorRes = await bhttp.get(actorUrl);

			if (actorRes.statusCode === 200) {
				// include is optional for callers, don't throw when it is left out
				return scrapeProfile(actorRes.body.toString(), actorUrl, include?.scenes);
			}

			return null;
		}

		// not on this page, try the next one
		return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
	}

	// this source has no further pages, restart at page 1 of the next source
	if (sources[source + 1]) {
		return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -65,143 +65,143 @@ const freeones = require('./freeones');
// const freeoneslegacy = require('./freeones_legacy');
module.exports = {
releases: {
'21naturals': naturals,
'21sextreme': sextreme,
'21sextury': sextury,
adulttime,
amateurallure,
assylum,
aziani,
babes,
bamvisions,
bang,
bangbros,
blowpass,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
digitalplayground,
dogfart,
dogfartnetwork: dogfart,
evilangel,
fakehub,
famedigital,
fantasymassage,
fullpornnetwork,
girlsway,
girlgirl: julesjordan,
hussiepass: hush,
hushpass: hush,
insex,
interracialpass: hush,
jayrock,
jesseloadsmonsterfacials,
julesjordan,
kellymadison,
kink,
legalporno,
men,
metrohd,
mikeadriano,
milehighmedia,
mindgeek,
mofos,
naughtyamerica,
newsensations,
nubiles,
perfectgonzo,
pervcity,
pimpxxx: cherrypimps,
pornpros: whalemember,
private: privateNetwork,
puretaboo,
realitykings,
score,
sexyhub: mindgeek,
swallowsalon: julesjordan,
teamskeet,
twistys,
vivid,
vixen,
vogov,
whalemember,
wicked,
xempire,
},
actors: {
'21sextury': sextury,
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
anilos: nubiles,
aziani,
babes,
baddaddypov: fullpornnetwork,
bamvisions,
bangbros,
blacked: vixen,
blackedraw: vixen,
blowpass,
boobpedia,
brattysis: nubiles,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
deeper: vixen,
deeplush: nubiles,
digitalplayground,
dtfsluts: fullpornnetwork,
evilangel,
eyeontheguy: hush,
fakehub,
famedigital,
freeones,
gangbangcreampie: aziani,
girlfaction: fullpornnetwork,
gloryholesecrets: aziani,
hergape: fullpornnetwork,
homemadeanalwhores: fullpornnetwork,
hotcrazymess: nubiles,
hushpass: hush,
hussiepass: hush,
iconmale,
interracialpass: hush,
interracialpovs: hush,
jamesdeen: fullpornnetwork,
julesjordan,
kellymadison,
legalporno,
men,
metrohd,
milehighmedia,
mofos,
mugfucked: fullpornnetwork,
naughtyamerica,
nfbusty: nubiles,
nubilefilms: nubiles,
nubiles,
nubilesporn: nubiles,
onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork,
pimpxxx: cherrypimps,
pornhub,
povperverts: fullpornnetwork,
povpornstars: hush,
private: privateNetwork,
realitykings,
score,
seehimfuck: hush,
sexyhub: mindgeek,
thatsitcomshow: nubiles,
transangels,
tushy: vixen,
tushyraw: vixen,
twistys,
vixen,
wicked,
xempire,
},
releases: {
'21naturals': naturals,
'21sextreme': sextreme,
'21sextury': sextury,
adulttime,
amateurallure,
assylum,
aziani,
babes,
bamvisions,
bang,
bangbros,
blowpass,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
digitalplayground,
dogfart,
dogfartnetwork: dogfart,
evilangel,
fakehub,
famedigital,
fantasymassage,
fullpornnetwork,
girlsway,
girlgirl: julesjordan,
hussiepass: hush,
hushpass: hush,
insex,
interracialpass: hush,
jayrock,
jesseloadsmonsterfacials,
julesjordan,
kellymadison,
kink,
legalporno,
men,
metrohd,
mikeadriano,
milehighmedia,
mindgeek,
mofos,
naughtyamerica,
newsensations,
nubiles,
perfectgonzo,
pervcity,
pimpxxx: cherrypimps,
pornpros: whalemember,
private: privateNetwork,
puretaboo,
realitykings,
score,
sexyhub: mindgeek,
swallowsalon: julesjordan,
teamskeet,
twistys,
vivid,
vixen,
vogov,
whalemember,
wicked,
xempire,
},
actors: {
'21sextury': sextury,
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
anilos: nubiles,
aziani,
babes,
baddaddypov: fullpornnetwork,
bamvisions,
bangbros,
blacked: vixen,
blackedraw: vixen,
blowpass,
boobpedia,
brattysis: nubiles,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
deeper: vixen,
deeplush: nubiles,
digitalplayground,
dtfsluts: fullpornnetwork,
evilangel,
eyeontheguy: hush,
fakehub,
famedigital,
freeones,
gangbangcreampie: aziani,
girlfaction: fullpornnetwork,
gloryholesecrets: aziani,
hergape: fullpornnetwork,
homemadeanalwhores: fullpornnetwork,
hotcrazymess: nubiles,
hushpass: hush,
hussiepass: hush,
iconmale,
interracialpass: hush,
interracialpovs: hush,
jamesdeen: fullpornnetwork,
julesjordan,
kellymadison,
legalporno,
men,
metrohd,
milehighmedia,
mofos,
mugfucked: fullpornnetwork,
naughtyamerica,
nfbusty: nubiles,
nubilefilms: nubiles,
nubiles,
nubilesporn: nubiles,
onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork,
pimpxxx: cherrypimps,
pornhub,
povperverts: fullpornnetwork,
povpornstars: hush,
private: privateNetwork,
realitykings,
score,
seehimfuck: hush,
sexyhub: mindgeek,
thatsitcomshow: nubiles,
transangels,
tushy: vixen,
tushyraw: vixen,
twistys,
vixen,
wicked,
xempire,
},
};

View File

@@ -5,176 +5,176 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Derive a title from the second-to-last segment of a URL path, turning
 * underscores into spaces and capitalizing each word.
 *
 * @param {string} pathname - URL path, e.g. '/movies/view/id/1234/some_title/'.
 * @returns {string} Title, e.g. 'Some Title'.
 */
function extractTitle(pathname) {
	return pathname
		.split('/')
		.slice(-2)[0]
		.split('_')
		.map(seg => `${seg.charAt(0).toUpperCase()}${seg.slice(1)}`)
		.join(' ');
}
/**
 * Split an actor listing on commas and the word 'and'.
 *
 * @param {string} str - Listing such as 'Jane Doe, John Smith and Amy Lee'.
 * @returns {string[]} Trimmed names; segments containing '...' and empty
 *   segments are dropped.
 */
function extractActors(str) {
	return str
		.split(/,|\band\b/ig)
		.filter(actor => !/\.{3}/.test(actor)) // truncated names end in an ellipsis
		.map(actor => actor.trim())
		.filter(actor => actor.length > 0);
}
/**
 * Scrape the scene rows of an updates list.
 *
 * @param {string} html - Updates list markup.
 * @param {Object} site - Site entry, attached to every release.
 * @returns {Object[]} One release per list row.
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));

	return scenes.map((scene) => {
		const release = { site };

		const link = scene.querySelector('.info a');
		const poster = scene.querySelector('img');
		// read the href explicitly instead of relying on the element being stringified
		const { pathname } = new URL(link.href);

		[release.entryId] = poster.id.match(/\d+/);

		release.url = `https://www.teamskeet.com${pathname}`;
		release.title = extractTitle(pathname);

		release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();

		// build 01.jpg to 05.jpg from the poster URL; the dot is escaped so only the extension matches
		const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+\.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));
		[release.poster] = photos;
		release.photos = photos.slice(1);

		const actors = scene.querySelector('div span[rel="test"]').textContent;
		release.actors = extractActors(actors);

		return release;
	});
}
/**
 * Scrape a scene page.
 *
 * @param {string} html - Scene page markup.
 * @param {Object} site - Site entry, attached to the release.
 * @param {string} url - Scene URL, stored on the release as-is.
 * @returns {Object} The scraped release.
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
	release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;

	// the page title is read as 'actors | title | channel'
	const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());

	release.url = url;
	release.title = title;
	release.actors = extractActors(actors);
	release.channel = channel.toLowerCase();
	release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);

	const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
	release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();

	const { poster } = document.querySelector('video');
	if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];

	// the dot is escaped so only the extension matches
	const siteId = document.querySelector('#story-and-tags img').src.match(/\w+\.jpg/)[0].replace('.jpg', '');
	const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];

	release.photos = Array.from({ length: 5 }, (_value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);

	// a scene without a trailer link should not fail the whole scrape
	const trailer = document.querySelector('div.right.gray a')?.href;
	if (trailer) release.trailer = { src: trailer };

	return release;
}
/**
 * Scrape a scene for sites using the 'A' layout, either from a listing tile
 * or from a full scene page.
 *
 * @param {string|null} html - Scene page markup; only parsed when `sceneX` is not given.
 * @param {Object} site - Site entry, attached to the release.
 * @param {Element} [sceneX] - Listing tile element to scrape instead of a page.
 * @param {string} [url] - Scene URL; used when scraping a full page.
 * @returns {Object} The scraped release.
 */
function scrapeSceneA(html, site, sceneX, url) {
	const scene = sceneX || new JSDOM(html).window.document;
	const release = { site };

	release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();

	release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
	release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();

	const durationString = scene.querySelector('.time').textContent.trim();
	const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
	release.duration = moment.duration(duration).asSeconds();

	if (sceneX) {
		const titleEl = scene.querySelector(':scope > a');

		release.url = titleEl.href;
		release.title = titleEl.title;

		const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
		release.poster = [poster.replace('bio_big', 'video'), poster];
		release.photos = photos;
	} else {
		release.title = scene.querySelector('.title span').textContent;
		release.url = url;

		release.poster = scene.querySelector('video').poster;
		release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
	}

	// the entry ID always comes from the URL path, skipping a leading 'scenes' segment
	const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
	release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;

	return release;
}
/**
 * Scrape every `.scenewrapper` tile on an 'A' layout listing page.
 *
 * @param {string} html - Listing page markup.
 * @param {Object} site - Site entry, attached to every release.
 * @returns {Object[]} One release per tile.
 */
function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('.scenewrapper'));

	return scenes.map(scene => scrapeSceneA(null, site, scene));
}
/**
 * Fetch one page of the teamskeet.com updates list, filtered by `site.parameters.id`.
 *
 * @param {Object} site - Site entry; `site.parameters.id` selects the site filter.
 * @param {number} [page=1] - Page number to request.
 * @returns {Promise<Object[]|null>} Scraped releases, or null on a non-200 response.
 */
async function fetchLatestTeamSkeet(site, page = 1) {
	const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return null;
}
/**
 * Fetch the scene listing of an 'A' layout site.
 *
 * The request URL carries no page number, so the same listing is fetched on every call.
 *
 * @param {Object} site - Site entry; `site.url` is the base URL.
 * @returns {Promise<Object[]|null>} Scraped releases, or null on a non-200 response.
 */
async function fetchLatestA(site) {
	const url = `${site.url}/scenes`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatestA(res.body.toString(), site);
	}

	return null;
}
/**
 * Pick the listing fetcher that matches the site's parameters.
 *
 * @param {Object} site - Site entry; `parameters.id` selects the teamskeet.com
 *   updates list, `parameters.scraper === 'A'` selects the 'A' layout.
 * @param {number} [page=1] - Page number to request.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when neither parameter applies.
 */
async function fetchLatest(site, page = 1) {
	// parameters is optional, as in fetchScene
	if (site.parameters?.id) {
		return fetchLatestTeamSkeet(site, page);
	}

	if (site.parameters?.scraper === 'A') {
		return fetchLatestA(site, page);
	}

	return null;
}
/**
 * Fetch a scene page and scrape it with the scraper matching the site's layout.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site entry; `parameters.scraper === 'A'` selects scrapeSceneA.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
	const session = bhttp.session(); // resolve redirects
	const res = await session.get(url);

	if (site.parameters?.scraper === 'A') {
		return scrapeSceneA(res.body.toString(), site, null, url);
	}

	return scrapeScene(res.body.toString(), site, url);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,9 +3,9 @@
const { fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared mindgeek scraper, using the
 * 'transangels' network slug.
 *
 * @param {string} actorName - Actor name to look up.
 * @returns {Promise<*>} Whatever the mindgeek fetchProfile returns.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'transangels');
}
module.exports = {
fetchProfile: networkFetchProfile,
fetchProfile: networkFetchProfile,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared mindgeek scraper, using the
 * 'twistys' network slug.
 *
 * @param {string} actorName - Actor name to look up.
 * @returns {Promise<*>} Whatever the mindgeek fetchProfile returns.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'twistys');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -8,128 +8,128 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = requir
const slugify = require('../utils/slugify');
/**
 * Convert scene objects from the native listing data into releases.
 *
 * @param {Object[]} scenes - Scene objects; the fields read are id, url, name,
 *   release_date, runtime, cast, rating, placard_800 and placard.
 * @param {Object} site - Site entry; `site.url` prefixes each scene URL.
 * @returns {Object[]} One release per scene.
 */
function scrapeLatestNative(scenes, site) {
	return scenes.map((scene) => {
		const release = {};

		release.entryId = scene.id;
		release.url = `${site.url}${scene.url}`;

		release.title = scene.name;
		release.date = ed(scene.release_date, 'YYYY-MM-DD');
		release.duration = parseInt(scene.runtime, 10) * 60; // runtime is multiplied by 60 here, presumably minutes to seconds

		release.actors = scene.cast?.map(actor => ({
			name: actor.stagename,
			gender: actor.gender.toLowerCase(),
			avatar: actor.placard,
		})) || [];

		release.stars = Number(scene.rating);
		release.poster = scene.placard_800 || scene.placard;

		return release;
	});
}
/**
 * Build a release from a native scene page.
 *
 * Missing page elements (release date, runtime, media URLs) leave the
 * corresponding properties unset instead of throwing.
 *
 * @param {Object} item - query context for the page
 * @param {string} item.html - raw page HTML, searched for media URLs
 * @param {Function} item.q - single-element query helper
 * @param {Function} item.qa - multi-element query helper
 * @param {string} url - scene URL; the third path component becomes the entry ID
 * @param {Object} _site - unused
 * @returns {Object} release
 */
function scrapeSceneNative({ html, q, qa }, url, _site) {
	const release = { url };

	release.entryId = new URL(url).pathname.split('/')[2]; // eslint-disable-line prefer-destructuring

	release.title = q('.scene-h2-heading', true);
	release.description = q('.indie-model-p', true);

	const dateString = qa('h5').find(el => /Released/.test(el.textContent))?.textContent;
	if (dateString) release.date = ed(dateString, 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	const duration = qa('h5').find(el => /Runtime/.test(el.textContent))?.textContent;
	const [hours, minutes] = duration?.match(/\d+/g) || [];

	if (minutes) release.duration = (hours * 3600) + (minutes * 60);
	else if (hours) release.duration = hours * 60; // scene shorter than 1hr, the only number matched is the minutes

	release.actors = qa('h4 a[href*="/stars"], h4 a[href*="/celebs"]', true);
	release.tags = qa('h5 a[href*="/categories"]', true);

	// first match is used as the poster, second (if any) as the trailer
	const [poster, trailer] = html.match(/https:\/\/content\.vivid\.com(.*)(\.jpg|\.mp4)/g) || [];
	release.poster = poster;

	if (trailer) {
		release.trailer = {
			src: trailer,
		};
	}

	const channel = q('h5 a[href*="/sites"]', true);
	if (channel) release.channel = channel.replace(/\.\w+/, '');

	return release;
}
/**
 * Fetch one page of latest scenes, from the Gamma API when the site is flagged
 * `parameters.useGamma`, otherwise from the site's own videos API.
 *
 * @param {Object} site - site record (`url`, optional `parameters.useGamma`)
 * @param {number} [page=1] - 1-based page number, 50 scenes per page
 * @returns {Promise<Object[]|null>} releases, or null when the native API call fails
 */
async function fetchLatestNative(site, page = 1) {
	if (site.parameters?.useGamma) {
		return fetchApiLatest(site, page);
	}

	const apiUrl = `${site.url}/videos/api/?limit=50&offset=${(page - 1) * 50}&sort=datedesc`;
	const res = await bhttp.get(apiUrl, {
		decodeJSON: true,
	});

	// the API reports its own status code inside the JSON body as well
	if (res.statusCode === 200 && res.body.code === 200) {
		return scrapeLatestNative(res.body.responseData, site);
	}

	return null;
}
/**
 * Fetch upcoming scenes; only available through the Gamma API.
 *
 * @param {Object} site - site record (optional `parameters.useGamma`)
 * @returns {Promise<*>} Gamma result when `useGamma` is set, otherwise null
 */
async function fetchUpcomingNative(site) {
	if (site.parameters?.useGamma) {
		return fetchApiUpcoming(site);
	}

	return null;
}
/**
 * Fetch a single scene, through Gamma when the site is flagged
 * `parameters.useGamma`, otherwise by scraping the native scene page.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site record
 * @param {Object} release - base release, forwarded to the Gamma scraper only
 * @returns {Promise<*>} release, or the response status when the page request is not ok
 */
async function fetchSceneNative(url, site, release) {
	if (site.parameters?.useGamma) {
		return fetchScene(url, site, release);
	}

	const res = await get(url);

	return res.ok ? scrapeSceneNative(res.item, url, site) : res.status;
}
/**
 * Fetch a scene through Gamma and, for scenes dated on or before
 * `site.parameters.lastNative`, replace its URL and date with those of the
 * matching entry in the site's own search API.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site record (`url`, optional `parameters.lastNative`)
 * @param {Object} release - base release, forwarded to the Gamma scraper
 * @returns {Promise<Object>} Gamma scene, with `url` and `date` overridden when a native match is found
 */
async function fetchSceneWrapper(url, site, release) {
	const scene = await fetchScene(url, site, release);

	if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
		// scene is probably still available on Vivid site, use search API to get URL and original date
		// encodeURIComponent, not encodeURI: the title is a query value and may contain &, # or +
		const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURIComponent(scene.title)}`;
		const searchRes = await bhttp.get(searchUrl, {
			decodeJSON: true,
		});

		if (searchRes.statusCode === 200 && searchRes.body.code === 200) {
			const sceneMatch = searchRes.body.responseData.find(item => slugify(item.name) === slugify(scene.title));

			if (sceneMatch) {
				return {
					...scene,
					url: `${site.url}${sceneMatch.url}`,
					date: ed(sceneMatch.release_date, 'YYYY-MM-DD'),
				};
			}
		}
	}

	return scene;
}
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene: fetchSceneWrapper,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene: fetchSceneWrapper,
};

View File

@@ -8,246 +8,246 @@ const { get, post } = require('../utils/http');
const slugify = require('../utils/slugify');
// Maps the single-letter `sex` code of a model record to a gender name.
const genderMap = {
	F: 'female',
	M: 'male',
	T: 'transsexual', // not yet observed
};
/**
 * Flatten poster image records into an ordered list of fallback source URLs.
 *
 * Only images whose name contains "landscape" are used, tallest first.
 * Variants that are absent from a record are skipped.
 *
 * @param {Object[]} poster - image records (`name`, `height`, `src`, optional `highdpi`)
 * @returns {string[]} source URLs, most preferred first
 */
function getPosterFallbacks(poster) {
	return poster
		.filter(image => /landscape/i.test(image.name))
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap((image) => {
			const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];

			// high DPI images for full HD source are huge, only prefer for smaller fallback sources
			return image.height === 1080 ? sources : sources.reverse();
		})
		.filter(Boolean); // drop variants the record does not provide
}
/**
 * Convert teaser video records into trailer-style source objects.
 *
 * Only videos whose name contains "landscape" are used. A height of exactly
 * 353 is reported as quality 360; other heights are passed through as numbers.
 *
 * @param {Object[]} teaser - video records (`name`, `src`, `type`, `height`)
 * @returns {{src: string, type: string, quality: number}[]} teaser sources
 */
function getTeaserFallbacks(teaser) {
	return teaser
		.filter(video => /landscape/i.test(video.name))
		.map((video) => {
			const height = Number(video.height);

			return {
				src: video.src,
				type: video.type,
				// compare the whole value, a substring replace would also rewrite 1353 or 3530
				quality: height === 353 ? 360 : height,
			};
		});
}
/**
 * Flatten avatar image records into an ordered list of fallback source URLs,
 * tallest image first and, per image, 3x before 2x before the base source.
 *
 * The input array is left untouched; absent variants are skipped.
 *
 * @param {Object[]} avatar - image records (`height`, `src`, optional `highdpi`)
 * @returns {string[]} source URLs, most preferred first
 */
function getAvatarFallbacks(avatar) {
	return [...avatar] // sort() works in place, do not reorder the caller's data
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
		.filter(Boolean);
}
/**
 * Resolve trailer sources for a scene: request a token for the scene's
 * preview file, then request the per-quality trailer list from the URL the
 * token response points at.
 *
 * @param {Object} scene - scene record (`previewVideoUrl1080P`)
 * @param {Object} site - site record; `site.url` is the API base
 * @param {string} url - scene page URL, sent as referer
 * @returns {Promise<{src: string, quality: number}[]|null>} available trailers in
 *   ascending quality, or null when either request fails or no token URL is returned
 */
async function getTrailer(scene, site, url) {
	const qualities = [360, 480, 720, 1080, 2160];

	const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
		file: scene.previewVideoUrl1080P,
		sizes: qualities.join('+'),
		type: 'trailer',
	}, { referer: url });

	const tokenPath = tokenRes.ok && tokenRes.body?.data?.url;

	if (!tokenPath) {
		return null;
	}

	const trailerUrl = `${site.url}/api${tokenPath}`;
	const trailersRes = await post(trailerUrl, null, { referer: url });

	if (!trailersRes.ok) {
		return null;
	}

	return qualities
		.filter(quality => trailersRes.body[quality])
		.map(quality => ({
			src: trailersRes.body[quality].token,
			quality,
		}));
}
/**
 * Map scene records from a listing response onto release objects.
 *
 * @param {Object[]} scenes - scene records from the API
 * @param {Object|null} site - site record; `site.url` prefixes scene paths when given
 * @param {string} [origin] - URL prefix used when no site is passed
 * @returns {Object[]} one release per scene
 */
function scrapeAll(scenes, site, origin) {
	return scenes.map((scene) => {
		const release = {};

		release.title = scene.title;

		release.entryId = String(scene.newId);
		release.url = `${site?.url || origin}${scene.targetUrl}`;

		release.date = moment.utc(scene.releaseDate).toDate();
		release.shootDate = moment.utc(scene.shootDate).toDate();

		release.actors = scene.models;
		// NOTE(review): textRating looks like a 0-10 score halved to a 5-star scale — confirm
		release.stars = Number(scene.textRating) / 2;

		release.poster = getPosterFallbacks(scene.images.poster);
		release.teaser = getTeaserFallbacks(scene.previews.poster);

		return release;
	});
}
/**
 * Build the upcoming release from the `nextScene` record of the site API.
 *
 * The title is derived from the URL slug, and the entry ID from the first
 * number following a slash in the first poster URL, or in the first teaser
 * URL when there is no poster.
 *
 * @param {Object|null} scene - upcoming scene record
 * @param {Object} site - site record; `site.url` prefixes the scene path
 * @returns {Object[]|null} single-element release list, or null when there is
 *   no scene or it is flagged `isPreReleasePeriod`
 */
function scrapeUpcoming(scene, site) {
	if (!scene || scene.isPreReleasePeriod) return null;

	const release = {};

	release.title = scene.targetUrl
		.slice(1)
		.split('-')
		.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
		.join(' ');

	release.url = `${site.url}${scene.targetUrl}`;

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.actors = scene.models;

	release.poster = getPosterFallbacks(scene.images.poster);
	release.teaser = getTeaserFallbacks(scene.previews.poster);

	// poster entries are URL strings, teaser entries are { src, type, quality } objects
	release.entryId = (release.poster[0] || release.teaser[0]?.src)?.match(/\/(\d+)/)?.[1];

	return [release];
}
/**
 * Build a full release from a scene API response, including trailer sources.
 *
 * @param {Object} data - API payload (`video` scene record, `pictureset` photos)
 * @param {string} url - scene page URL
 * @param {Object} site - site record, used for the trailer requests
 * @param {Object} [baseRelease] - earlier scrape result; its `actors` take precedence
 * @returns {Promise<Object>} release
 */
async function scrapeScene(data, url, site, baseRelease) {
	const scene = data.video;

	const release = {
		url,
		title: scene.title,
		description: scene.description,
		actors: baseRelease?.actors || scene.models,
		director: scene.directorNames,
		duration: scene.runLength,
		stars: scene.totalRateVal,
		tags: scene.tags,
	};

	// stringified so the ID has the same type as the one scrapeAll produces
	release.entryId = String(scene.newId);

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.poster = getPosterFallbacks(scene.images.poster);
	release.photos = data.pictureset.map(photo => photo.main[0].src);

	release.teaser = getTeaserFallbacks(scene.previews.poster);

	const trailer = await getTrailer(scene, site, url);
	if (trailer) release.trailer = trailer;

	return release;
}
/**
 * Fetch and scrape the given pages of a model's scene listing, three pages at
 * a time. Pages that do not respond with code 200 contribute no releases.
 *
 * @param {number[]} pages - page numbers to fetch
 * @param {Object} model - model record; `model.targetUrl` is the API path
 * @param {string} origin - site origin, e.g. `https://www.example.com`
 * @returns {Promise<Object[]>} releases of all pages, flattened
 */
async function fetchActorReleases(pages, model, origin) {
	// NOTE(review): Promise.map is not a native method, presumably Bluebird is required as Promise — confirm
	const releasesPerPage = await Promise.map(pages, async (page) => {
		const url = `${origin}/api${model.targetUrl}?page=${page}`;
		const res = await get(url);

		if (res.code === 200) {
			return scrapeAll(res.body.data.videos.videos, null, origin);
		}

		return [];
	}, { concurrency: 3 });

	return releasesPerPage.flat();
}
/**
 * Build an actor profile from a model API response.
 *
 * @param {Object} data - API payload (`model` record, `videos.videos` first page, `videos.count`)
 * @param {string} origin - site origin, used for release URLs and further page requests
 * @param {boolean} [withReleases] - when truthy, also fetch the remaining listing pages
 * @returns {Promise<Object>} profile; `birthdate` and `description` are only set when the model provides them
 */
async function scrapeProfile(data, origin, withReleases) {
	const { model } = data;
	const profile = {};

	// new Date(null) is the Unix epoch, only parse a date of birth that is present
	if (model.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
	profile.gender = genderMap[model.sex];

	profile.hair = model.hairColour;
	profile.nationality = model.nationality;

	if (model.biography?.trim().length > 0) profile.description = model.biography;

	if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
	if (model.waistMeasurment) profile.waist = model.waistMeasurment;
	if (model.hipMeasurment) profile.hip = model.hipMeasurment;

	profile.avatar = getAvatarFallbacks(model.images.listing);
	profile.poster = getAvatarFallbacks(model.images.profile);
	profile.banner = getAvatarFallbacks(model.images.poster);

	const releases = scrapeAll(data.videos.videos, null, origin);

	if (withReleases) {
		// NOTE(review): assumes the listing returns 6 scenes per page — confirm against the API
		const pageCount = Math.ceil(data.videos.count / 6);
		// the first page is part of this response, request pages 2..pageCount
		const otherPages = Array.from({ length: pageCount - 1 }, (value, index) => index + 2);
		const otherReleases = await fetchActorReleases(otherPages, model, origin);

		profile.releases = [...releases, ...otherReleases];
	} else {
		profile.releases = releases;
	}

	return profile;
}
/**
 * Fetch one page of the site's latest scenes.
 *
 * @param {Object} site - site record; `site.url` is the API base
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]|*>} releases, or the response code when it is not 200
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/api/videos?page=${page}`;
	const res = await get(url);

	if (res.code === 200) {
		return scrapeAll(res.body.data.videos, site);
	}

	return res.code;
}
/**
 * Fetch the site's upcoming scene from the `nextScene` field of the API root.
 *
 * @param {Object} site - site record; `site.url` is the API base
 * @returns {Promise<Object[]|null|*>} result of scrapeUpcoming, or the response code when it is not 200
 */
async function fetchUpcoming(site) {
	const apiUrl = `${site.url}/api`;
	const res = await get(apiUrl);

	if (res.code === 200) {
		return scrapeUpcoming(res.body.data.nextScene, site);
	}

	return res.code;
}
/**
 * Fetch a scene through the API endpoint that mirrors its page path
 * (`<origin>/api<pathname>`).
 *
 * @param {string} url - scene page URL
 * @param {Object} site - site record, forwarded to the scraper
 * @param {Object} [baseRelease] - earlier scrape result, forwarded to the scraper
 * @returns {Promise<Object|*>} release, or the response code when it is not 200
 */
async function fetchScene(url, site, baseRelease) {
	const { origin, pathname } = new URL(url);
	const apiUrl = `${origin}/api${pathname}`;

	const res = await get(apiUrl);

	if (res.code === 200) {
		return scrapeScene(res.body.data, url, site, baseRelease);
	}

	return res.code;
}
/**
 * Fetch an actor profile from `https://www.<scraperSlug>.com/api/<actor-slug>`.
 *
 * @param {string} actorName - actor name, slugified for the API path
 * @param {string} scraperSlug - scraper slug, used as the site's domain name
 * @param {Object} [site] - unused
 * @param {Object} [include] - options; `include.scenes` requests all of the actor's releases
 * @returns {Promise<Object|null>} profile, or null when the response code is not 200
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
	const origin = `https://www.${scraperSlug}.com`;
	const actorSlug = slugify(actorName);
	const url = `${origin}/api/${actorSlug}`;
	const res = await get(url);

	if (res.code === 200) {
		// include is optional, callers may omit it entirely
		return scrapeProfile(res.body.data, origin, include?.scenes);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};

View File

@@ -5,199 +5,199 @@ const { ex, ctxa } = require('../utils/q');
// const slugify = require('../utils/slugify');
/**
 * Derive the numeric key used to unscramble trailer URLs from the
 * `license_code` embedded in the page.
 *
 * The arithmetic is a port of the site player's own routine and is kept
 * step for step; only the variable names and loop structure are new.
 *
 * @param {string} html - page HTML containing `license_code: '...'`
 * @returns {string} key made of four digits per processed position
 */
function getLicenseCode(html) {
	const licensePrefix = 'license_code: \'';
	const licenseStart = html.indexOf(licensePrefix);
	const licenseCode = html.slice(licenseStart + licensePrefix.length, html.indexOf('\'', licenseStart + licensePrefix.length));

	const modulus = (parseInt('16px', 10) / 2) + 2; // 10, derived from the same '16px' constant the URL decoder uses

	// every character after the first, with zeros and non-digits replaced by 1
	let digits = '';

	for (let index = 1; index < licenseCode.length; index += 1) {
		digits += parseInt(licenseCode[index], 10) || 1;
	}

	// the two halves overlap by one digit when the digit count is odd
	const half = Math.floor(digits.length / 2);
	const head = parseInt(digits.substring(0, half + 1), 10);
	const tail = parseInt(digits.substring(half), 10);

	// (|tail - head| + |head - tail|) * 2 in the original
	const offsets = String(4 * Math.abs(tail - head));

	let key = '';

	for (let position = 0; position < half + 1; position += 1) {
		for (let step = 1; step <= 4; step += 1) {
			let digit = parseInt(licenseCode[position + step], 10) + parseInt(offsets[position], 10);

			if (digit >= modulus) digit -= modulus;
			key += digit;
		}
	}

	return key;
}
/**
 * Unscramble an encoded trailer URL using the key derived from the page's
 * license code.
 *
 * The first two path components of the encoded value are dropped, and the
 * first 32 characters of the sixth remaining component are reordered by a
 * series of position swaps driven by the key.
 *
 * @param {string} html - page HTML, source of the license code
 * @param {string} encodedTrailerUrl - encoded URL from the player variables
 * @returns {string|null} decoded URL, or null when no encoded URL is given
 */
function decodeTrailerUrl(html, encodedTrailerUrl) {
	// not every scene provides every alternative URL
	if (typeof encodedTrailerUrl !== 'string') return null;

	const licenseCode = getLicenseCode(html);
	const segments = encodedTrailerUrl.split('/').slice(2);

	const scrambled = segments[5].substring(0, 2 * parseInt('16px', 10));
	let hash = scrambled;

	for (let position = hash.length - 1; position >= 0; position -= 1) {
		// swap target: position plus the sum of the key digits from position onwards, wrapped to the hash length
		let target = position;

		for (let index = position; index < licenseCode.length; index += 1) {
			target += parseInt(licenseCode[index], 10);
		}

		while (target >= hash.length) {
			target -= hash.length;
		}

		let swapped = '';

		for (let index = 0; index < hash.length; index += 1) {
			if (index === position) {
				swapped += hash[target];
			} else {
				swapped += (index === target ? hash[position] : hash[index]);
			}
		}

		hash = swapped;
	}

	segments[5] = segments[5].replace(scrambled, hash);

	return segments.join('/');
}
/**
 * Scrape the releases listed on a latest-videos page.
 *
 * @param {string} html - page HTML
 * @returns {Object[]} one release per `.video-post` element
 */
function scrapeLatest(html) {
	const { document } = ex(html);

	return ctxa(document, '.video-post').map(({ q, qa, qd }) => {
		const release = {};

		release.entryId = q('.ico-fav-0').dataset.favVideoId;

		const titleEl = q('.video-title-title');
		release.title = titleEl.title;
		release.url = titleEl.href;

		release.date = qd('.video-data em', 'MMM DD, YYYY');
		release.actors = qa('.video-model-list a', true);

		// poster and preview video are both data attributes of the thumbnail
		const posterData = q('img.thumb').dataset;
		release.poster = posterData.src;
		release.trailer = posterData.preview;

		return release;
	});
}
/**
 * Scrape a release from a scene page, including the trailer sources encoded
 * in the player's `flashvars`.
 *
 * @param {string} html - page HTML
 * @param {string} url - scene page URL
 * @returns {Object} release; `trailer` lists only the player URLs present on the page
 */
function scrapeScene(html, url) {
	const { qu } = ex(html);
	const release = { url };

	[release.entryId] = qu.q('link[rel="canonical"]').href.match(/\d+/);

	release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
	release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);

	release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
	release.duration = qu.dur('.info-video-details li:nth-child(2) span');

	release.actors = qu.all('.info-video-models a', true);
	release.tags = qu.all('.info-video-category a', true);

	release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
	release.poster = qu.meta('meta[property="og:image"]');

	if (!release.poster) {
		// fall back to the first .jpg URL following the player's preview_url variable
		const previewStart = html.indexOf('preview_url');
		release.poster = html.slice(html.indexOf('http', previewStart), html.indexOf('.jpg', previewStart) + 4);
	}

	const varsPrefix = 'flashvars = {';
	const varsStart = html.indexOf(varsPrefix);
	const varsString = html.slice(varsStart + varsPrefix.length, html.indexOf('};', varsStart));

	// flashvars is a flat `key: 'value'` list; entries without a separator are skipped
	const vars = varsString.split(',').reduce((acc, item) => {
		const separatorIndex = item.indexOf(': ');

		if (separatorIndex === -1) {
			return acc;
		}

		acc[item.slice(0, separatorIndex).trim()] = item.slice(separatorIndex + 2).trim().replace(/'/g, '');

		return acc;
	}, {});

	// each URL variable has a matching `<name>_text` variable that starts with its quality
	release.trailer = ['video_url', 'video_alt_url', 'video_alt_url2', 'video_alt_url3', 'video_alt_url4']
		.filter(key => vars[key])
		.map(key => ({
			src: decodeTrailerUrl(html, vars[key]),
			quality: parseInt(vars[`${key}_text`], 10),
		}));

	return release;
}
/**
 * Fetch one page of the latest videos, sorted by post date.
 *
 * @param {Object} site - site record, forwarded to the scraper; the URL itself is fixed
 * @param {number} [page=1] - page number, sent as the `from` query parameter
 * @returns {Promise<Object[]|null>} releases, or null when the status is not 200
 */
async function fetchLatest(site, page = 1) {
	const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return null;
}
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - scene page URL
 * @returns {Promise<Object|null>} release, or null when the status is not 200
 */
async function fetchScene(url) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), url);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -5,86 +5,86 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Scrape the release cards in the last `.video-releases-list` of a listing page.
 *
 * @param {string} html - page HTML
 * @param {Object} site - site record; attached to each release, and the origin
 *   of `site.url` prefixes the scene paths
 * @returns {Object[]} one release per card, empty when the page has no release list
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const { origin } = new URL(site.url);

	const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];

	if (!videos) {
		return [];
	}

	return Array.from(videos.querySelectorAll('.card'), (scene) => {
		const release = { site };

		release.url = `${origin}${scene.querySelector(':scope > a').href}`;
		release.entryId = scene.dataset.videoId;
		release.title = scene.querySelector('.card-title').textContent;
		release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
		release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);

		// image sources are prefixed with the scheme here, presumably protocol-relative in the markup — confirm
		release.poster = `https:${scene.querySelector('.single-image').src}`;
		release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);

		const trailerEl = scene.querySelector('source');
		if (trailerEl) release.trailer = { src: trailerEl.dataset.src };

		return release;
	});
}
/**
 * Scrape a release from a scene page.
 *
 * @param {string} html - page HTML
 * @param {Object} site - site record, attached to the release
 * @param {string} url - scene page URL
 * @returns {Object} release; `date` and `duration` are only set when the page provides them
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	const scene = document.querySelector('#t2019-2col');

	release.url = url;
	release.title = scene.querySelector('.t2019-stitle').textContent.trim();
	release.description = scene.querySelector('#t2019-description').textContent.trim();
	release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), el => el.textContent);

	// with two or more spans the first holds the date and the second the duration,
	// a lone span holds the duration
	const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span'));
	const hasDate = durationEls.length > 1;

	if (hasDate) {
		release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate();
	}

	const minutes = durationEls[hasDate ? 1 : 0]?.textContent.match(/\d+/)?.[0];

	if (minutes) {
		release.duration = Number(minutes) * 60;
	}

	release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);

	const posterEl = scene.querySelector('#no-player-image');
	const videoEl = scene.querySelector('video');

	if (posterEl) release.poster = `https:${posterEl.src}`;
	else if (videoEl) release.poster = `https:${videoEl.poster}`;

	const trailerEl = scene.querySelector('#t2019-video source');
	if (trailerEl) release.trailer = { src: trailerEl.src };

	return release;
}
/**
 * Fetch one page of the site's release listing.
 *
 * @param {Object} site - site record; `site.url` is the listing URL
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]>} releases, empty when the status is not 200
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return [];
}
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - scene page URL
 * @param {Object} site - site record, attached to the release
 * @returns {Promise<Object|null>} release, or null when the status is not 200
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), site, url);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -5,31 +5,31 @@ const bhttp = require('bhttp');
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
/**
 * Fetch a scene page, scrape it with the Gamma scraper, and set the channel
 * from the page's `twitter:domain` meta tag.
 *
 * @param {string} url - scene page URL
 * @param {Object} site - site record, forwarded to the scraper
 * @returns {Promise<Object>} release with `channel` and `director` set
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	const release = await scrapeScene(res.body.toString(), url, site);

	// only AllBlackX has no twitter domain, no other useful hints available
	const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com';

	// first domain label, e.g. 'allblackx' for 'allblackx.com'
	release.channel = siteDomain.split('.')[0].toLowerCase();
	release.director = 'Mason';

	return release;
}
/**
 * Build the XEmpire listing URL for an actor's releases.
 *
 * @param {string} actorPath - path segment appended directly after the trailing `0` (expected to start with `/`)
 * @param {number} [page=1] - listing page number
 * @returns {string} absolute listing URL
 */
function getActorReleasesUrl(actorPath, page = 1) {
	return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
}
/**
 * Fetch an actor profile through the shared gamma fetchProfile, using this
 * network's actor releases URL builder.
 *
 * @param {string} actorName - actor to look up
 * @param {string} scraperSlug - scraper slug, forwarded unchanged
 * @param {Object} site - accepted for signature compatibility; null is forwarded in its place
 * @param {*} include - forwarded unchanged
 * @returns {Promise<*>} whatever fetchProfile resolves to
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene,
};

View File

@@ -8,189 +8,189 @@ const knex = require('./knex');
const whereOr = require('./utils/where-or');
/**
 * Convert a raw `sites` row (joined with `network_*` columns) into the
 * application's site shape with a nested `network` object.
 *
 * @param {Object} site - raw row with site columns and `network_*` aliases
 * @param {boolean} [includeParameters=false] - expose site and network parameters; null when false
 * @param {boolean} [includeTags=true] - query `sites_tags` and attach the site's tags
 * @returns {Promise<Object>} curated site
 */
async function curateSite(site, includeParameters = false, includeTags = true) {
	const curatedSite = {
		id: site.id,
		name: site.name,
		url: site.url,
		description: site.description,
		slug: site.slug,
		independent: !!site.parameters && site.parameters.independent,
		parameters: includeParameters ? site.parameters : null,
		network: {
			id: site.network_id,
			name: site.network_name,
			description: site.network_description,
			slug: site.network_slug,
			url: site.network_url,
			parameters: includeParameters ? site.network_parameters : null,
		},
	};

	if (includeTags) {
		curatedSite.tags = await knex('sites_tags')
			.select('tags.*', 'sites_tags.inherit')
			.where('site_id', site.id)
			.join('tags', 'tags.id', 'sites_tags.tag_id');
	}

	return curatedSite;
}
/**
 * Curate a list of raw site rows in parallel.
 *
 * @param {Object[]} sites - raw site rows
 * @param {boolean} [includeParameters] - forwarded to curateSite
 * @returns {Promise<Object[]>} curated sites, in input order
 */
async function curateSites(sites, includeParameters) {
	return Promise.all(sites.map(site => curateSite(site, includeParameters)));
}
/**
 * Split a config include/exclude list into network entries and site entries.
 *
 * An array entry contributes its second element (a list of sites) to `sites`;
 * any other entry is treated as a network and added to `networks`.
 *
 * @param {Array} [networks=[]] - config entries
 * @returns {{ networks: Array, sites: Array }} collected networks and sites
 */
function destructConfigNetworks(networks = []) {
	return networks.reduce((acc, network) => {
		if (Array.isArray(network)) {
			// network specifies sites
			return {
				...acc,
				sites: [...acc.sites, ...network[1]],
			};
		}

		return {
			...acc,
			networks: [...acc.networks, network],
		};
	}, {
		networks: [],
		sites: [],
	});
}
/**
 * Look up the site a URL belongs to, matching `sites.url` against several
 * variants derived from the URL.
 *
 * @param {string} url - absolute URL; parsed with the URL constructor, so an invalid URL throws
 * @returns {Promise<Object|null>} curated site with parameters and without tags, or null when nothing matches
 */
async function findSiteByUrl(url) {
	const { origin, hostname, pathname } = new URL(url);
	// const domain = hostname.replace(/www.|tour./, '');
	const dirUrl = `${origin}${pathname.split('/').slice(0, 2).join('/')}`; // allow for sites on URI directory

	const site = await knex('sites')
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.where('sites.url', url)
		.orWhere('sites.url', origin)
		.orWhere('sites.url', origin.replace(/www\.|tour\./, ''))
		.orWhere('sites.url', `https://www.${hostname}`)
		.orWhere('sites.url', `http://www.${hostname}`)
		.orWhere('sites.url', dirUrl)
		// .orWhere('sites.url', 'like', `%${domain}`)
		.first();

	if (site) {
		return curateSite(site, true, false);
	}

	return null;
}
/**
 * Group curated sites under their network.
 *
 * @param {Object[]} sites - curated sites, each with a `network` object that has a `slug`
 * @returns {Object[]} one entry per network slug: the network's properties plus a `sites` array
 */
function sitesByNetwork(sites) {
	const networks = sites.reduce((acc, site) => {
		if (acc[site.network.slug]) {
			acc[site.network.slug].sites = acc[site.network.slug].sites.concat(site);

			return acc;
		}

		// first site seen for this network: copy the network and start its site list
		acc[site.network.slug] = {
			...site.network,
			sites: [site],
		};

		return acc;
	}, {});

	return Object.values(networks);
}
/**
 * Fetch the sites selected on the command line, by site slug (`argv.sites`)
 * or by network slug (`argv.networks`), grouped by network.
 *
 * @returns {Promise<Object[]>} networks, each with its curated sites
 */
async function fetchSitesFromArgv() {
	const rawSites = await knex('sites')
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.whereIn('sites.slug', argv.sites || [])
		.orWhereIn('networks.slug', argv.networks || [])
		.leftJoin('networks', 'sites.network_id', 'networks.id');

	const curatedSites = await curateSites(rawSites, true);
	logger.info(`Found ${curatedSites.length} sites in database`);

	return sitesByNetwork(curatedSites);
}
/**
 * Fetch the sites selected by the configuration's `include` and `exclude`
 * lists, grouped by network.
 *
 * The include filter is only applied when `config.include` is set; the
 * exclude filter is always applied.
 *
 * @returns {Promise<Object[]>} networks, each with its curated sites
 */
async function fetchSitesFromConfig() {
	const included = destructConfigNetworks(config.include);
	const excluded = destructConfigNetworks(config.exclude);

	const rawSites = await knex('sites')
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.where((builder) => {
			if (config.include) {
				builder
					.whereIn('sites.slug', included.sites)
					.orWhereIn('networks.slug', included.networks);
			}
		})
		.whereNot((builder) => {
			builder
				.whereIn('sites.slug', excluded.sites)
				.orWhereIn('networks.slug', excluded.networks);
		});

	const curatedSites = await curateSites(rawSites, true);
	logger.info(`Found ${curatedSites.length} sites in database`);

	return sitesByNetwork(curatedSites);
}
/**
 * Fetch the sites to process: the command line selection when `argv.networks`
 * or `argv.sites` is given, otherwise the configured selection.
 *
 * @returns {Promise<Object[]>} networks, each with its curated sites
 */
async function fetchIncludedSites() {
	if (argv.networks || argv.sites) {
		return fetchSitesFromArgv();
	}

	return fetchSitesFromConfig();
}
/**
 * Fetch up to 100 sites matching a query object.
 *
 * @param {Object} queryObject - conditions handed to the whereOr helper for the `sites` table
 * @returns {Promise<Object[]>} curated sites (parameters not included)
 */
async function fetchSites(queryObject) {
	const sites = await knex('sites')
		.where(builder => whereOr(queryObject, 'sites', builder))
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.limit(100);

	return curateSites(sites);
}
/**
 * Fetch up to 100 sites referenced by stored releases.
 *
 * @returns {Promise<Object[]>} curated sites
 */
async function fetchSitesFromReleases() {
	// NOTE(review): the empty-string column in select() looks unfinished, and the selected
	// columns do not include the site/network fields curateSite reads; confirm the intended query
	const sites = await knex('releases')
		.select('site_id', '')
		.leftJoin('sites', 'sites.id', 'releases.site_id')
		.groupBy('sites.id')
		.limit(100);

	return curateSites(sites);
}
module.exports = {
curateSite,
curateSites,
fetchIncludedSites,
fetchSites,
fetchSitesFromConfig,
fetchSitesFromArgv,
fetchSitesFromReleases,
findSiteByUrl,
curateSite,
curateSites,
fetchIncludedSites,
fetchSites,
fetchSitesFromConfig,
fetchSitesFromArgv,
fetchSitesFromReleases,
findSiteByUrl,
};

View File

@@ -11,155 +11,164 @@ const { curateSite } = require('./sites');
const { associateReleaseMedia } = require('./media');
/**
 * Build the `releases` table row for a scraped release.
 *
 * The slug is derived from the title, falling back to the actor names joined
 * by dashes, and is limited to `config.titleSlugLength`.
 *
 * @param {Object} release - scraped release with site (or network) and optional studio attached
 * @param {*} batchId - ID of the current batch; always stored as `updated_batch_id`
 * @param {Object} [existingRelease] - stored entry, when the release already exists
 * @returns {Object} row ready for insertion
 */
function curateReleaseEntry(release, batchId, existingRelease) {
	const slug = slugify(release.title || release.actors?.join('-') || null, '-', {
		encode: true,
		limit: config.titleSlugLength,
	});

	const curatedRelease = {
		title: release.title,
		entry_id: release.entryId || null,
		site_id: release.site?.id,
		network_id: release.site ? null : release.network?.id, // prefer site ID if available
		shoot_id: release.shootId || null,
		studio_id: release.studio?.id || null,
		url: release.url,
		date: release.date,
		slug,
		description: release.description,
		duration: release.duration,
		type: release.type,
		// director: release.director,
		// likes: release.rating && release.rating.likes,
		// dislikes: release.rating && release.rating.dislikes,
		// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
		deep: typeof release.deep === 'boolean' ? release.deep : false,
		deep_url: release.deepUrl,
		updated_batch_id: batchId,
	};

	// only releases that are neither known nor carrying an ID are marked as created in this batch
	if (!existingRelease && !release.id) {
		curatedRelease.created_batch_id = batchId;
	}

	return curatedRelease;
}
/**
 * Resolve the site of releases that only specify a channel slug.
 *
 * Releases that already have a non-network site are returned unchanged.
 * Releases whose channel matches a site slug get that curated site. Releases
 * whose site is a network placeholder (`site.isNetwork`) and whose channel
 * cannot be matched are moved to `network` with `site` set to null. Anything
 * else is logged and dropped.
 *
 * @param {Object[]} releases - scraped releases
 * @returns {Promise<Object[]>} releases with a site or network attached
 */
async function attachChannelSites(releases) {
	const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isNetwork));

	const channelSites = await knex('sites')
		.leftJoin('networks', 'networks.id', 'sites.network_id')
		.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.parameters as network_parameters', 'networks.description as network_description')
		.whereIn('sites.slug', releasesWithoutSite.map(release => release.channel));

	const channelSitesBySlug = channelSites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});

	const releasesWithChannelSite = await Promise.all(releases
		.map(async (release) => {
			if (release.site && !release.site.isNetwork) {
				return release;
			}

			if (release.channel && channelSitesBySlug[release.channel]) {
				const curatedSite = await curateSite(channelSitesBySlug[release.channel]);

				return {
					...release,
					site: curatedSite,
				};
			}

			// no matching channel site: keep the release, attached to the network instead
			if (release.site && release.site.isNetwork) {
				return {
					...release,
					site: null,
					network: release.site,
				};
			}

			logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);

			return null;
		}));

	return releasesWithChannelSite.filter(Boolean);
}
/**
 * Replace each release's studio slug with the matching `studios` row.
 *
 * Releases whose studio slug has no match are returned unchanged and a
 * warning is logged.
 *
 * @param {Object[]} releases - releases whose `studio`, when set, is a studio slug
 * @returns {Promise<Object[]>} releases with matched studios attached
 */
async function attachStudios(releases) {
	const studioSlugs = releases.map(release => release.studio).filter(Boolean);

	const studios = await knex('studios').whereIn('slug', studioSlugs);
	const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {});

	const releasesWithStudio = releases.map((release) => {
		if (release.studio && studioBySlug[release.studio]) {
			return {
				...release,
				studio: studioBySlug[release.studio],
			};
		}

		if (release.studio) {
			logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);
		}

		return release;
	});

	return releasesWithStudio;
}
/**
 * Attach the database ID of the matching stored entry to each release,
 * matching on site ID and entry ID.
 *
 * @param {Object[]} releases - releases with `site.id` and `entryId`
 * @param {Object[]} storedReleases - stored rows with `id`, `site_id` and `entry_id`
 * @returns {Object[]} copies of the releases with `id` set; undefined when no stored entry matches
 */
function attachReleaseIds(releases, storedReleases) {
	const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
		if (!acc[release.site_id]) acc[release.site_id] = {};
		acc[release.site_id][release.entry_id] = release.id;

		return acc;
	}, {});

	// NOTE(review): releases without a site (network-only releases) still throw on release.site.id; confirm how those should be keyed
	const releasesWithId = releases.map(release => ({
		...release,
		// a site without any stored entries yields undefined instead of a TypeError
		id: storedReleaseIdsBySiteIdAndEntryId[release.site.id]?.[release.entryId],
	}));

	return releasesWithId;
}
/**
 * Remove releases that share a site ID and entry ID with another release in
 * the same list; the last occurrence is kept.
 *
 * @param {Object[]} releases - releases with `site.id` and `entryId`
 * @returns {Object[]} releases unique by site ID and entry ID
 */
function filterInternalDuplicateReleases(releases) {
	const releasesBySiteIdAndEntryId = releases.reduce((acc, release) => {
		if (!acc[release.site.id]) {
			acc[release.site.id] = {};
		}

		// later releases overwrite earlier ones with the same site and entry ID
		acc[release.site.id][release.entryId] = release;

		return acc;
	}, {});

	return Object.values(releasesBySiteIdAndEntryId)
		.map(siteReleases => Object.values(siteReleases))
		.flat();
}
/**
 * Split releases into those already stored and those that are new, matching
 * on entry ID and site ID.
 *
 * @param {Object[]} releases - releases with `site.id` and `entryId`
 * @returns {Promise<{ uniqueReleases: Object[], duplicateReleases: Object[], duplicateReleaseEntries: Object[] }>}
 *   new releases, releases that already exist, and the stored rows of the latter
 */
async function filterDuplicateReleases(releases) {
	const internalUniqueReleases = filterInternalDuplicateReleases(releases);

	const duplicateReleaseEntries = await knex('releases')
		.whereIn(['entry_id', 'site_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id]));

	const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
		if (!acc[release.site_id]) acc[release.site_id] = {};
		acc[release.site_id][release.entry_id] = true;

		return acc;
	}, {});

	const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
	const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);

	return {
		uniqueReleases,
		duplicateReleases,
		duplicateReleaseEntries,
	};
}
async function updateReleasesSearch(releaseIds) {
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
const documents = await knex.raw(`
const documents = await knex.raw(`
SELECT
releases.id AS release_id,
TO_TSVECTOR(
@@ -190,45 +199,49 @@ async function updateReleasesSearch(releaseIds) {
GROUP BY releases.id, sites.name, sites.slug, sites.alias, sites.url, networks.name, networks.slug, networks.url;
`, releaseIds && [releaseIds]);
if (documents.rows?.length > 0) {
const query = knex('releases_search').insert(documents.rows).toString();
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
if (documents.rows?.length > 0) {
const query = knex('releases_search').insert(documents.rows).toString();
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
}
/**
 * Store scraped releases and their associations.
 *
 * Creates a batch, attaches sites and studios, inserts the releases that are
 * not stored yet, then associates actors, tags and media and updates the
 * search documents for all releases in the batch.
 *
 * @param {Object[]} releases - scraped releases
 * @returns {Promise<Object[]>} new and already stored releases with their database ID attached; empty for empty input
 */
async function storeReleases(releases) {
	// nothing to store: return before a batch entry is created
	if (releases.length === 0) {
		return [];
	}

	const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

	const releasesWithSites = await attachChannelSites(releases);
	const releasesWithStudios = await attachStudios(releasesWithSites);

	// uniqueness is site ID + entry ID, filter uniques after adding sites
	const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);

	const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));

	const storedReleases = await knex('releases').insert(curatedNewReleaseEntries).returning('*');
	// TODO: update duplicate releases

	const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
	const releasesWithId = attachReleaseIds([].concat(uniqueReleases, duplicateReleases), [].concat(storedReleaseEntries, duplicateReleaseEntries));

	await Promise.all([
		associateActors(releasesWithId, batchId),
		associateReleaseTags(releasesWithId),
	]);

	// media is more error-prone, associate separately
	await associateReleaseMedia(releasesWithId);

	logger.info(`Stored ${storedReleaseEntries.length} releases`);

	await updateReleasesSearch(releasesWithId.map(release => release.id));

	return releasesWithId;
}
module.exports = {
storeReleases,
updateReleasesSearch,
storeReleases,
updateReleasesSearch,
};

View File

@@ -5,106 +5,106 @@ const knex = require('./knex');
const whereOr = require('./utils/where-or');
/**
 * Convert a raw `tags` row (joined with `group_*` columns) into the
 * application's tag shape, including alias names and media.
 *
 * @param {Object} tag - raw row with tag columns and `group_*` aliases
 * @returns {Promise<Object>} curated tag with `poster`, `photos`, `group` and `aliases`
 */
async function curateTag(tag) {
	const [aliases, media] = await Promise.all([
		knex('tags').where({ alias_for: tag.id }),
		knex('media')
			.where('domain', 'tags')
			.andWhere('target_id', tag.id)
			.orderBy('index'),
	]);

	return {
		id: tag.id,
		name: tag.name,
		slug: tag.slug,
		description: tag.description,
		poster: media.find(photo => photo.role === 'poster'),
		photos: media.filter(photo => photo.role === 'photo'),
		group: {
			id: tag.group_id,
			name: tag.group_name,
			description: tag.group_description,
			slug: tag.group_slug,
		},
		aliases: aliases.map(({ name }) => name),
	};
}
/**
 * Curate a list of raw tag rows in parallel.
 *
 * @param {Object[]} tags - raw tag rows
 * @returns {Promise<Object[]>} curated tags, in input order
 */
function curateTags(tags) {
	return Promise.all(tags.map(tag => curateTag(tag)));
}
/**
 * Match raw tag names to tag IDs, following aliases.
 *
 * Names are matched as given, lowercased and uppercased.
 *
 * @param {string[]} rawTags - tag names; falsy entries are ignored
 * @returns {Promise<Array>} IDs plucked from the joined `aliases` rows
 */
async function matchTags(rawTags) {
	const filteredTags = rawTags.filter(Boolean);

	const tags = filteredTags
		.concat(filteredTags.map(tag => tag.toLowerCase()))
		.concat(filteredTags.map(tag => tag.toUpperCase()));

	const tagEntries = await knex('tags')
		.pluck('aliases.id')
		.whereIn('tags.name', tags)
		.leftJoin('tags as aliases', function join() {
			this
				.on('tags.alias_for', 'aliases.id')
				.orOn('tags.id', 'aliases.id');
		})
		.where(function where() {
			this
				.whereNull('tags.alias_for')
				.orWhereNull('aliases.alias_for');
		})
		.groupBy('aliases.id');

	return tagEntries;
}
/**
 * Associate a stored release with its own tags and the tags inherited from
 * its site, skipping associations that already exist.
 *
 * @param {Object} release - scraped release; `tags` holds raw tag names or pre-matched tag IDs
 * @param {*} releaseId - database ID of the release
 * @returns {Promise<void>}
 */
async function associateTags(release, releaseId) {
	const siteTags = release.site?.tags?.filter(tag => tag.inherit === true).map(tag => tag.id) || [];

	const rawReleaseTags = release.tags?.filter(Boolean) || [];
	const releaseTags = rawReleaseTags.some(tag => typeof tag === 'string')
		? await matchTags(release.tags) // scraper returned raw tags
		: rawReleaseTags; // tags already matched by (outdated) scraper

	const tags = Array.from(new Set(releaseTags.concat(siteTags)));

	if (tags.length === 0) {
		// the site is optional above, so it must not be required for the log message either
		logger.info(`No tags available for (${release.site?.name}, ${releaseId}) "${release.title}"`);
		return;
	}

	const associationEntries = await knex('releases_tags')
		.where('release_id', releaseId)
		.whereIn('tag_id', tags);

	const existingAssociations = new Set(associationEntries.map(association => association.tag_id));
	const newAssociations = tags.filter(tagId => !existingAssociations.has(tagId));

	if (newAssociations.length === 0) {
		// everything is already associated, nothing to insert
		return;
	}

	await knex('releases_tags').insert(newAssociations.map(tagId => ({
		tag_id: tagId,
		release_id: releaseId,
	})));
}
/**
 * Fetch tags matching a tag query or a tag group query, ordered by name.
 *
 * @param {Object} queryObject - conditions handed to the whereOr helper for the `tags` table
 * @param {Object} groupsQueryObject - conditions handed to the whereOr helper for the `tags_groups` table
 * @param {number} [limit=100] - maximum number of tags
 * @returns {Promise<Object[]>} curated tags
 */
async function fetchTags(queryObject, groupsQueryObject, limit = 100) {
	// NOTE(review): where/orWhere/andWhere are chained without grouping; confirm the alias filter applies to both branches as intended
	const tags = await knex('tags')
		.where(builder => whereOr(queryObject, 'tags', builder))
		.orWhere(builder => whereOr(groupsQueryObject, 'tags_groups', builder))
		.andWhere({ 'tags.alias_for': null })
		.select(
			'tags.*',
			// the alias must be group_description: that is the column curateTag reads
			'tags_groups.id as group_id', 'tags_groups.name as group_name', 'tags_groups.slug as group_slug', 'tags_groups.description as group_description',
		)
		.leftJoin('tags_groups', 'tags.group_id', 'tags_groups.id')
		.orderBy('name')
		.limit(limit);

	return curateTags(tags);
}
module.exports = {
associateTags,
fetchTags,
matchTags,
associateTags,
fetchTags,
matchTags,
};

View File

@@ -4,104 +4,104 @@ const knex = require('./knex');
const slugify = require('./utils/slugify');
/**
 * Look up the tag IDs for all raw tags used by a list of releases.
 *
 * Names are matched as given, lowercased and uppercased. Alias tags resolve
 * to the ID of the tag they alias.
 *
 * @param {Object[]} releases - releases with an optional `tags` array of tag names
 * @returns {Promise<Object>} map from slugified tag name to tag ID
 */
async function matchReleaseTags(releases) {
	const rawTags = releases
		.map(release => release.tags).flat()
		.filter(Boolean);

	const casedTags = [...new Set(
		rawTags
			.concat(rawTags.map(tag => tag.toLowerCase()))
			.concat(rawTags.map(tag => tag.toUpperCase())),
	)];

	const tagEntries = await knex('tags')
		.select('tags.id', 'tags.name', 'tags.alias_for')
		.whereIn('tags.name', casedTags);

	const tagIdsBySlug = tagEntries
		.reduce((acc, tag) => ({
			...acc,
			[slugify(tag.name)]: tag.alias_for || tag.id,
		}), {});

	return tagIdsBySlug;
}
/**
 * Fetch the tags associated with the sites of a list of releases.
 *
 * @param {Object[]} releases - releases with `site.id`
 * @returns {Promise<Object>} map from site ID to an array of tag IDs
 */
async function getSiteTags(releases) {
	const siteIds = releases.map(release => release.site.id);
	const siteTags = await knex('sites_tags').whereIn('site_id', siteIds);

	const siteTagIdsBySiteId = siteTags.reduce((acc, siteTag) => {
		if (!acc[siteTag.site_id]) {
			acc[siteTag.site_id] = [];
		}

		acc[siteTag.site_id].push(siteTag.tag_id);

		return acc;
	}, {});

	return siteTagIdsBySiteId;
}
/**
 * Build the `releases_tags` rows for a list of stored releases, combining
 * each release's own tags with the tags of its site.
 *
 * @param {Object[]} releases - releases with `id`, `site.id` and an optional `tags` array
 * @param {Object} tagIdsBySlug - map from slugified tag name to tag ID
 * @param {Object} siteTagIdsBySiteId - map from site ID to an array of tag IDs
 * @returns {Object[]} `{ release_id, tag_id }` rows, without duplicates per release
 */
function buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId) {
	const tagAssociations = releases
		.map((release) => {
			const siteTagIds = siteTagIdsBySiteId[release.site.id];
			const releaseTags = release.tags || [];

			const releaseTagIds = releaseTags.every(tag => typeof tag === 'number')
				? releaseTags // obsolete scraper returned pre-matched tags
				: releaseTags.map(tag => tagIdsBySlug[slugify(tag)]);

			const tags = [...new Set(
				// filter duplicates and empties
				releaseTagIds
					.concat(siteTagIds)
					.filter(Boolean),
			)]
				.map(tagId => ({
					release_id: release.id,
					tag_id: tagId,
				}));

			return tags;
		})
		.flat();

	return tagAssociations;
}
/**
 * Drop the tag associations that are already stored in `releases_tags`.
 *
 * @param {Object[]} tagAssociations - `{ release_id, tag_id }` rows
 * @returns {Promise<Object[]>} the rows that are not stored yet
 */
async function filterUniqueAssociations(tagAssociations) {
	const duplicateAssociations = await knex('releases_tags')
		.whereIn(['release_id', 'tag_id'], tagAssociations.map(association => [association.release_id, association.tag_id]));

	const duplicateAssociationsByReleaseIdAndTagId = duplicateAssociations.reduce((acc, association) => {
		if (!acc[association.release_id]) {
			acc[association.release_id] = {};
		}

		acc[association.release_id][association.tag_id] = true;

		return acc;
	}, {});

	const uniqueAssociations = tagAssociations
		.filter(association => !duplicateAssociationsByReleaseIdAndTagId[association.release_id]?.[association.tag_id]);

	return uniqueAssociations;
}
/**
 * Resolve the tags of the given releases and insert the associations
 * that are not yet stored into `releases_tags`.
 *
 * @param {Array<Object>} releases - releases to associate tags with
 * @returns {Promise<void>}
 */
async function associateReleaseTags(releases) {
	const tagIdsBySlug = await matchReleaseTags(releases);
	const siteTagIdsBySiteId = await getSiteTags(releases);

	const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId);
	const uniqueAssociations = await filterUniqueAssociations(tagAssociations);

	await knex('releases_tags').insert(uniqueAssociations);
}
module.exports = {
associateReleaseTags,
associateReleaseTags,
};

View File

@@ -11,228 +11,228 @@ const scrapers = require('./scrapers/scrapers');
const { fetchSitesFromArgv, fetchSitesFromConfig } = require('./sites');
// Cut-off date derived once from `argv.after`, which is either a date
// (YYYY-MM-DD or DD-MM-YYYY) or a time distance such as "1 month".
const afterDate = (() => {
	if (/\d{2,4}-\d{2}-\d{2,4}/.test(argv.after)) {
		// using date
		return moment
			.utc(argv.after, ['YYYY-MM-DD', 'DD-MM-YYYY'])
			.toDate();
	}

	// using time distance (e.g. "1 month")
	return moment
		.utc()
		.subtract(...argv.after.split(' '))
		.toDate();
})();
/**
 * Filter out releases that are already stored in the `releases` table or
 * were already collected earlier in the current scrape.
 *
 * @param {Array<Object>} latestReleases - freshly scraped releases with `site.id` and `entryId`
 * @param {Array<Object>} accReleases - releases accumulated from previous pages
 * @returns {Promise<Array<Object>>} the releases from `latestReleases` not seen before
 */
async function filterUniqueReleases(latestReleases, accReleases) {
	const latestReleaseIdentifiers = latestReleases
		.map(release => [release.site.id, release.entryId]);

	const duplicateReleases = await knex('releases')
		.whereIn(['site_id', 'entry_id'], latestReleaseIdentifiers);

	// add entry IDs of accumulated releases to prevent an infinite scrape loop
	// when one page contains the same release as the previous
	const duplicateReleasesSiteIdAndEntryIds = duplicateReleases
		.concat(accReleases)
		.reduce((acc, release) => {
			// stored rows use snake_case columns, scraped releases use camelCase properties
			const siteId = release.site_id || release.site.id;
			const entryId = release.entry_id || release.entryId;

			if (!acc[siteId]) acc[siteId] = {};
			acc[siteId][entryId] = true;

			return acc;
		}, {});

	const uniqueReleases = latestReleases
		.filter(release => !duplicateReleasesSiteIdAndEntryIds[release.site.id]?.[release.entryId]);

	return uniqueReleases;
}
/**
 * Decide whether another page of releases should be scraped.
 *
 * @param {Array<Object>} uniqueReleases - new releases found on the current page
 * @param {Array<Object>} pageAccReleases - all releases accumulated so far, current page included
 * @returns {boolean} true when scraping should continue with the next page
 */
function needNextPage(uniqueReleases, pageAccReleases) {
	if (uniqueReleases.length === 0) {
		return false;
	}

	if (argv.last && pageAccReleases.length < argv.last) {
		// request for last N releases not yet satisfied
		return true;
	}

	if (uniqueReleases.every(release => !!release.date)) {
		// sort a copy, newest first, so the caller's array is not reordered
		const oldestReleaseOnPage = [...uniqueReleases]
			.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
			.slice(-1)[0];

		if (moment(oldestReleaseOnPage.date).isAfter(afterDate)) {
			// oldest release on page is newer than the specified date cut-off
			return true;
		}
	}

	// dates missing, and limit for scenes without dates not yet reached
	return pageAccReleases.length <= argv.nullDateLimit;
}
/**
 * Scrape the latest or upcoming releases of a site page by page, then trim
 * the result according to `argv.last`, the date cut-off or `argv.nullDateLimit`.
 *
 * @param {Object} scraper - scraper exposing `fetchLatest` and/or `fetchUpcoming`
 * @param {Object} site - site to scrape
 * @param {Object} preData - data passed through to the scraper
 * @param {boolean} [upcoming=false] - scrape upcoming instead of latest releases
 * @returns {Promise<Array<Object>>} scraped releases
 */
async function scrapeReleases(scraper, site, preData, upcoming = false) {
	const scrapePage = async (page = 1, accReleases = []) => {
		const latestReleases = upcoming
			? await scraper.fetchUpcoming(site, page, preData, include)
			: await scraper.fetchLatest(site, page, preData, include);

		if (!Array.isArray(latestReleases)) {
			// scraper is unable to fetch the releases and returned a HTTP code or null
			logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${site.name}' (${site.network.name})`);
			return accReleases;
		}

		if (latestReleases.length === 0) {
			// scraper successfully requested releases, but found none
			return accReleases;
		}

		const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site: release.site || site })); // attach site release is assigned to when stored

		const uniqueReleases = argv.redownload
			? latestReleasesWithSite
			: await filterUniqueReleases(latestReleasesWithSite, accReleases);

		const pageAccReleases = accReleases.concat(uniqueReleases);

		logger.verbose(`Scraped '${site.name}' (${site.network.name}) ${upcoming ? 'upcoming' : 'latest'} page ${page}, found ${uniqueReleases.length} unique updates`);

		if (needNextPage(uniqueReleases, pageAccReleases)) {
			return scrapePage(page + 1, pageAccReleases);
		}

		return pageAccReleases;
	};

	const rawReleases = await scrapePage(argv.page || 1, []);
	const releases = upcoming
		? rawReleases.map(rawRelease => ({ ...rawRelease, upcoming: true }))
		: rawReleases;

	if (argv.last) {
		return releases.slice(0, argv.last);
	}

	if (releases.every(release => release.date)) {
		return releases.filter(release => moment(release.date).isAfter(afterDate));
	}

	return releases.slice(0, argv.nullDateLimit);
}
/**
 * Scrape the latest releases of a site. A failure is logged as a warning
 * and yields an empty list instead of rejecting.
 *
 * @param {Object} scraper - scraper, skipped when it has no `fetchLatest`
 * @param {Object} site - site to scrape
 * @param {Object} preData - data passed through to the scraper
 * @returns {Promise<Array<Object>>} latest releases, or an empty array
 */
async function scrapeLatestReleases(scraper, site, preData) {
	if (!scraper.fetchLatest) {
		return [];
	}

	try {
		return await scrapeReleases(scraper, site, preData, false);
	} catch (error) {
		logger.warn(`Failed to scrape latest updates for '${site.slug}' (${site.network.slug}): ${error.message}`);
	}

	return [];
}
/**
 * Scrape the upcoming releases of a site. A failure is logged as a warning
 * and yields an empty list instead of rejecting.
 *
 * @param {Object} scraper - scraper, skipped when it has no `fetchUpcoming`
 * @param {Object} site - site to scrape
 * @param {Object} preData - data passed through to the scraper
 * @returns {Promise<Array<Object>>} upcoming releases, or an empty array
 */
async function scrapeUpcomingReleases(scraper, site, preData) {
	if (!scraper.fetchUpcoming) {
		return [];
	}

	try {
		return await scrapeReleases(scraper, site, preData, true);
	} catch (error) {
		logger.warn(`Failed to scrape upcoming updates for '${site.slug}' (${site.network.slug}): ${error.message}`);
	}

	return [];
}
/**
 * Scrape latest and upcoming releases of a site concurrently, each only
 * when enabled through `argv.latest` / `argv.upcoming`.
 *
 * @param {Object} scraper - scraper for the site
 * @param {Object} site - site to scrape
 * @param {Object} preData - data passed through to the scraper
 * @returns {Promise<Array<Object>>} latest releases followed by upcoming releases
 */
async function scrapeSiteReleases(scraper, site, preData) {
	const [latestReleases, upcomingReleases] = await Promise.all([
		argv.latest
			? scrapeLatestReleases(scraper, site, preData)
			: [],
		argv.upcoming
			? scrapeUpcomingReleases(scraper, site, preData)
			: [],
	]);

	logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${site.name}' (${site.network.name})`);

	return [...latestReleases, ...upcomingReleases];
}
/**
 * Scrape all releases of one site with the scraper registered for the site,
 * its network or its parent network, in that order of preference.
 *
 * @param {Object} site - site to scrape, including its `network`
 * @param {Array<Object>} accSiteReleases - releases accumulated from previously scraped sites,
 *   passed to the scraper as part of its pre-data
 * @returns {Promise<Array<Object>>} releases with `site` attached; empty when no scraper
 *   is found or scraping fails
 */
async function scrapeSite(site, accSiteReleases) {
	const scraper = scrapers.releases[site.slug]
		|| scrapers.releases[site.network.slug]
		|| scrapers.releases[site.network.parent?.slug];

	if (!scraper) {
		logger.warn(`No scraper found for '${site.name}' (${site.network.name})`);
		return [];
	}

	try {
		// optional scraper hook, its result is handed to the fetch methods
		const beforeFetchLatest = await scraper.beforeFetchLatest?.(site);

		const siteReleases = await scrapeSiteReleases(scraper, site, {
			accSiteReleases,
			beforeFetchLatest,
		});

		return siteReleases.map(release => ({ ...release, site }));
	} catch (error) {
		logger.error(`Failed to scrape releases from ${site.name} using ${scraper.slug}: ${error.message}`);

		return [];
	}
}
/**
 * Scrape the sites of a network one after another, handing each site the
 * releases accumulated from the sites scraped before it.
 *
 * @param {Object} network - network with a `sites` array
 * @returns {Promise<Array<Object>>} releases of all sites in the network
 */
async function scrapeNetworkSequential(network) {
	return Promise.reduce(
		network.sites,
		async (chain, site) => {
			const accSiteReleases = await chain;
			// scrapeSite takes (site, accSiteReleases); the network is available as site.network
			const siteReleases = await scrapeSite(site, accSiteReleases);

			return accSiteReleases.concat(siteReleases);
		},
		Promise.resolve([]),
	);
}
/**
 * Scrape the sites of a network concurrently, three at a time.
 *
 * @param {Object} network - network with a `sites` array
 * @returns {Promise<Array<Array<Object>>>} one array of releases per site
 */
async function scrapeNetworkParallel(network) {
	return Promise.map(
		network.sites,
		// scrapeSite takes (site, accSiteReleases); nothing is accumulated across
		// sites in parallel mode, so pass an empty list rather than the network
		async site => scrapeSite(site, []),
		{ concurrency: 3 },
	);
}
/**
 * Scrape updates for the networks selected on the command line, or those
 * from the configuration when no sites or networks were specified.
 *
 * @returns {Promise<Array<Object>>} flat list of scraped releases
 */
async function fetchUpdates() {
	const includedNetworks = argv.sites || argv.networks
		? await fetchSitesFromArgv()
		: await fetchSitesFromConfig();

	const scrapedNetworks = await Promise.map(
		includedNetworks,
		async network => (network.parameters?.sequential
			? scrapeNetworkSequential(network)
			: scrapeNetworkParallel(network)),
		{ concurrency: 5 },
	);

	// parallel networks yield an array per site, so flatten two levels
	const releases = scrapedNetworks.flat(2);

	return releases;
}
module.exports = fetchUpdates;

View File

@@ -1,20 +1,20 @@
'use strict';
/**
 * Derive the set of content types to include from the parsed arguments.
 * Singular and plural keys carry the same value.
 *
 * @param {Object} argv - parsed command line arguments
 * @returns {Object} flags keyed by content type
 */
function include(argv) {
	return {
		covers: argv.media && argv.covers,
		media: argv.media,
		photos: argv.media && argv.photos,
		poster: argv.media && argv.posters,
		posters: argv.media && argv.posters,
		releases: argv.withReleases,
		scenes: argv.withReleases,
		teaser: argv.media && argv.videos && argv.teasers,
		teasers: argv.media && argv.videos && argv.teasers,
		trailer: argv.media && argv.videos && argv.trailers,
		trailers: argv.media && argv.videos && argv.trailers,
		videos: argv.videos,
	};
}
module.exports = include;

View File

@@ -13,106 +13,106 @@ const file = 'https://speed.hetzner.de/100MB.bin';
// const file = 'https://speed.hetzner.de/10GB.bin';
/**
 * @returns {number} resident set size of the process in megabytes (10^6 bytes)
 */
function getMemoryUsage() {
	return process.memoryUsage().rss / (10 ** 6);
}
// shared benchmark state, updated by setProgress and displayed by render
const stats = {
	peakMemoryUsage: getMemoryUsage(),
	done: false,
	downloads: {},
};
/**
 * Draw peak memory usage and per-download progress to stdout, redrawing
 * every second until every download reports a string (its hash).
 */
function render() {
	const downloads = Object.entries(stats.downloads);

	process.stdout.clearScreenDown();

	process.stdout.write(`peak memory: ${stats.peakMemoryUsage.toFixed(2)} MB\n`);

	downloads.forEach(([download, progress]) => {
		process.stdout.write(`${download}: ${progress}${typeof progress === 'string' ? '' : '%'}\n`);
	});

	// move back to the top of the drawn block so the next render overwrites it
	process.stdout.moveCursor(0, -(downloads.length + 1));
	process.stdout.cursorTo(0);

	if (downloads.length === 0 || !downloads.every(([_label, download]) => typeof download === 'string')) {
		setTimeout(() => render(), 1000);
		return;
	}

	// all downloads finished, leave the cursor below the final output
	process.stdout.moveCursor(0, downloads.length + 1);
}
/**
 * Record the progress of a download and update the peak memory usage.
 *
 * @param {string|number} label - download identifier
 * @param {?number} completedBytes - bytes received so far
 * @param {?number} totalBytes - expected total bytes
 * @param {string} [hash] - final hash; when given it is stored instead of the percentage
 */
function setProgress(label, completedBytes, totalBytes, hash) {
	const memory = getMemoryUsage();

	stats.peakMemoryUsage = Math.max(memory, stats.peakMemoryUsage);
	stats.downloads[label] = hash || Math.round((completedBytes / totalBytes) * 100);
}
/**
 * Download the test file into memory, hash the buffered body and write it to disk.
 *
 * @param {string|number} label - download identifier used for progress and the file name
 * @returns {Promise<void>}
 */
async function buffered(label) {
	const hash = new blake2.Hash('blake2b');

	const imageRes = await bhttp.get(file, {
		onDownloadProgress(completedBytes, totalBytes) {
			setProgress(label, completedBytes, totalBytes);
		},
	});

	hash.update(imageRes.body);
	setProgress(label, null, null, hash.digest('hex'));

	await fsPromises.writeFile(`/mnt/stor/Pictures/traxxx/temp/buffered-${label}.bin`, imageRes.body);
}
/**
 * Download the test file as a stream, hashing the chunks while piping them to disk.
 * The returned promise resolves once the pipeline is set up, not when the download ends.
 *
 * @param {string|number} label - download identifier used for progress and the file name
 * @returns {Promise<void>}
 */
async function streamed(label) {
	const hash = new blake2.Hash('blake2b');
	hash.setEncoding('hex');

	const hashStream = new PassThrough();
	const targetStream = fs.createWriteStream(`/mnt/stor/Pictures/traxxx/temp/streamed-${label}.bin`);

	const imageRes = await bhttp.get(file, {
		stream: true,
	});

	const stream = imageRes
		.pipe(hashStream)
		.pipe(targetStream);

	imageRes.on('progress', (completedBytes, totalBytes) => {
		setProgress(label, completedBytes, totalBytes);
	});

	hashStream.on('data', (chunk) => {
		hash.write(chunk);
	});

	stream.on('finish', () => {
		hash.end();
		setProgress(label, null, null, hash.read());
	});
}
/**
 * Run `argv.n` downloads (default 1) using the method named on the command
 * line, `stream` or `buffer`.
 *
 * @returns {Promise<void>}
 */
async function init() {
	const n = argv.n || 1;

	if (argv._.includes('stream')) {
		console.log('using streams');
		render();

		await Promise.map(Array.from({ length: n }), async (value, index) => streamed(index + 1));

		return;
	}

	if (argv._.includes('buffer')) {
		console.log('using buffers');
		render();

		await Promise.map(Array.from({ length: n }), async (value, index) => buffered(index + 1));
	}
}
init();

View File

@@ -1,16 +1,16 @@
'use strict';
/**
 * Upper-case the first character of every whitespace-separated word.
 * Runs of whitespace are collapsed into a single space.
 *
 * @param {?string} string - text to capitalize
 * @param {boolean} [trim=true] - strip leading and trailing whitespace from the result
 * @returns {string} capitalized text, or an empty string for falsy input
 */
function capitalize(string, trim = true) {
	if (!string) {
		return '';
	}

	const capitalized = string
		.split(/\s+/)
		.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
		.join(' ');

	return trim ? capitalized.trim() : capitalized;
}
module.exports = capitalize;

View File

@@ -1,8 +1,8 @@
'use strict';
/**
 * Split an array into consecutive chunks of at most `chunkSize` items.
 *
 * @param {Array} array - items to split
 * @param {number} chunkSize - maximum number of items per chunk
 * @returns {Array<Array>} the chunks, in order; the last one may be shorter
 */
function chunk(array, chunkSize) {
	return Array.from({ length: Math.ceil(array.length / chunkSize) })
		.map((value, index) => array.slice(index * chunkSize, (index * chunkSize) + chunkSize));
}
module.exports = chunk;

View File

@@ -1,48 +1,48 @@
'use strict';
/**
 * @param {number|string} inches - length in inches
 * @returns {number} length in whole centimeters
 */
function inchesToCm(inches) {
	return Math.round(Number(inches) * 2.54);
}
/**
 * Convert a height in feet and inches to whole centimeters.
 *
 * Accepts separate values, or a single combined string such as `5'7"` as
 * the first argument when `inches` is omitted or falsy.
 *
 * @param {number|string} feet - feet, or a combined feet/inches string
 * @param {number|string} [inches] - inches; treated as 0 when omitted
 * @returns {number} height in whole centimeters
 * @throws {TypeError} when a combined string contains no digits
 */
function feetInchesToCm(feet, inches) {
	if (typeof feet === 'string' && !inches) {
		// convert the parsed parts directly instead of recursing: a string holding
		// only a feet part (or inches of 0) would otherwise re-enter this branch forever
		const [feetPart, inchesPart = 0] = feet.match(/\d+/g);

		return Math.round((Number(feetPart) * 30.48) + (Number(inchesPart) * 2.54));
	}

	return Math.round((Number(feet) * 30.48) + (Number(inches ?? 0) * 2.54));
}
/**
 * Convert centimeters to feet and whole inches.
 *
 * @param {number} centimeters - length in centimeters
 * @returns {{feet: number, inches: number}} feet and remaining inches (0-11)
 */
function cmToFeetInches(centimeters) {
	// round the total first, so the remainder can never round up to 12 inches,
	// and lengths below one foot do not end up as a modulo by zero
	const totalInches = Math.round(centimeters / 2.54);

	const feet = Math.floor(totalInches / 12);
	const inches = totalInches % 12;

	return { feet, inches };
}
/**
 * Convert a height string containing feet and inches to whole centimeters.
 *
 * @param {string} height - text whose first two digit groups are feet and inches
 * @returns {number} height in centimeters
 * @throws {TypeError} when the string contains no digits
 */
function heightToCm(height) {
	const [feet, inches] = height.match(/\d+/g);

	return feetInchesToCm(feet, inches);
}
/**
 * Convert pounds to whole kilograms, using the first run of digits in the input.
 *
 * @param {number|string} lbs - weight in pounds, e.g. `120` or `'120 lbs'`
 * @returns {number} weight in whole kilograms
 * @throws {TypeError} when the input contains no digits
 */
function lbsToKg(lbs) {
	const pounds = lbs.toString().match(/\d+/)[0];

	return Math.round(Number(pounds) * 0.453592);
}
/**
 * Convert kilograms to whole pounds, using the first run of digits in the input.
 *
 * @param {number|string} kgs - weight in kilograms, e.g. `54` or `'54kg'`
 * @returns {number} weight in whole pounds
 * @throws {TypeError} when the input contains no digits
 */
function kgToLbs(kgs) {
	const kilos = kgs.toString().match(/\d+/)[0];

	return Math.round(Number(kilos) / 0.453592);
}
module.exports = {
cmToFeetInches,
feetInchesToCm,
heightToCm,
inchesToCm,
lbsToKg,
kgToLbs,
cmToFeetInches,
feetInchesToCm,
heightToCm,
inchesToCm,
lbsToKg,
kgToLbs,
};

View File

@@ -1,16 +1,16 @@
'use strict';
/**
 * Parse a `'; '`-separated cookie string into an object of cookie values.
 *
 * @param {string} cookieString - cookies in `key=value; key=value` form
 * @returns {Object} values keyed by cookie name; a cookie without `=` maps to undefined
 */
function cookieToData(cookieString) {
	return Object.fromEntries(cookieString.split('; ').map((cookie) => {
		// split on the first '=' only, values may contain '=' themselves (e.g. base64 padding)
		const separatorIndex = cookie.indexOf('=');

		if (separatorIndex === -1) {
			return [cookie, undefined];
		}

		return [cookie.slice(0, separatorIndex), cookie.slice(separatorIndex + 1)];
	}));
}
module.exports = {
cookieToData,
cookieToData,
};

View File

@@ -1,10 +1,10 @@
/**
 * Escape the characters `&`, `<`, `>`, `"` and `'` as HTML entities.
 *
 * @param {string} text - raw text
 * @returns {string} text safe to embed in HTML content or attribute values
 */
function escapeHtml(text) {
	return text
		.replace(/&/g, '&amp;') // must run first, so the entities below are not escaped again
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.replace(/"/g, '&quot;')
		.replace(/'/g, '&#39;');
}
module.exports = escapeHtml;

View File

@@ -11,107 +11,107 @@ const pipeline = util.promisify(stream.pipeline);
const logger = require('../logger')(__filename);
// headers sent with every request unless options.defaultHeaders is false
const defaultHeaders = {
	'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
};

// request options applied before the caller's own options
const defaultOptions = {
	responseTimeout: 30000,
};
// agent tunnelling HTTPS requests through the configured HTTP proxy
const proxyAgent = tunnel.httpsOverHttp({
	proxy: {
		host: config.proxy.host,
		port: config.proxy.port,
	},
});
/**
 * Tell whether a request to the given URL should go through the proxy.
 *
 * @param {string} url - absolute request URL
 * @returns {boolean} true when the proxy is enabled and the hostname is in `config.proxy.hostnames`
 */
function useProxy(url) {
	if (!config.proxy.enable) {
		return false;
	}

	const { hostname } = new URL(url);

	return config.proxy.hostnames.includes(hostname);
}
const queue = taskQueue();
// log when the 'http' task reports that its concurrency limit has been reached
queue.on('concurrencyReached:http', () => {
	logger.silly('Queueing requests');
});
// Queued HTTP task: performs the request through bhttp and resolves with the
// response extended with convenience properties (html, json, ok, code, status).
queue.define('http', async ({
	url,
	method = 'GET',
	body,
	headers = {},
	options = {},
}) => {
	if (body) {
		logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)}`);
	} else {
		logger.silly(`${method.toUpperCase()} ${url}`);
	}

	const reqOptions = {
		headers: {
			...(options.defaultHeaders !== false && defaultHeaders),
			...headers,
		},
		...defaultOptions,
		...options,
		// `timeout` is a shorthand for bhttp's `responseTimeout`
		...(options.timeout && { responseTimeout: options.timeout }),
	};

	if (useProxy(url)) {
		reqOptions.agent = proxyAgent;
	}

	// only methods carrying a payload take the body argument
	const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
		? await bhttp[method.toLowerCase()](url, body, reqOptions)
		: await bhttp[method.toLowerCase()](url, reqOptions);

	if (options.stream && options.destination) {
		await pipeline(res, ...(options.transforms || []), options.destination);
	}

	// a Buffer body is exposed as text, any other body is exposed as-is under `json`
	const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
	const json = Buffer.isBuffer(res.body) ? null : res.body;

	return {
		...res,
		originalRes: res,
		html,
		json,
		pipe: res.pipe,
		ok: res.statusCode >= 200 && res.statusCode <= 299,
		code: res.statusCode,
		status: res.statusCode,
	};
}, {
	concurrency: 20,
});
/**
 * Queue a GET request.
 *
 * @param {string} url - request URL
 * @param {?Object} [headers] - additional request headers
 * @param {Object} [options] - request options, passed to the 'http' queue task
 * @returns {Promise<Object>} result of the queued 'http' task
 */
async function get(url, headers, options) {
	return queue.push('http', {
		method: 'GET',
		url,
		headers,
		options,
	});
}
/**
 * Queue a POST request.
 *
 * @param {string} url - request URL
 * @param {*} body - request payload
 * @param {?Object} [headers] - additional request headers
 * @param {Object} [options] - request options, passed to the 'http' queue task
 * @returns {Promise<Object>} result of the queued 'http' task
 */
async function post(url, body, headers, options) {
	return queue.push('http', {
		method: 'POST',
		url,
		body,
		headers,
		options,
	});
}
module.exports = {
get,
post,
get,
post,
};

View File

@@ -7,12 +7,12 @@ const { argv } = require('yargs');
const url = argv.url || 'http://localhost:5000/media/actors/tommy-pistol/1580341442712.jpeg';
/**
 * Fetch the image at `url` and print its sharp statistics.
 *
 * @returns {Promise<void>}
 */
async function scan() {
	console.log(url);

	const res = await bhttp.get(url);
	const stats = await sharp(res.body).stats();

	console.log(stats);
}
scan();

View File

@@ -4,33 +4,33 @@ const Promise = require('bluebird');
const knex = require('../knex');
/**
 * Print all networks and their sites as a nested Markdown list, followed by totals.
 *
 * @returns {Promise<void>}
 */
async function listSites() {
	const [networks, allSites] = await Promise.all([
		knex('networks').orderBy('name'),
		knex('sites').orderBy('name'),
	]);

	await Promise.each(networks, async (network) => {
		console.log(`* **${network.name}**`);

		const sites = await knex('sites')
			.where({ network_id: network.id })
			.orderBy('name');

		// a network whose only site carries the same name needs no sub-list
		if (sites.length === 1 && sites[0].name === network.name) {
			return;
		}

		sites.forEach((site) => {
			const rkSpecial = network.id === 'realitykings'
				&& (new URL(site.url).hostname === 'www.realitykings.com'
				|| (site.parameters?.altLayout))
				? '\\*' : ''; // Reality Kings alt layout sites do not support scene fetch by URL

			console.log(` * ${site.name}${rkSpecial}`);
		});
	});

	console.log(`${networks.length} networks with ${allSites.length} sites total`);
}
listSites();

View File

@@ -12,99 +12,99 @@ const { PassThrough } = require('stream');
const http = require('./http');
/**
 * @returns {number} resident set size of the process in megabytes (10^6 bytes)
 */
function getMemoryUsage() {
	return process.memoryUsage().rss / (10 ** 6);
}
let peakMemoryUsage = getMemoryUsage();
/**
 * Stream a source image to a temporary file while hashing its contents.
 * The temporary file is removed again when the download fails.
 *
 * @param {string} link - URL of the image to fetch
 * @returns {Promise<{id: string, path: string, hash: string}>} generated ID, temp file path and hex hash
 * @throws {Error} when the request fails or responds with a non-2xx status
 */
async function fetchSource(link) {
	const id = nanoid();

	const hasher = new blake2.Hash('blake2b');
	hasher.setEncoding('hex');

	const tempFilePath = `/home/niels/Pictures/thumbs/temp/${id}.jpeg`;
	const tempFileStream = fs.createWriteStream(tempFilePath);
	const hashStream = new PassThrough();

	// feed every chunk passing through to the hasher
	hashStream.on('data', chunk => hasher.write(chunk));

	try {
		const res = await http.get(link, null, {
			stream: true,
			transforms: [hashStream],
			destination: tempFileStream,
			timeout: 5000,
		});

		if (!res.ok) {
			throw new Error(res.status);
		}

		hasher.end();
		const hash = hasher.read();

		const memoryUsage = getMemoryUsage();
		peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);

		console.log(`Stored ${tempFilePath}, memory usage: ${memoryUsage.toFixed(2)} MB`);

		return {
			id,
			path: tempFilePath,
			hash,
		};
	} catch (error) {
		await fsPromises.unlink(tempFilePath);

		throw error;
	}
}
/**
 * Benchmark: fetch every link from the links file, then write a JPEG and a
 * thumbnail for each fetched image while tracking peak memory usage.
 *
 * @returns {Promise<void>}
 */
async function init() {
	const linksFile = await fsPromises.readFile('/home/niels/Pictures/photos', 'utf8');
	const links = linksFile.split('\n').filter(Boolean);

	await fsPromises.mkdir('/home/niels/Pictures/thumbs/temp', { recursive: true });

	console.time('thumbs');

	const files = await Promise.map(links, async (link) => {
		try {
			return await fetchSource(link);
		} catch (error) {
			// a failed download is reported and skipped
			console.log(`Failed to fetch ${link}: ${error.message}`);
			return null;
		}
	});

	await Promise.map(files.filter(Boolean), async (file) => {
		const image = sharp(file.path).jpeg();

		const [{ width, height }, { size }] = await Promise.all([
			image.metadata(),
			fsPromises.stat(file.path),
		]);

		await Promise.all([
			image
				.toFile(`/home/niels/Pictures/thumbs/${file.hash}.jpeg`),
			image
				.resize({
					height: config.media.thumbnailSize,
					withoutEnlargement: true,
				})
				.toFile(`/home/niels/Pictures/thumbs/${file.hash}_thumb.jpeg`),
		]);

		const memoryUsage = getMemoryUsage();
		peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);

		console.log(`Resized ${file.id} (${width}, ${height}, ${size}), memory usage: ${memoryUsage.toFixed(2)} MB`);
	}, { concurrency: 10 });

	console.log(`Peak memory usage: ${peakMemoryUsage.toFixed(2)} MB`);
	console.timeEnd('thumbs');
}
init();

View File

@@ -6,16 +6,16 @@ const bhttp = require('bhttp');
const knex = require('../knex');
/**
 * Requests the URL of every site belonging to the network with slug 'mofos'
 * and logs the HTTP status code of each response, five requests at a time.
 */
async function run() {
	const network = await knex('networks').where('slug', 'mofos').first();
	const sites = await knex('sites').where('network_id', network.id);

	await Promise.map(sites, async (site) => {
		const res = await bhttp.get(site.url);

		console.log(site.url, res.statusCode);
	}, {
		concurrency: 5,
	});
}

run();

View File

@@ -1,5 +1,5 @@
/**
 * Returns a uniformly chosen element of `array`.
 *
 * @param {Array} array - candidates to pick from
 * @returns {*} one element, or undefined when the array is empty
 */
function pickRandom(array) {
	return array[Math.floor(Math.random() * array.length)];
}
module.exports = pickRandom;

View File

@@ -9,32 +9,32 @@ const argv = require('../argv');
const knex = require('../knex');
/**
 * Copies the poster of every release featuring the requested actors into
 * `<media path>/extracted/<actor name>/`, using a descriptive file name.
 *
 * Actor names are read from `argv.actors` and the positional arguments.
 */
async function init() {
	try {
		const posters = await knex('actors')
			.select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'media.index', 'sites.name as site_name', 'networks.name as network_name')
			.whereIn('actors.name', (argv.actors || []).concat(argv._))
			.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
			.join('releases', 'releases_actors.release_id', 'releases.id')
			.join('sites', 'sites.id', 'releases.site_id')
			.join('networks', 'networks.id', 'sites.network_id')
			.join('releases_posters', 'releases_posters.release_id', 'releases.id')
			.join('media', 'releases_posters.media_id', 'media.id');
		// presumably the alternative joins for extracting photos instead of posters:
		// .join('releases_photos', 'releases_photos.release_id', 'releases.id')
		// .join('media', 'releases_photos.media_id', 'media.id');

		await Promise.all(posters.map(async (poster) => {
			const source = path.join(config.media.path, poster.path);

			const directory = path.join(config.media.path, 'extracted', poster.actor_name);
			// slashes and dots in the title are replaced so it cannot alter the path or extension
			const target = path.join(directory, `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')} (${moment.utc(poster.date).format('YYYY-MM-DD')})-${poster.index}.jpeg`);
			await fs.mkdir(path.join(directory), { recursive: true });

			const file = await fs.readFile(source);
			await fs.writeFile(target, file);

			return file;
		}));
	} finally {
		// close the connection pool even when the query or a copy fails
		await knex.destroy();
	}
}

init();

View File

@@ -5,341 +5,341 @@ const moment = require('moment');
const http = require('./http');
/**
 * Strips leading/trailing whitespace and collapses every internal run of
 * whitespace into a single space.
 *
 * @param {string} str - raw text
 * @returns {string|null} normalized text, or null for an empty/missing input
 */
function trim(str) {
	if (!str) return null;

	return str.trim().replace(/\s+/g, ' ');
}
/**
 * Parses a date out of free text as UTC.
 *
 * @param {string} dateString - text containing the date
 * @param {string|string[]} format - moment format(s) used for parsing
 * @param {RegExp} [match] - when given, only the first match of this pattern is parsed
 * @returns {Date|null} the parsed date, or null when nothing valid was found
 */
function extractDate(dateString, format, match) {
	const trimmedString = trim(dateString);

	// trim() yields null for empty input; nothing to parse in that case
	if (!trimmedString) return null;

	if (match) {
		const dateStamp = trimmedString.match(match);

		if (dateStamp) {
			const dateValue = moment.utc(dateStamp[0], format);

			return dateValue.isValid() ? dateValue.toDate() : null;
		}

		return null;
	}

	const dateValue = moment.utc(trimmedString, format);

	return dateValue.isValid() ? dateValue.toDate() : null;
}
/**
 * Formats a date with moment.
 *
 * @param {*} dateValue - any value moment can parse
 * @param {string} format - output format
 * @param {string} [inputFormat] - format used to parse `dateValue` when it is a string
 * @returns {string} the formatted date
 */
function formatDate(dateValue, format, inputFormat) {
	if (inputFormat) {
		return moment(dateValue, inputFormat).format(format);
	}

	return moment(dateValue).format(format);
}
/**
 * Completes protocol-relative and root-relative URLs.
 *
 * @param {string} urlValue - URL as found in the page
 * @param {string} [origin] - origin prepended to URLs starting with a single '/'
 * @param {string} [protocol='https'] - protocol prepended to URLs starting with '//'
 * @returns {string} the completed URL, or `urlValue` unchanged when no rule applies
 */
function prefixUrl(urlValue, origin, protocol = 'https') {
	// protocol-relative must be checked first, '//host' also starts with '/'
	if (protocol && /^\/\//.test(urlValue)) {
		return `${protocol}:${urlValue}`;
	}

	if (origin && /^\//.test(urlValue)) {
		return `${origin}${urlValue}`;
	}

	return urlValue;
}
/**
 * Queries a single element, or one of its properties/attributes.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector; when omitted, `context` itself is used
 * @param {string|boolean} [attrArg] - property or attribute name to read; `true` means 'textContent'
 * @param {boolean} [applyTrim=true] - normalize whitespace of the value read
 * @returns {*} the element when no attribute is requested, otherwise the value
 *   (DOM property first, HTML attribute as fallback); undefined when not found
 */
function q(context, selector, attrArg, applyTrim = true) {
	const attr = attrArg === true ? 'textContent' : attrArg;
	const el = selector ? context.querySelector(selector) : context;

	if (attr) {
		// not every node has an attribute map (e.g. a document), hence the guarded access
		const value = el?.[attr] || el?.attributes?.[attr]?.value;

		return applyTrim && value ? trim(value) : value;
	}

	return el;
}
/**
 * Queries every element matching `selector`, or one value per element.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @param {string|boolean} [attrArg] - property or attribute to read from each match; `true` means 'textContent'
 * @param {boolean} [applyTrim=true] - normalize whitespace of the values read
 * @returns {Array} matched elements, or their values when an attribute is requested
 */
function all(context, selector, attrArg, applyTrim = true) {
	const attr = attrArg === true ? 'textContent' : attrArg;

	if (attr) {
		return Array.from(context.querySelectorAll(selector), el => q(el, null, attr, applyTrim));
	}

	return Array.from(context.querySelectorAll(selector));
}
/**
 * Tells whether `selector` matches at least one element within `context`.
 *
 * @returns {boolean}
 */
function exists(context, selector) {
	return !!q(context, selector);
}
/**
 * Returns the inner HTML of the first element matching `selector`.
 *
 * @returns {string|null|undefined} the markup, or the falsy query result when nothing matched
 */
function html(context, selector) {
	const el = q(context, selector, null, true);

	return el && el.innerHTML;
}
/**
 * Collects the direct text nodes of an element, ignoring text inside child elements.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector; `context` itself when omitted
 * @param {boolean} [applyTrim=true] - normalize whitespace of every text node
 * @param {boolean} [filter=true] - drop empty entries
 * @returns {string[]|null} text per node, or null when the element was not found
 */
function texts(context, selector, applyTrim = true, filter = true) {
	const el = q(context, selector, null, applyTrim);
	if (!el) return null;

	const nodes = Array.from(el.childNodes)
		.filter(node => node.nodeName === '#text')
		.map(node => (applyTrim ? trim(node.textContent) : node.textContent));

	return filter ? nodes.filter(Boolean) : nodes;
}
/**
 * Joins the direct text nodes of an element into one space-separated string.
 *
 * @returns {string|null} the combined text, or null when the element was not found
 */
function text(context, selector, applyTrim = true) {
	const nodes = texts(context, selector, applyTrim, true);
	if (!nodes) return null;

	const textValue = nodes.join(' ');

	return applyTrim ? trim(textValue) : textValue;
}
/**
 * Reads an attribute ('content' by default) from a <meta> element.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - either a full `meta[...]` selector or only the
 *   attribute condition, which is then wrapped in `meta[...]`
 * @param {string|boolean} [attrArg='content'] - attribute to read
 * @param {boolean} [applyTrim=true] - normalize whitespace of the value
 */
function meta(context, selector, attrArg = 'content', applyTrim = true) {
	if (/meta\[.*\]/.test(selector)) {
		return q(context, selector, attrArg, applyTrim);
	}

	return q(context, `meta[${selector}]`, attrArg, applyTrim);
}
/**
 * Reads a value from an element and parses it as a date via `extractDate`.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector of the element holding the date
 * @param {string|string[]} format - moment format(s) used for parsing
 * @param {RegExp} [match] - pattern isolating the date within the text
 * @param {string} [attr='textContent'] - property or attribute holding the date
 * @returns {Date|null} the parsed date, or null when missing or invalid
 */
function date(context, selector, format, match, attr = 'textContent') {
	const dateString = q(context, selector, attr, true);

	if (!dateString) return null;

	return extractDate(dateString, format, match);
}
/**
 * Reads an image URL (by default `img[src]`) and completes it with `prefixUrl`.
 *
 * @returns {*} the URL, or the matched element when `attr` is falsy
 */
function image(context, selector = 'img', attr = 'src', origin, protocol = 'https') {
	const imageEl = q(context, selector, attr);

	// no attribute means q output will be HTML element
	return attr ? prefixUrl(imageEl, origin, protocol) : imageEl;
}
/**
 * Reads all image URLs (by default `img[src]`) and completes each with `prefixUrl`.
 *
 * @returns {Array} the URLs, or the matched elements when `attr` is falsy
 */
function images(context, selector = 'img', attr = 'src', origin, protocol = 'https') {
	const imageEls = all(context, selector, attr);

	return attr ? imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol)) : imageEls;
}
/**
 * Reads a link URL (by default `a[href]`) and completes it with `prefixUrl`.
 *
 * @returns {*} the URL, or the matched element when `attr` is falsy
 */
function url(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
	const urlEl = q(context, selector, attr);

	return attr ? prefixUrl(urlEl, origin, protocol) : urlEl;
}
/**
 * Reads all link URLs (by default `a[href]`) and completes each with `prefixUrl`.
 *
 * @returns {Array} the URLs, or the matched elements when `attr` is falsy
 */
function urls(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
	const urlEls = all(context, selector, attr);

	return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls;
}
/**
 * Reads a poster URL (by default `video[poster]`) and completes it with `prefixUrl`.
 *
 * @returns {*} the URL, or the matched element when `attr` is falsy
 */
function poster(context, selector = 'video', attr = 'poster', origin, protocol = 'https') {
	const posterEl = q(context, selector, attr);

	return attr ? prefixUrl(posterEl, origin, protocol) : posterEl;
}
/**
 * Reads a video URL (by default `source[src]`) and completes it with `prefixUrl`.
 *
 * @returns {*} the URL, or the matched element when `attr` is falsy
 */
function video(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
	const trailerEl = q(context, selector, attr);

	return attr ? prefixUrl(trailerEl, origin, protocol) : trailerEl;
}
/**
 * Reads all video URLs (by default `source[src]`) and completes each with `prefixUrl`.
 *
 * @returns {Array} the URLs, or the matched elements when `attr` is falsy
 */
function videos(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
	const trailerEls = all(context, selector, attr);

	return attr ? trailerEls.map(trailerEl => prefixUrl(trailerEl, origin, protocol)) : trailerEls;
}
/**
 * Reads a `[hh:]mm:ss` style duration from an element and converts it to seconds.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector of the element holding the duration
 * @param {RegExp} [match] - pattern isolating the duration; defaults to `[h:]m:s`
 * @param {string} [attr='textContent'] - property or attribute holding the duration
 * @returns {number|null} duration in seconds, or null when missing or unmatched
 */
function duration(context, selector, match, attr = 'textContent') {
	const durationString = q(context, selector, attr);

	if (!durationString) return null;
	const durationMatch = durationString.match(match || /(\d+:)?\d+:\d+/);

	if (durationMatch) {
		// pad with an hour segment so 'mm:ss' is not read as 'hh:mm', keep the last three segments
		const segments = ['00'].concat(durationMatch[0].split(':')).slice(-3);

		return moment.duration(segments.join(':')).asSeconds();
	}

	return null;
}
// Query helpers under their legacy q-prefixed aliases; spread into the module
// exports and into every context created by init().
const legacyFuncs = {
	q,
	qa: all,
	qall: all,
	qd: date,
	qdate: date,
	qh: html,
	qhtml: html,
	qi: image,
	qimage: image,
	qimages: images,
	qis: images,
	ql: duration,
	qlength: duration,
	qm: meta,
	qmeta: meta,
	qp: poster,
	qposter: poster,
	qs: all,
	qt: video,
	qtext: text,
	qtexts: texts,
	qtrailer: video,
	qtrailers: videos,
	qts: videos,
	qtx: text,
	qtxs: texts,
	qtxt: text,
	qtxts: texts,
	// qu: url, // disabled: `qu` is the key used for the quFuncs namespace
	qurl: url,
	qurls: urls,
	qus: urls,
};
// Query helpers under their plain names; exposed as the `qu` namespace.
const quFuncs = {
	all,
	html,
	date,
	dur: duration,
	duration,
	exists,
	image,
	images,
	img: image,
	imgs: images,
	length: duration,
	meta,
	poster,
	q,
	text,
	texts,
	trailer: video,
	url,
	urls,
	video,
	videos,
};
/**
 * Wraps an element in a query context: every helper from `legacyFuncs` and
 * `quFuncs` is bound so `element` is passed as the query context automatically.
 *
 * @param {Element|Document} element - node the helpers will query from
 * @param {Window} [window] - owning window; when given it is exposed on the
 *   context and lets callers pass another element as explicit first argument
 * @returns {Object|null} the query context, or null when `element` is missing
 */
function init(element, window) {
	if (!element) return null;

	// dynamically attach methods with context
	const bindContext = funcs => Object.fromEntries(Object.entries(funcs).map(([key, func]) => [
		key,
		(...args) => (window && args[0] instanceof window.HTMLElement // allow for different context
			? func(...args)
			: func(element, ...args)),
	]));

	const legacyContextFuncs = bindContext(legacyFuncs);
	const quContextFuncs = bindContext(quFuncs);

	return {
		element,
		el: element,
		// falls back to the body's markup when the node has no outerHTML of its own
		html: element.outerHTML || element.body.outerHTML,
		text: trim(element.textContent),
		...(window && {
			window,
			document: window.document,
		}),
		...legacyContextFuncs,
		qu: quContextFuncs,
	};
}
/**
 * Creates a query context for each element.
 *
 * @param {Array|Element|Document} context - array of elements, or a node to query from
 * @param {string} [selector] - CSS selector, used when `context` is not an array
 * @param {Window} [window] - owning window, forwarded to `init`
 * @returns {Array} one query context per element
 */
function initAll(context, selector, window) {
	if (Array.isArray(context)) {
		return context.map(element => init(element, window));
	}

	return Array.from(context.querySelectorAll(selector))
		.map(element => init(element, window));
}
/**
 * Parses HTML with JSDOM and returns a query context.
 *
 * @param {string} htmlValue - markup to parse
 * @param {string} [selector] - wrap the first match instead of the whole document
 * @returns {Object|null} the query context, or null when `selector` matched nothing
 */
function extract(htmlValue, selector) {
	const { window } = new JSDOM(htmlValue);

	if (selector) {
		return init(window.document.querySelector(selector), window);
	}

	return init(window.document, window);
}
/**
 * Parses HTML with JSDOM and returns a query context for every element matching `selector`.
 *
 * @param {string} htmlValue - markup to parse
 * @param {string} selector - CSS selector
 * @returns {Array} one query context per match
 */
function extractAll(htmlValue, selector) {
	const { window } = new JSDOM(htmlValue);

	return initAll(window.document, selector, window);
}
/**
 * Fetches a page and wraps it in a query context.
 *
 * @param {string} urlValue - page to fetch
 * @param {string} [selector] - CSS selector to wrap instead of the whole document
 * @param {Object} [headers] - request headers
 * @param {Object} [options] - currently unused, kept for interface compatibility
 * @param {boolean} [queryAll=false] - wrap every match of `selector` instead of the first
 * @returns {Promise<Object>} `{ item, items, res, ok, status }`; `item` is a single
 *   context (or the array of contexts when `queryAll` is set), `items` is always an array
 */
async function get(urlValue, selector, headers, options, queryAll = false) {
	const res = await http.get(urlValue, headers);

	if (res.statusCode === 200) {
		const item = queryAll
			? extractAll(res.body.toString(), selector)
			: extract(res.body.toString(), selector);

		return {
			item,
			// decide on the queryAll flag; the helper function `all` is always truthy
			items: queryAll ? item : [item],
			res,
			ok: true,
			status: res.statusCode,
		};
	}

	return {
		item: null,
		items: [],
		res,
		ok: false,
		status: res.statusCode,
	};
}
/**
 * Fetches a page and wraps every element matching `selector`; shorthand for
 * `get` with `queryAll` enabled.
 */
async function getAll(urlValue, selector, headers, options) {
	return get(urlValue, selector, headers, options, true);
}
module.exports = {
extractDate,
extract,
extractAll,
init,
initAll,
formatDate,
get,
getAll,
context: init,
contextAll: initAll,
ed: extractDate,
ex: extract,
exa: extractAll,
fd: formatDate,
parseDate: extractDate,
ctx: init,
ctxa: initAll,
geta: getAll,
qu: quFuncs,
...legacyFuncs,
extractDate,
extract,
extractAll,
init,
initAll,
formatDate,
get,
getAll,
context: init,
contextAll: initAll,
ed: extractDate,
ex: extract,
exa: extractAll,
fd: formatDate,
parseDate: extractDate,
ctx: init,
ctxa: initAll,
geta: getAll,
qu: quFuncs,
...legacyFuncs,
};

View File

@@ -1,29 +0,0 @@
'use strict';
const path = require('path');
const Promise = require('bluebird');
const fs = require('fs-extra');
const fetchScene = require('../scrape-releases');
const argv = require('../argv');
/**
 * Looks up the scene for every file in the working directory, using the second
 * space-separated token of the file name as shoot ID, and logs the scene file
 * names. Files are only renamed when `argv.confirm` is set.
 */
async function renameFiles() {
	const workingDirectory = process.cwd();
	const filenames = await fs.readdir(workingDirectory);

	const renameFile = async (filename) => {
		const [, shootId] = filename.split(' ');
		const scene = await fetchScene(`https://kink.com/shoot/${shootId}`);

		if (argv.confirm) {
			const currentPath = path.join(workingDirectory, filename);
			const curatedPath = path.join(workingDirectory, `${scene.filename}.mp4`);

			await fs.rename(currentPath, curatedPath);
		}

		return scene.filename;
	};

	const curated = await Promise.map(filenames, renameFile, { concurrency: 5 });

	console.log(curated);
}

renameFiles();

View File

@@ -3,26 +3,26 @@
const bhttp = require('bhttp');
/**
 * Resolves a free-text location through the Nominatim search API.
 *
 * @param {string} query - place description
 * @returns {Promise<Object|null>} the first result's `city`, `state`, `country`
 *   (upper-cased country code) and `continent`, each only when present;
 *   null when there is no query or no result with an address
 */
async function resolvePlace(query) {
	if (!query) {
		return null;
	}

	const res = await bhttp.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`);
	// only a result array can be destructured; anything else counts as no result
	const [item] = Array.isArray(res.body) ? res.body : [];

	if (item && item.address) {
		const rawPlace = item.address;
		const place = {};

		if (rawPlace.city) place.city = rawPlace.city;
		if (rawPlace.state) place.state = rawPlace.state;
		if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase();
		if (rawPlace.continent) place.continent = rawPlace.continent;

		return place;
	}

	return null;
}
module.exports = resolvePlace;

View File

@@ -6,32 +6,32 @@ const fs = require('fs-extra');
const knex = require('../knex');
/**
 * Downloads the logo of every site in the network with slug 'score' and stores
 * it as `./score/<site slug>.png`, ten downloads at a time.
 */
async function init() {
	try {
		const sites = await knex('sites')
			.select('networks.name', 'sites.slug')
			.join('networks', 'networks.id', 'sites.network_id')
			.where('networks.slug', 'score');

		await Promise.map(sites, async (site) => {
			const url = `https://cdn77.scoreuniverse.com/${site.slug}/images/logo.png`;

			console.log(url);

			const res = await bhttp.get(url, {
				responseTimeout: 5000,
			});

			if (res.statusCode === 200) {
				console.log(`Saving logo for ${site.slug}`);

				await fs.writeFile(`./score/${site.slug}.png`, res.body);
			} else {
				// only report a missing logo when nothing was saved
				console.log(`No logo found for ${site.slug}`);
			}
		}, {
			concurrency: 10,
		});
	} finally {
		// close the connection pool even when a query or download fails
		await knex.destroy();
	}
}

init();

Some files were not shown because too many files have changed in this diff Show More