Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -6,10 +6,11 @@
|
||||
},
|
||||
"rules": {
|
||||
"strict": 0,
|
||||
"indent": ["error", "tab"],
|
||||
"no-tabs": "off",
|
||||
"no-unused-vars": ["error", {"argsIgnorePattern": "^_"}],
|
||||
"no-console": 0,
|
||||
"no-underscore-dangle": 0,
|
||||
"indent": "off",
|
||||
"prefer-destructuring": "off",
|
||||
"template-curly-spacing": "off",
|
||||
"object-curly-newline": "off",
|
||||
|
||||
@@ -18,522 +18,522 @@ const { curateSites } = require('./sites');
|
||||
const { storeMedia, associateMedia } = require('./media');
|
||||
|
||||
/**
 * Nest the flat `<prefix>_city`, `<prefix>_state` and `<prefix>_country_*` columns
 * of an actor row into a single place object.
 *
 * @param {Object} actor - Actor row.
 * @param {string} prefix - Column prefix, `birth` or `residence`.
 * @returns {Object|null} Place with only the known parts set, or null when none is known.
 */
function curatePlace(actor, prefix) {
	const city = actor[`${prefix}_city`];
	const state = actor[`${prefix}_state`];
	const alpha2 = actor[`${prefix}_country_alpha2`];

	if (!city && !state && !alpha2) {
		return null;
	}

	const place = {};

	if (city) place.city = city;
	if (state) place.state = state;

	if (alpha2) {
		place.country = {
			alpha2,
			name: actor[`${prefix}_country_name`],
			alias: actor[`${prefix}_country_alias`],
		};
	}

	return place;
}

/**
 * Convert a raw `actors` row into the camelCased actor object returned to callers.
 *
 * Aliases, avatar, photos and social links are queried in parallel; the flat
 * birth/residence columns are nested into `origin` and `residence`.
 *
 * @param {Object} actor - Row from the `actors` table, including the joined
 *   `birth_country_*` and `residence_country_*` columns.
 * @returns {Promise<Object>} Curated actor; `age` is only present when a birthdate is known.
 */
async function curateActor(actor) {
	const [aliases, avatar, photos, social] = await Promise.all([
		knex('actors').where({ alias_for: actor.id }),
		knex('actors_avatars')
			.where('actor_id', actor.id)
			.join('media', 'media.id', 'actors_avatars.media_id')
			.first(),
		knex('actors_photos')
			.where('actor_id', actor.id)
			.join('media', 'media.id', 'actors_photos.media_id')
			.orderBy('index'),
		knex('actors_social')
			.where('actor_id', actor.id)
			.orderBy('platform', 'desc'),
	]);

	const curatedActor = {
		id: actor.id,
		gender: actor.gender,
		name: actor.name,
		description: actor.description,
		birthdate: actor.birthdate && new Date(actor.birthdate),
		country: actor.country_alpha2,
		origin: curatePlace(actor, 'birth'),
		residence: curatePlace(actor, 'residence'),
		ethnicity: actor.ethnicity,
		height: actor.height,
		weight: actor.weight,
		bust: actor.bust,
		waist: actor.waist,
		hip: actor.hip,
		naturalBoobs: actor.natural_boobs,
		aliases: aliases.map(({ name }) => name),
		slug: actor.slug,
		avatar,
		photos,
		hasTattoos: actor.has_tattoos,
		hasPiercings: actor.has_piercings,
		tattoos: actor.tattoos,
		piercings: actor.piercings,
		social,
		scrapedAt: actor.scraped_at,
	};

	if (curatedActor.birthdate) {
		curatedActor.age = moment().diff(curatedActor.birthdate, 'years');
	}

	return curatedActor;
}
|
||||
|
||||
/**
 * Curate a list of actor rows in parallel.
 *
 * @param {Object[]} releases - Actor rows (the parameter name is historical).
 * @returns {Promise<Object[]>} Curated actors, in input order.
 */
function curateActors(releases) {
	// curateActor already returns a promise, so no extra async wrapper is needed
	return Promise.all(releases.map(release => curateActor(release)));
}
|
||||
|
||||
/**
 * Build the snake_cased row to insert into or update in the `actors` table.
 *
 * @param {Object} actor - Actor or merged profile in camelCase form.
 * @param {boolean} scraped - Whether this entry results from a scrape attempt;
 *   when truthy, `scraped_at` and `scrape_success` are set.
 * @param {boolean} scrapeSuccess - Stored as `scrape_success` when `scraped` is truthy.
 * @returns {Object} Row for the `actors` table.
 */
function curateActorEntry(actor, scraped, scrapeSuccess) {
	const curatedActor = {
		name: capitalize(actor.name),
		slug: slugify(actor.name),
		birthdate: actor.birthdate,
		description: actor.description,
		gender: actor.gender,
		ethnicity: actor.ethnicity,
		bust: actor.bust,
		waist: actor.waist,
		hip: actor.hip,
		natural_boobs: actor.naturalBoobs,
		height: actor.height,
		weight: actor.weight,
		hair: actor.hair,
		eyes: actor.eyes,
		has_tattoos: actor.hasTattoos,
		has_piercings: actor.hasPiercings,
		tattoos: actor.tattoos,
		piercings: actor.piercings,
	};

	// only set the ID for existing actors, new rows get theirs from the database
	if (actor.id) {
		curatedActor.id = actor.id;
	}

	if (actor.birthPlace) {
		curatedActor.birth_city = actor.birthPlace.city;
		curatedActor.birth_state = actor.birthPlace.state;
		curatedActor.birth_country_alpha2 = actor.birthPlace.country;
	}

	if (actor.residencePlace) {
		curatedActor.residence_city = actor.residencePlace.city;
		curatedActor.residence_state = actor.residencePlace.state;
		curatedActor.residence_country_alpha2 = actor.residencePlace.country;
	}

	if (scraped) {
		curatedActor.scraped_at = new Date();
		curatedActor.scrape_success = scrapeSuccess;
	}

	return curatedActor;
}
|
||||
|
||||
/**
 * Normalize a social media URL into a row for the `actors_social` table.
 *
 * The URL is tested against the known platform patterns in order; on the first
 * match the URL is rewritten to that platform's canonical form. Unrecognized
 * URLs are kept as-is with an undefined platform.
 *
 * @param {string} url - Social media URL as scraped.
 * @param {number} actorId - ID of the actor the link belongs to.
 * @returns {{url: string, platform: (string|undefined), actor_id: number}} Social entry.
 */
function curateSocialEntry(url, actorId) {
	const platforms = [
		// links supplied by PH often look like domain.com/domain.com/username
		{
			label: 'twitter',
			pattern: 'http(s)\\://(*)twitter.com/:username(/)(?*)',
			format: username => `https://www.twitter.com/${username}`,
		},
		{
			label: 'youtube',
			pattern: 'http(s)\\://(*)youtube.com/channel/:username(?*)',
			format: username => `https://www.youtube.com/channel/${username}`,
		},
		{
			label: 'instagram',
			pattern: 'http(s)\\://(*)instagram.com/:username(/)(?*)',
			format: username => `https://www.instagram.com/${username}`,
		},
		{
			label: 'snapchat',
			pattern: 'http(s)\\://(*)snapchat.com/add/:username(/)(?*)',
			format: username => `https://www.snapchat.com/add/${username}`,
		},
		{
			label: 'tumblr',
			pattern: 'http(s)\\://:username.tumblr.com(*)',
			format: username => `https://${username}.tumblr.com`,
		},
		{
			label: 'onlyfans',
			pattern: 'http(s)\\://(*)onlyfans.com/:username(/)(?*)',
			format: username => `https://www.onlyfans.com/${username}`,
		},
		{
			label: 'fancentro',
			pattern: 'http(s)\\://(*)fancentro.com/:username(/)(?*)',
			format: username => `https://www.fancentro.com/${username}`,
		},
		{
			label: 'modelhub',
			pattern: 'http(s)\\://(*)modelhub.com/:username(/)(?*)',
			format: username => `https://www.modelhub.com/${username}`,
		},
	];

	// first matching platform wins, later patterns are not evaluated
	for (const platform of platforms) {
		const patternMatch = new UrlPattern(platform.pattern).match(url);

		if (patternMatch) {
			return {
				url: platform.format ? platform.format(patternMatch.username) : url,
				platform: platform.label,
				actor_id: actorId,
			};
		}
	}

	return {
		url,
		platform: undefined,
		actor_id: actorId,
	};
}
|
||||
|
||||
/**
 * Curate the social links of an actor, leaving out links that are already
 * stored for the actor or that occur more than once in the input.
 *
 * URLs are compared case-insensitively after normalization by `curateSocialEntry`.
 *
 * @param {string[]} [urls] - Scraped social media URLs; falsy items are skipped.
 * @param {number} actorId - ID of the actor the links belong to.
 * @returns {Promise<Object[]>} New `actors_social` rows, empty when `urls` is not set.
 */
async function curateSocialEntries(urls, actorId) {
	if (!urls) {
		return [];
	}

	const existingSocialLinks = await knex('actors_social').where('actor_id', actorId);
	const knownUrls = new Set(existingSocialLinks.map(entry => entry.url.toLowerCase()));
	const socialEntries = [];

	for (const url of urls) {
		// a missing URL cannot be matched or compared, skip it rather than crash
		if (!url) continue;

		const socialEntry = curateSocialEntry(url, actorId);
		const normalizedUrl = socialEntry.url.toLowerCase();

		// prevent duplicates
		if (!knownUrls.has(normalizedUrl)) {
			knownUrls.add(normalizedUrl);
			socialEntries.push(socialEntry);
		}
	}

	return socialEntries;
}
|
||||
|
||||
/**
 * Fetch and curate actors matching a query.
 *
 * Each actor row is joined with the `countries` table twice, once for the birth
 * country and once for the residence country, and ordered by name and gender.
 *
 * @param {Object} queryObject - Conditions handed to `whereOr` for the `actors` table.
 * @param {number} [limit=100] - Maximum number of actors to return.
 * @returns {Promise<Object[]>} Curated actors.
 */
async function fetchActors(queryObject, limit = 100) {
	const actorEntries = await knex('actors')
		.select(
			'actors.*',
			'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
			'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name', 'residence_countries.alias as residence_country_alias',
		)
		.leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
		.leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
		.orderBy(['actors.name', 'actors.gender'])
		.where(builder => whereOr(queryObject, 'actors', builder))
		.limit(limit);

	return curateActors(actorEntries);
}
|
||||
|
||||
/**
 * Store the social links of an actor that are not in the database yet.
 *
 * @param {string[]} [urls] - Scraped social media URLs.
 * @param {number} actorId - ID of the actor the links belong to.
 * @returns {Promise<void>}
 */
async function storeSocialLinks(urls, actorId) {
	const curatedSocialEntries = await curateSocialEntries(urls, actorId);

	// nothing new to store, don't bother the database with an empty insert
	if (curatedSocialEntries.length === 0) {
		return;
	}

	await knex('actors_social').insert(curatedSocialEntries);
}
|
||||
|
||||
/**
 * Store avatar media and associate it with an actor.
 *
 * @param {Array} [avatars] - Avatar sources to store.
 * @param {number} actorId - ID of the actor the avatars belong to.
 * @returns {Promise<*>} The result of `storeMedia`, or an empty array when
 *   there are no avatars to store.
 */
async function storeAvatars(avatars, actorId) {
	const hasAvatars = Boolean(avatars) && avatars.length > 0;

	if (!hasAvatars) {
		return [];
	}

	const avatarsBySource = await storeMedia(avatars, 'actor', 'avatar');

	await associateMedia({ [actorId]: avatars }, avatarsBySource, 'actor', 'photo', 'avatar');

	return avatarsBySource;
}
|
||||
|
||||
/**
 * Insert a new actor, along with its social links and avatars.
 *
 * @param {Object} actor - Actor or merged profile to store.
 * @param {boolean} [scraped=false] - Whether the entry results from a scrape attempt.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape found a profile.
 * @returns {Promise<Object>} The inserted `actors` row.
 */
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
	const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

	const [actorEntry] = await knex('actors')
		.insert(curatedActor)
		.returning('*');

	// social links and avatars reference the actor, so they need the new ID
	await storeSocialLinks(actor.social, actorEntry.id);

	if (actor.avatars) {
		await storeAvatars(actor.avatars, actorEntry.id);
	}

	logger.info(`Added new entry for actor '${actor.name}'`);

	return actorEntry;
}
|
||||
|
||||
/**
 * Update an existing actor and store any social links that are new.
 *
 * Avatars are not handled here; callers store them separately.
 *
 * @param {Object} actor - Actor or merged profile, `actor.id` selects the row to update.
 * @param {boolean} [scraped=false] - Whether the update results from a scrape attempt.
 * @param {boolean} [scrapeSuccess=false] - Whether that scrape found a profile.
 * @returns {Promise<Object>} The updated `actors` row, as returned by the database.
 */
async function updateActor(actor, scraped = false, scrapeSuccess = false) {
	const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

	const [actorEntry] = await knex('actors')
		.where({ id: actor.id })
		.update(curatedActor)
		.returning('*');

	await storeSocialLinks(actor.social, actor.id);

	logger.info(`Updated entry for actor '${actor.name}'`);

	return actorEntry;
}
|
||||
|
||||
/**
 * Merge the profiles scraped from multiple sources into a single profile.
 *
 * Profiles are merged in order: for most properties the first source that
 * supplies a value wins. Social links, releases and avatars are accumulated
 * from all sources. Places are resolved through `resolvePlace`, and when no
 * birth place is known the birth country is derived from the nationality.
 *
 * @param {Array<Object|null>} profiles - Scraped profiles; sources that found
 *   nothing are represented by a falsy value and skipped.
 * @param {Object} [actor] - Existing actor entry; its ID and name take
 *   precedence over the scraped ones.
 * @returns {Promise<Object|null>} Merged profile, or null when no source returned a profile.
 */
async function mergeProfiles(profiles, actor) {
	// skip every empty result, not only null, so an undefined profile can't crash the merge
	const availableProfiles = profiles.filter(Boolean);

	if (availableProfiles.length === 0) {
		return null;
	}

	const mergedProfile = availableProfiles.reduce((prevProfile, profile) => {
		const accProfile = {
			id: actor ? actor.id : null,
			name: actor ? actor.name : (prevProfile.name || profile.name),
			description: prevProfile.description || profile.description,
			gender: prevProfile.gender || profile.gender,
			// an invalid date from an earlier source should not block a valid one
			birthdate: !prevProfile.birthdate || Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate,
			birthPlace: prevProfile.birthPlace || profile.birthPlace,
			residencePlace: prevProfile.residencePlace || profile.residencePlace,
			nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
			ethnicity: prevProfile.ethnicity || profile.ethnicity,
			bust: prevProfile.bust || (/\d+\w+/.test(profile.bust) ? profile.bust : null),
			waist: prevProfile.waist || profile.waist,
			hip: prevProfile.hip || profile.hip,
			// booleans: false is a valid value, so only fall through on undefined
			naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs,
			height: prevProfile.height || profile.height,
			weight: prevProfile.weight || profile.weight,
			hair: prevProfile.hair || profile.hair,
			eyes: prevProfile.eyes || profile.eyes,
			hasPiercings: prevProfile.hasPiercings === undefined ? profile.hasPiercings : prevProfile.hasPiercings,
			hasTattoos: prevProfile.hasTattoos === undefined ? profile.hasTattoos : prevProfile.hasTattoos,
			piercings: prevProfile.piercings || profile.piercings,
			tattoos: prevProfile.tattoos || profile.tattoos,
			social: prevProfile.social.concat(profile.social || []),
			releases: prevProfile.releases.concat(profile.releases ? profile.releases : []), // don't flatten fallbacks
		};

		if (!profile.avatar) {
			accProfile.avatars = prevProfile.avatars;

			return accProfile;
		}

		// an avatar is either a plain source string or an object with src and optional copyright
		const curateAvatar = avatar => ({
			src: avatar.src || avatar,
			scraper: profile.scraper,
			copyright: avatar.copyright === undefined ? capitalize(profile.site?.name || profile.scraper) : avatar.copyright,
		});

		const avatar = Array.isArray(profile.avatar)
			? profile.avatar.map(avatarX => curateAvatar(avatarX))
			: curateAvatar(profile.avatar);

		accProfile.avatars = prevProfile.avatars.concat([avatar]); // don't flatten fallbacks

		return accProfile;
	}, {
		social: [],
		avatars: [],
		releases: [],
	});

	const [birthPlace, residencePlace] = await Promise.all([
		resolvePlace(mergedProfile.birthPlace),
		resolvePlace(mergedProfile.residencePlace),
	]);

	mergedProfile.birthPlace = birthPlace;
	mergedProfile.residencePlace = residencePlace;

	if (!mergedProfile.birthPlace && mergedProfile.nationality) {
		const country = await knex('countries')
			.where('nationality', 'ilike', `%${mergedProfile.nationality}%`)
			.orderBy('priority', 'desc')
			.first();

		// the nationality may not match any known country
		if (country) {
			mergedProfile.birthPlace = {
				country: country.alpha2,
			};
		}
	}

	return mergedProfile;
}
|
||||
|
||||
/**
 * Scrape the profile of an actor from each of the given sources.
 *
 * A source is either a single scraper slug or a group of slugs. Within a group
 * the scrapers are tried in order until one returns a profile; the remaining
 * scrapers of the group are then skipped.
 *
 * @param {Array<string|string[]>} sources - Scraper slugs, or groups of fallback slugs.
 * @param {string} actorName - Name of the actor as requested.
 * @param {Object} [actorEntry] - Existing actor entry; its name is searched for when present.
 * @param {Object} sitesBySlug - Site and network entries keyed by slug.
 * @returns {Promise<Array<Object|null>>} One profile per source, null for
 *   sources that yielded nothing.
 */
async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
	return Promise.map(sources, async (source) => {
		const scraperSlugs = [].concat(source);

		// only the failure of the last scraper tried decides whether a warning is logged
		let lastError = new Error();

		for (const scraperSlug of scraperSlugs) {
			const scraper = scrapers.actors[scraperSlug];

			try {
				if (!scraper) {
					logger.warn(`No profile scraper available for ${scraperSlug}`);
					throw new Error(`No profile scraper available for ${scraperSlug}`);
				}

				logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);

				const site = sitesBySlug[scraperSlug] || null;
				// NOTE(review): `include` is not defined in this function, presumably a module-level value
				const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, include);

				// scrapers may return a number instead of a profile, presumably a status code
				if (profile && typeof profile !== 'number') {
					logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);

					return {
						...profile,
						name: actorName,
						scraper: scraperSlug,
						site,
						releases: profile.releases?.map(release => (typeof release === 'string'
							? { url: release, site }
							: { ...release, site: release.site || site }
						)),
					};
				}

				logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}: ${profile}`);
				// a missing profile is an expected outcome, so it is not worth a warning
				throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
			} catch (error) {
				lastError = error;
			}
		}

		if (lastError.warn !== false) {
			logger.warn(`Error in scraper ${source}: ${lastError.message}`);
		}

		return null;
	});
}
|
||||
|
||||
/**
 * Scrape, merge and optionally store the profiles of the given actors.
 *
 * For each actor the configured profile sources are scraped and merged into one
 * profile. With `argv.save` set, an existing actor is updated and a new actor
 * is inserted; an actor without any profile is still inserted by name when no
 * entry exists for it yet. Failures are logged per actor and never abort the batch.
 *
 * @param {string[]} [actorNames] - Names of the actors to scrape, defaults to `argv.actors`.
 * @returns {Promise<Array<Object|null>>} Merged profile per actor, null when no
 *   profile was found or scraping failed.
 */
async function scrapeActors(actorNames) {
	return Promise.map(actorNames || argv.actors, async (actorName) => {
		try {
			const actorSlug = slugify(actorName);
			const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
			const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);

			const finalSources = argv.withReleases ? sources.flat() : sources; // ignore race-to-success grouping when scenes are requested
			const sourceSlugs = finalSources.flat();

			const [siteEntries, networkEntries] = await Promise.all([
				knex('sites')
					.leftJoin('networks', 'sites.network_id', 'networks.id')
					.select(
						'sites.*',
						'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
					)
					.whereIn('sites.slug', sourceSlugs),
				knex('networks').select('*').whereIn('slug', sourceSlugs),
			]);

			const sites = await curateSites(siteEntries, true);
			const networks = networkEntries.map(network => ({ ...network, isFallback: true }));
			// networks go first, so a site overrides a network with the same slug
			const sitesBySlug = Object.fromEntries([].concat(networks, sites).map(site => [site.slug, site]));

			// pass the final sources, otherwise the ungrouping for releases has no effect
			const profiles = await scrapeProfiles(finalSources, actorName, actorEntry, sitesBySlug);
			const profile = await mergeProfiles(profiles, actorEntry);

			if (profile === null) {
				logger.warn(`Could not find profile for actor '${actorName}'`);

				if (argv.save && !actorEntry) {
					// keep track of the actor, even though nothing is known yet
					await storeActor({ name: actorName }, false, false);
				}

				return null;
			}

			if (argv.inspect) {
				console.log(profile);
				logger.info(`Found ${profile.releases.length} releases for ${actorName}`);
			}

			if (argv.save) {
				if (actorEntry) {
					await Promise.all([
						updateActor(profile, true, true),
						storeAvatars(profile.avatars, actorEntry.id),
					]);
				} else {
					await storeActor(profile, true, true);
				}
			}

			return profile;
		} catch (error) {
			console.log(error);
			logger.warn(`${actorName}: ${error}`);

			return null;
		}
	}, {
		concurrency: 3,
	});
}
|
||||
|
||||
/**
 * Scrape every actor that has not been scraped before.
 *
 * Selects the actor rows whose `scraped_at` column is null and hands their
 * names to scrapeActors.
 *
 * @returns {Promise<*>} whatever scrapeActors resolves to
 */
async function scrapeBasicActors() {
	const unscrapedActors = await knex('actors').where('scraped_at', null);
	const actorNames = unscrapedActors.map(actor => actor.name);

	return scrapeActors(actorNames);
}
|
||||
|
||||
/**
 * Link actors to releases in the `releases_actors` table.
 *
 * Actors that are not found in the database by slug are stored first through
 * storeActor. Associations already present in the database are left out of the insert.
 *
 * @param {Object} mappedActors - actors keyed by slug; each value has a `name` and an iterable `releaseIds`
 * @param {Array<Object>} releases - releases (with an `id`) whose existing associations are looked up
 * @returns {Promise<void>}
 */
async function associateActors(mappedActors, releases) {
	const [existingActorEntries, existingAssociationEntries] = await Promise.all([
		knex('actors')
			.whereIn('name', Object.values(mappedActors).map(actor => actor.name))
			.orWhereIn('slug', Object.keys(mappedActors)),
		knex('releases_actors').whereIn('release_id', releases.map(release => release.id)),
	]);

	const associations = await Promise.map(Object.entries(mappedActors), async ([actorSlug, actor]) => {
		try {
			// reuse the existing entry when the slug is known, store the actor otherwise
			// NOTE(review): storeActor is expected to resolve to the stored row including its id - confirm
			const actorEntry = existingActorEntries.find(actorX => actorX.slug === actorSlug)
				|| await storeActor(actor);

			return Array.from(actor.releaseIds)
				.map(releaseId => ({
					release_id: releaseId,
					actor_id: actorEntry.id,
				}))
				// remove associations already in database
				.filter(association => !existingAssociationEntries
					.some(associationEntry => associationEntry.actor_id === association.actor_id
						&& associationEntry.release_id === association.release_id));
		} catch (error) {
			// a single failing actor must not prevent the other associations from being stored
			logger.error(actor.name, error);
			return null;
		}
	});

	await knex('releases_actors').insert(associations.filter(association => association).flat());

	// basic actor scraping is failure prone, don't run together with actor association
}
|
||||
|
||||
module.exports = {
|
||||
associateActors,
|
||||
fetchActors,
|
||||
scrapeActors,
|
||||
scrapeBasicActors,
|
||||
associateActors,
|
||||
fetchActors,
|
||||
scrapeActors,
|
||||
scrapeBasicActors,
|
||||
};
|
||||
|
||||
193
src/actors.js
193
src/actors.js
@@ -1,125 +1,156 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
|
||||
// const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const scrapers = require('./scrapers/scrapers');
|
||||
|
||||
const argv = require('./argv');
|
||||
const slugify = require('./utils/slugify');
|
||||
const capitalize = require('./utils/capitalize');
|
||||
|
||||
/**
 * Normalise a list of actor names or actor objects into base actors.
 *
 * Every base actor gets a capitalized `name`, a `slug` derived from that name and
 * the `network` of the release's site. Properties of actor objects are preserved,
 * but `name`, `slug` and `network` are always overwritten by the normalised values.
 *
 * @param {Array<string|Object>} actorsOrNames - plain names, or objects carrying a `name`
 * @param {Object} [release] - release the actors belong to; `network` is undefined when omitted
 * @returns {Array<Object>} base actors in input order
 */
function toBaseActors(actorsOrNames, release) {
	return actorsOrNames.map((actorOrName) => {
		const name = capitalize(actorOrName.name || actorOrName);
		const slug = slugify(name);

		const baseActor = {
			name,
			slug,
			network: release?.site.network,
		};

		if (actorOrName.name) {
			// actor object, keep any additional properties it carries
			return {
				...actorOrName,
				...baseActor,
			};
		}

		return baseActor;
	});
}
|
||||
|
||||
/**
 * Build the row to insert into the `actors` table for a base actor.
 * The entry is always created without a network (`network_id: null`).
 *
 * @param {Object} baseActor - base actor with `name` and `slug`
 * @param {*} batchId - ID of the batch the entry is created in
 * @returns {{name: string, slug: string, network_id: null, batch_id: *}}
 */
function curateActorEntry(baseActor, batchId) {
	const { name, slug } = baseActor;

	return {
		name,
		slug,
		network_id: null,
		batch_id: batchId,
	};
}
|
||||
|
||||
/**
 * Build `actors` table rows for a list of base actors, all tagged with the same batch.
 *
 * @param {Array<Object>} baseActors - base actors to convert
 * @param {*} batchId - ID of the batch the entries are created in
 * @returns {Array<Object>} one entry per base actor, in input order
 */
function curateActorEntries(baseActors, batchId) {
	return baseActors.map(baseActor => curateActorEntry(baseActor, batchId));
}
|
||||
|
||||
/**
 * Make sure an actor entry exists for every requested actor name.
 *
 * Names are normalised through toBaseActors and looked up by slug among the actors
 * without a network. Actors that are missing are inserted under a newly created batch;
 * no batch is created when every actor already exists.
 *
 * NOTE(review): the network and site lookups are prepared here but not consumed yet -
 * presumably groundwork for the actual profile scraping; confirm before removing them.
 *
 * @param {Array<string|Object>} actorNames - actor names (or objects carrying a `name`)
 * @returns {Promise<void>}
 */
async function scrapeActors(actorNames) {
	const baseActors = toBaseActors(actorNames);

	const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
	const siteSlugs = sources.flat();

	const [networks, sites, existingActorEntries] = await Promise.all([
		knex('networks').whereIn('slug', siteSlugs),
		knex('sites').whereIn('slug', siteSlugs),
		knex('actors')
			.select(['id', 'name', 'slug'])
			.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
			.whereNull('network_id'),
	]);

	const networksBySlug = Object.fromEntries(networks.map(network => [network.slug, { ...network, isNetwork: true }]));
	const sitesBySlug = Object.fromEntries(sites.map(site => [site.slug, site]));

	// skip actors that already exist, and only keep the first occurrence of a slug
	// so the same actor passed twice does not result in two inserted rows
	const knownSlugs = new Set(existingActorEntries.map(actorEntry => actorEntry.slug));

	const newBaseActors = baseActors.filter((baseActor) => {
		if (knownSlugs.has(baseActor.slug)) {
			return false;
		}

		knownSlugs.add(baseActor.slug);
		return true;
	});

	const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
	const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
	const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']);

	const actorEntries = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);

	console.log(actorEntries, newActorEntries, actorEntries);
}
|
||||
|
||||
/**
 * Fetch the actor entries matching the given base actors, creating the missing ones.
 *
 * An actor counts as existing when its slug is stored either without a network
 * or for the base actor's own network.
 *
 * @param {Array<Object>} baseActors - base actors with `slug` and `network.id`
 * @param {*} batchId - ID of the batch newly created actors are assigned to
 * @returns {Promise<Array<Object>>} newly created actors followed by the existing ones
 */
async function getOrCreateActors(baseActors, batchId) {
	const existingActors = await knex('actors')
		.select('id', 'alias_for', 'name', 'slug', 'network_id')
		.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
		.whereNull('network_id')
		.orWhereIn(['slug', 'network_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));

	// existing slugs grouped by network ID, network-less actors are grouped under 'null'
	const existingSlugsByNetworkId = new Map();

	existingActors.forEach((actor) => {
		const networkKey = String(actor.network_id);

		if (!existingSlugsByNetworkId.has(networkKey)) {
			existingSlugsByNetworkId.set(networkKey, new Set());
		}

		existingSlugsByNetworkId.get(networkKey).add(actor.slug);
	});

	const missingBaseActors = baseActors.filter(baseActor => !existingSlugsByNetworkId.get(String(baseActor.network.id))?.has(baseActor.slug)
		&& !existingSlugsByNetworkId.get('null')?.has(baseActor.slug));

	// the same actor may be missing for several networks; de-duplicate on what is
	// actually written to the database, so it does not get inserted more than once
	const seenEntryKeys = new Set();

	const curatedActorEntries = curateActorEntries(missingBaseActors, batchId).filter((actorEntry) => {
		const entryKey = `${actorEntry.network_id}:${actorEntry.slug}`;

		if (seenEntryKeys.has(entryKey)) {
			return false;
		}

		seenEntryKeys.add(entryKey);
		return true;
	});

	if (curatedActorEntries.length === 0) {
		// nothing to create, don't bother the database with an empty insert
		return existingActors;
	}

	const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'network_id']);

	if (Array.isArray(newActors)) {
		return newActors.concat(existingActors);
	}

	return existingActors;
}
|
||||
|
||||
/**
 * Associate the actors of the given releases with those releases.
 *
 * Actors are normalised per release, fetched or created through getOrCreateActors,
 * and linked in `releases_actors`. An alias entry is resolved to the actor it is an
 * alias for. Associations that already exist are skipped by the database
 * (ON CONFLICT DO NOTHING).
 *
 * @param {Array<Object>} releases - stored releases with an `id`, and optionally `actors`
 * @param {*} batchId - ID of the batch newly created actors are assigned to
 * @returns {Promise<void>}
 */
async function associateActors(releases, batchId) {
	const baseActorsByReleaseId = {};

	releases.forEach((release) => {
		if (release.actors) {
			baseActorsByReleaseId[release.id] = toBaseActors(release.actors, release);
		}
	});

	const baseActors = Object.values(baseActorsByReleaseId).flat();

	if (baseActors.length === 0) {
		return;
	}

	// keep a single base actor for every slug and network combination
	const baseActorsBySlugAndNetworkId = {};

	baseActors.forEach((baseActor) => {
		baseActorsBySlugAndNetworkId[baseActor.slug] = {
			...baseActorsBySlugAndNetworkId[baseActor.slug],
			[baseActor.network.id]: baseActor,
		};
	});

	const uniqueBaseActors = Object.values(baseActorsBySlugAndNetworkId)
		.flatMap(baseActorsByNetworkId => Object.values(baseActorsByNetworkId));

	const actors = await getOrCreateActors(uniqueBaseActors, batchId);

	// actor IDs grouped by network ID first, network-less actors end up under the 'null' key
	const actorIdsByNetworkIdAndSlug = {};

	actors.forEach((actor) => {
		actorIdsByNetworkIdAndSlug[actor.network_id] = {
			...actorIdsByNetworkIdAndSlug[actor.network_id],
			[actor.slug]: actor.alias_for || actor.id,
		};
	});

	const releaseActorAssociations = Object.entries(baseActorsByReleaseId)
		.flatMap(([releaseId, releaseActors]) => releaseActors.map(releaseActor => ({
			release_id: releaseId,
			// prefer the network specific actor, fall back to the network-less one;
			// there may be no network-less actors at all, hence the optional lookup
			actor_id: actorIdsByNetworkIdAndSlug[releaseActor.network.id]?.[releaseActor.slug]
				|| actorIdsByNetworkIdAndSlug.null?.[releaseActor.slug],
		})))
		// an association can't be stored without an actor
		.filter(association => association.actor_id);

	if (releaseActorAssociations.length === 0) {
		return;
	}

	await knex.raw(`${knex('releases_actors').insert(releaseActorAssociations).toString()} ON CONFLICT DO NOTHING;`);
}
|
||||
|
||||
module.exports = {
|
||||
associateActors,
|
||||
associateActors,
|
||||
scrapeActors,
|
||||
};
|
||||
|
||||
48
src/app.js
48
src/app.js
@@ -7,39 +7,39 @@ const knex = require('./knex');
|
||||
const fetchUpdates = require('./updates');
|
||||
const { fetchScenes, fetchMovies } = require('./deep');
|
||||
const { storeReleases, updateReleasesSearch } = require('./store-releases');
|
||||
const { scrapeActors } = require('./actors-legacy');
|
||||
const { scrapeActors } = require('./actors');
|
||||
|
||||
/**
 * Application entry point, runs the tasks selected through the command line arguments.
 *
 * With `--server` only the web server is started. Otherwise the requested search
 * update, actor scraping and release scraping are run, and the database connection
 * is closed afterwards, also when one of the tasks fails.
 *
 * @returns {Promise<void>}
 */
async function init() {
	if (argv.server) {
		// the server keeps using the database connection, don't close it
		await initServer();
		return;
	}

	try {
		if (argv.updateSearch) {
			await updateReleasesSearch();
		}

		if (argv.actors) {
			await scrapeActors(argv.actors);
		}

		const updateBaseScenes = (argv.scrape || argv.sites || argv.networks) && await fetchUpdates();

		const deepScenes = argv.deep
			? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || [])])
			: updateBaseScenes;

		const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);
		const deepMovies = await fetchMovies([...(argv.movies || []), ...(sceneMovies || [])]);

		if (argv.save) {
			await storeReleases([
				...(deepScenes || []),
				...(deepMovies || []),
			]);
		}
	} finally {
		// always release the connection pool, an open pool keeps the process from exiting
		await knex.destroy();
	}
}
|
||||
|
||||
module.exports = init;
|
||||
|
||||
366
src/argv.js
366
src/argv.js
@@ -4,188 +4,188 @@ const config = require('config');
|
||||
const yargs = require('yargs');
|
||||
|
||||
// Command line interface definition; the parsed arguments are what this module exposes.
// NOTE(review): 'with-releases' is declared as an alias for both 'actor-scenes' and
// 'movie-scenes' - confirm sharing the alias between the two options is intended.
const { argv } = yargs
	.command('npm start')
	.option('server', {
		describe: 'Start web server',
		type: 'boolean',
		alias: 'web',
	})
	.option('scrape', {
		describe: 'Scrape sites and networks defined in configuration',
		type: 'boolean',
	})
	.option('networks', {
		describe: 'Networks to scrape (overrides configuration)',
		type: 'array',
		alias: 'network',
	})
	.option('sites', {
		describe: 'Sites to scrape (overrides configuration)',
		type: 'array',
		alias: 'site',
	})
	.option('actors', {
		describe: 'Scrape actors by name or slug',
		type: 'array',
		alias: 'actor',
	})
	.option('actor-scenes', {
		describe: 'Fetch all scenes for an actor',
		type: 'boolean',
		alias: 'with-releases',
		default: false,
	})
	.option('movie-scenes', {
		describe: 'Fetch all scenes for a movie',
		type: 'boolean',
		alias: 'with-releases',
		default: false,
	})
	.option('scene-movies', {
		describe: 'Fetch movies for scenes',
		type: 'boolean',
		default: true,
	})
	.option('profiles', {
		describe: 'Scrape profiles for new actors after fetching scenes',
		type: 'boolean',
		alias: 'bios',
		default: false,
	})
	.option('scene', {
		describe: 'Scrape scene info from URL',
		type: 'array',
		alias: 'scenes',
	})
	.option('movie', {
		describe: 'Scrape movie info from URL',
		type: 'array',
		alias: 'movies',
	})
	.option('sources', {
		describe: 'Use these scrapers for actor data',
		type: 'array',
		alias: 'source',
	})
	.option('deep', {
		describe: 'Fetch details for all releases',
		type: 'boolean',
		default: true,
	})
	.option('latest', {
		describe: 'Scrape latest releases if available',
		type: 'boolean',
		default: true,
	})
	.option('upcoming', {
		describe: 'Scrape upcoming releases if available',
		type: 'boolean',
		default: true,
	})
	.option('redownload', {
		describe: 'Don\'t ignore duplicates, update existing entries',
		type: 'boolean',
		alias: 'force',
	})
	.option('after', {
		describe: 'Don\'t fetch scenes older than',
		type: 'string',
		default: config.fetchAfter.join(' '),
	})
	.option('last', {
		describe: 'Get the latest x releases, no matter the date range',
		type: 'number',
	})
	.option('null-date-limit', {
		describe: 'Limit amount of scenes when dates are missing.',
		type: 'number',
		default: config.nullDateLimit,
		alias: 'limit',
	})
	.option('page', {
		describe: 'Page to start scraping at',
		type: 'number',
		default: 1,
	})
	.option('save', {
		describe: 'Save fetched releases to database',
		type: 'boolean',
		default: true,
	})
	.option('media', {
		describe: 'Include any release media',
		type: 'boolean',
		default: true,
	})
	.option('media-limit', {
		describe: 'Maximum amount of assets of each type per release',
		type: 'number',
		default: config.media.limit,
	})
	.option('images', {
		describe: 'Include any photos, posters or covers',
		type: 'boolean',
		default: true,
		alias: 'pics',
	})
	.option('videos', {
		describe: 'Include any trailers or teasers',
		type: 'boolean',
		default: true,
	})
	.option('posters', {
		describe: 'Include release posters',
		type: 'boolean',
		default: true,
		alias: 'poster',
	})
	.option('covers', {
		describe: 'Include release covers',
		type: 'boolean',
		default: true,
		alias: 'cover',
	})
	.option('photos', {
		describe: 'Include release photos',
		type: 'boolean',
		default: true,
	})
	.option('trailers', {
		describe: 'Include release trailers',
		type: 'boolean',
		default: true,
		alias: 'trailer',
	})
	.option('teasers', {
		describe: 'Include release teasers',
		type: 'boolean',
		default: true,
		alias: 'teaser',
	})
	.option('avatars', {
		describe: 'Include actor avatars',
		type: 'boolean',
		default: true,
	})
	.option('inspect', {
		describe: 'Show data in console.',
		type: 'boolean',
		default: false,
	})
	.option('level', {
		describe: 'Log level',
		type: 'string',
		default: process.env.NODE_ENV === 'development' ? 'silly' : 'info',
	})
	.option('debug', {
		describe: 'Show error stack traces',
		type: 'boolean',
		default: process.env.NODE_ENV === 'development',
	})
	.option('update-search', {
		describe: 'Update search documents for all releases.',
		type: 'boolean',
		default: false,
	});
|
||||
|
||||
module.exports = argv;
|
||||
|
||||
217
src/deep.js
217
src/deep.js
@@ -11,159 +11,160 @@ const { curateSites } = require('./sites');
|
||||
const { curateNetworks } = require('./networks');
|
||||
|
||||
/**
 * Derive a site slug from a release URL.
 *
 * The slug is the hostname label right before the final one,
 * e.g. 'example' for https://www.example.com/scene/1.
 *
 * @param {string} url - release URL
 * @returns {string|null} the slug, or null when the URL can't be parsed or the hostname has no such label
 */
function urlToSiteSlug(url) {
	try {
		const { hostname } = new URL(url);
		const slug = hostname.match(/([\w-]+)\.\w+$/)?.[1];

		// consistently return null when there is no slug, whether parsing or matching failed
		return slug ?? null;
	} catch (error) {
		logger.warn(`Failed to derive site slug from '${url}': ${error.message}`);

		return null;
	}
}
|
||||
|
||||
/**
 * Look up the sites, or otherwise networks, for base releases that have a URL but no site.
 *
 * Slugs are derived from the release URLs and matched against both the sites and the
 * networks table. Networks are marked with `isNetwork`; when a site and a network share
 * a slug, the site is used.
 *
 * @param {Array<Object>} baseReleases - base releases, optionally with `url` and `site`
 * @returns {Promise<Object>} curated sites and marked networks keyed by slug
 */
async function findSites(baseReleases) {
	const baseReleasesWithoutSite = baseReleases.filter(release => release.url && !release.site);

	const siteSlugs = Array.from(new Set(
		baseReleasesWithoutSite
			.map(baseRelease => urlToSiteSlug(baseRelease.url))
			.filter(Boolean),
	));

	// the queries don't depend on each other, run them side by side
	const [siteEntries, networkEntries] = await Promise.all([
		knex('sites')
			.leftJoin('networks', 'networks.id', 'sites.network_id')
			.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.parameters as network_parameters', 'networks.description as network_description')
			.whereIn('sites.slug', siteSlugs),
		knex('networks').whereIn('slug', siteSlugs),
	]);

	const sites = await curateSites(siteEntries, true, false);
	const networks = await curateNetworks(networkEntries, true, false, false);
	const markedNetworks = networks.map(network => ({ ...network, isNetwork: true }));

	// networks go first, so a site overrides a network with the same slug
	const sitesBySlug = Object.fromEntries([...markedNetworks, ...sites].map(site => [site.slug, site]));

	return sitesBySlug;
}
|
||||
|
||||
/**
 * Normalise a mixed list of base releases and URLs into base releases.
 *
 * URLs are wrapped into a base release, objects are passed through, and every result
 * is marked `deep: false`. Anything else is reported as malformed and discarded.
 *
 * @param {Array<Object|string>} baseReleasesOrUrls - base releases and/or release URLs
 * @returns {Array<Object>} base releases, without the discarded entries
 */
function toBaseReleases(baseReleasesOrUrls) {
	return baseReleasesOrUrls
		.map((baseReleaseOrUrl) => {
			// optional access, a null or undefined entry should be discarded rather than throw
			if (baseReleaseOrUrl?.url) {
				// base release with URL
				return {
					...baseReleaseOrUrl,
					deep: false,
				};
			}

			if (/^http/.test(baseReleaseOrUrl)) {
				// URL
				return {
					url: baseReleaseOrUrl,
					deep: false,
				};
			}

			if (baseReleaseOrUrl !== null && typeof baseReleaseOrUrl === 'object' && !Array.isArray(baseReleaseOrUrl)) {
				// base release without URL, prepare for passthrough
				return {
					...baseReleaseOrUrl,
					deep: false,
				};
			}

			logger.warn(`Malformed base release, discarding '${baseReleaseOrUrl}'`);
			return null;
		})
		.filter(Boolean);
}
|
||||
|
||||
/**
 * Deep-scrape one base release using the scraper registered for its site or its network.
 *
 * @param {Object} baseRelease - Release stub; may carry its own `site`, `url`, `path` and `tags`.
 * @param {Object} sites - Sites keyed by slug, used when the release has no `site` of its own.
 * @param {string} [type='scene'] - 'scene' or 'movie'; selects fetchScene or fetchMovie.
 * @returns {Promise<Object>} The merged release, or the untouched base release when no site or
 *   scraper is available or the scrape throws.
 */
async function scrapeRelease(baseRelease, sites, type = 'scene') {
	const site = baseRelease.site || sites[urlToSiteSlug(baseRelease.url)];

	// guard before touching any site property
	if (!site) {
		logger.warn(`No site available for ${baseRelease.url}`);
		return baseRelease;
	}

	if ((!baseRelease.url && !baseRelease.path) || !argv.deep) {
		return {
			...baseRelease,
			site,
		};
	}

	// make site.network available, even when site is network fallback
	const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site;
	const scraper = scrapers.releases[site.slug] || scrapers.releases[siteWithFallbackNetwork.network?.slug];

	if (!scraper) {
		logger.warn(`Could not find scraper for ${baseRelease.url}`);
		return baseRelease;
	}

	if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
		logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
		return baseRelease;
	}

	try {
		logger.verbose(`Fetching ${type} ${baseRelease.url}`);

		const scrapedRelease = type === 'scene'
			? await scraper.fetchScene(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include)
			: await scraper.fetchMovie(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include);

		const mergedRelease = {
			...baseRelease,
			...scrapedRelease,
			deep: !!scrapedRelease,
			site,
		};

		if (scrapedRelease && baseRelease.tags) {
			// accumulate all available tags; a scrape without tags must not append `undefined`
			mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags || []);
		}

		return mergedRelease;
	} catch (error) {
		logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
		return baseRelease;
	}
}
|
||||
|
||||
/**
 * Deep-scrape a list of base releases, at most 10 at a time.
 *
 * Relies on `Promise.map` with a `concurrency` option, which native promises do not
 * provide — presumably `Promise` is Bluebird in this module; verify against the imports.
 *
 * @param {Object[]} baseReleases - Releases produced by toBaseReleases.
 * @param {Object} sites - Sites keyed by slug.
 * @param {string} type - 'scene' or 'movie'.
 * @returns {Promise<Object[]>} One scrape result per base release.
 */
async function scrapeReleases(baseReleases, sites, type) {
	return Promise.map(
		baseReleases,
		baseRelease => scrapeRelease(baseRelease, sites, type),
		{ concurrency: 10 },
	);
}
|
||||
|
||||
/**
 * Normalize the given URLs or base releases, resolve their sites and deep-scrape each one.
 *
 * @param {Array<string|Object>} baseReleasesOrUrls - Release URLs and/or partial release objects.
 * @param {string} [type='scene'] - 'scene' or 'movie'.
 * @returns {Promise<Object[]>} The scraped releases.
 */
async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
	const baseReleases = toBaseReleases(baseReleasesOrUrls);
	const sites = await findSites(baseReleases);

	const deepReleases = await scrapeReleases(baseReleases, sites, type);

	return deepReleases;
}
|
||||
|
||||
/**
 * Fetch the given releases as scenes.
 *
 * @param {Array<string|Object>} baseReleasesOrUrls - Release URLs and/or partial release objects.
 * @returns {Promise<Object[]>} Whatever fetchReleases resolves to for type 'scene'.
 */
async function fetchScenes(baseReleasesOrUrls) {
	return fetchReleases(baseReleasesOrUrls, 'scene');
}
|
||||
|
||||
/**
 * Fetch the given releases as movies.
 *
 * @param {Array<string|Object>} baseReleasesOrUrls - Release URLs and/or partial release objects.
 * @returns {Promise<Object[]>} Whatever fetchReleases resolves to for type 'movie'.
 */
async function fetchMovies(baseReleasesOrUrls) {
	return fetchReleases(baseReleasesOrUrls, 'movie');
}
|
||||
|
||||
module.exports = {
|
||||
fetchReleases,
|
||||
fetchScenes,
|
||||
fetchMovies,
|
||||
fetchReleases,
|
||||
fetchScenes,
|
||||
fetchMovies,
|
||||
};
|
||||
|
||||
@@ -4,8 +4,8 @@ const config = require('config');
|
||||
const knex = require('knex');
|
||||
|
||||
module.exports = knex({
|
||||
client: 'pg',
|
||||
connection: config.database,
|
||||
// performance overhead, don't use asyncStackTraces in production
|
||||
asyncStackTraces: process.env.NODE_ENV === 'development',
|
||||
client: 'pg',
|
||||
connection: config.database,
|
||||
// performance overhead, don't use asyncStackTraces in production
|
||||
asyncStackTraces: process.env.NODE_ENV === 'development',
|
||||
});
|
||||
|
||||
@@ -9,31 +9,31 @@ require('winston-daily-rotate-file');
|
||||
const args = require('./argv');
|
||||
|
||||
/**
 * Create a winston logger whose default label is derived from the calling file's path.
 *
 * The label is the path relative to the first `src/` or `dist/` segment, without its
 * extension. Paths outside those directories fall back to the file's base name instead
 * of crashing on a failed match.
 *
 * @param {string} filepath - Path of the module requesting a logger.
 * @returns {Object} Logger writing to the console (at `args.level`) and a daily rotated file.
 */
function logger(filepath) {
	const root = filepath.match(/src\/|dist\//);
	const relativePath = root
		? filepath.slice(root.index + root[0].length)
		: path.basename(filepath);

	// strip only a trailing extension, never an identical substring earlier in the path
	const extension = path.extname(relativePath);
	const filename = extension ? relativePath.slice(0, -extension.length) : relativePath;

	return winston.createLogger({
		format: winston.format.combine(
			winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
			// errors are logged with their stack; non-string messages are inspected
			winston.format(info => (info instanceof Error
				? { ...info, message: info.stack }
				: { ...info, message: typeof info.message === 'string' ? info.message : util.inspect(info.message) }))(),
			winston.format.colorize(),
			winston.format.printf(({ level, timestamp, label, message }) => `${timestamp} ${level} [${label || filename}] ${message}`),
		),
		transports: [
			new winston.transports.Console({
				level: args.level,
				timestamp: true,
			}),
			new winston.transports.DailyRotateFile({
				datePattern: 'YYYY-MM-DD',
				filename: 'log/%DATE%.log',
				level: 'silly',
			}),
		],
	});
}
|
||||
|
||||
// Export the logger factory; each caller passes its own file path, which becomes the default log label.
module.exports = logger;
|
||||
|
||||
866
src/media.js
866
src/media.js
File diff suppressed because it is too large
Load Diff
@@ -5,77 +5,77 @@ const whereOr = require('./utils/where-or');
|
||||
const { fetchSites } = require('./sites');
|
||||
|
||||
/**
 * Convert a raw `networks` row into the curated network shape.
 *
 * @param {Object} network - Network row (id, name, url, description, slug, parameters).
 * @param {boolean} [includeParameters=false] - Expose `network.parameters`; otherwise `parameters` is null.
 * @param {boolean} [includeSites=true] - Attach the result of fetchSites for this network as `sites`.
 * @param {boolean} [includeStudios=false] - Attach the network's rows from `studios` as `studios`.
 * @returns {Promise<Object>} The curated network.
 */
async function curateNetwork(network, includeParameters = false, includeSites = true, includeStudios = false) {
	const curatedNetwork = {
		id: network.id,
		name: network.name,
		url: network.url,
		description: network.description,
		slug: network.slug,
		parameters: includeParameters ? network.parameters : null,
	};

	if (includeSites) {
		curatedNetwork.sites = await fetchSites({ network_id: network.id });
	}

	if (includeStudios) {
		const studios = await knex('studios').where({ network_id: network.id });

		// expose only the public studio fields
		curatedNetwork.studios = studios.map(studio => ({
			id: studio.id,
			name: studio.name,
			url: studio.url,
			description: studio.description,
			slug: studio.slug,
		}));
	}

	return curatedNetwork;
}
|
||||
|
||||
/**
 * Curate a list of network rows in parallel with curateNetwork's default options.
 *
 * @param {Object[]} releases - Network rows (the parameter name is historical).
 * @returns {Promise<Object[]>} Curated networks in input order.
 */
function curateNetworks(releases) {
	return Promise.all(releases.map(release => curateNetwork(release)));
}
|
||||
|
||||
/**
 * Find the network whose stored URL ends with the given URL's domain or equals the URL.
 *
 * @param {string} url - Absolute URL; parsed with `new URL`, so an invalid URL throws.
 * @returns {Promise<Object|null>} The curated network including its parameters, or null if none matches.
 */
async function findNetworkByUrl(url) {
	const { hostname } = new URL(url);
	// escape the dot so only a literal 'www.' prefix is stripped, not e.g. 'wwwx'
	const domain = hostname.replace(/^www\./, '');

	const network = await knex('networks')
		.where('networks.url', 'like', `%${domain}`)
		.orWhere('networks.url', url)
		.first();

	if (network) {
		return curateNetwork(network, true);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch up to 100 networks matching the query object and curate them.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `networks` table.
 * @returns {Promise<Object[]>} Curated networks.
 */
async function fetchNetworks(queryObject) {
	const releases = await knex('networks')
		.where(builder => whereOr(queryObject, 'networks', builder))
		.limit(100);

	return curateNetworks(releases);
}
|
||||
|
||||
/**
 * Fetch up to 100 distinct networks that have at least one release, and curate them.
 *
 * The rows are handed to curateNetworks, so the network columns are selected rather than
 * `site_id` plus an empty column name. Releases whose site has no network are excluded so
 * no all-null row reaches the curator.
 * NOTE(review): grouping by `networks.id` while selecting `networks.*` assumes `id` is the
 * table's primary key — confirm against the schema.
 *
 * @returns {Promise<Object[]>} Curated networks.
 */
async function fetchNetworksFromReleases() {
	const releases = await knex('releases')
		.select('networks.*')
		.leftJoin('sites', 'sites.id', 'releases.site_id')
		.leftJoin('networks', 'networks.id', 'sites.network_id')
		.whereNotNull('networks.id')
		.groupBy('networks.id')
		.limit(100);

	return curateNetworks(releases);
}
|
||||
|
||||
module.exports = {
|
||||
curateNetwork,
|
||||
curateNetworks,
|
||||
fetchNetworks,
|
||||
fetchNetworksFromReleases,
|
||||
findNetworkByUrl,
|
||||
curateNetwork,
|
||||
curateNetworks,
|
||||
fetchNetworks,
|
||||
fetchNetworksFromReleases,
|
||||
findNetworkByUrl,
|
||||
};
|
||||
|
||||
@@ -11,356 +11,356 @@ const whereOr = require('./utils/where-or');
|
||||
const { associateTags } = require('./tags');
|
||||
const { associateActors, scrapeBasicActors } = require('./actors');
|
||||
const {
|
||||
pluckItems,
|
||||
storeMedia,
|
||||
associateMedia,
|
||||
pluckItems,
|
||||
storeMedia,
|
||||
associateMedia,
|
||||
} = require('./media');
|
||||
const { fetchSites } = require('./sites');
|
||||
const slugify = require('./utils/slugify');
|
||||
const capitalize = require('./utils/capitalize');
|
||||
|
||||
/**
 * Apply the joins, selected columns, tag filter, date window, ordering and limit shared by
 * all release queries.
 *
 * @param {Object} queryBuilder - Knex query builder that already targets or joins `releases`.
 * @param {Object} [options]
 * @param {string|string[]} [options.filter=[]] - Tag slug(s); releases carrying any of them are excluded.
 * @param {Date} [options.after] - Exclusive lower bound on `releases.date`.
 * @param {Date} [options.before] - Inclusive upper bound on `releases.date`.
 * @param {number} [options.limit=100] - Maximum number of rows.
 */
function commonQuery(queryBuilder, {
	filter = [],
	after = new Date(0), // January 1970
	before = new Date(2 ** 44), // May 2109
	limit = 100,
} = {}) {
	const finalFilter = [].concat(filter); // ensure filter is array

	queryBuilder
		.leftJoin('sites', 'releases.site_id', 'sites.id')
		.leftJoin('studios', 'releases.studio_id', 'studios.id')
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.select(
			'releases.*',
			'sites.name as site_name', 'sites.slug as site_slug', 'sites.url as site_url', 'sites.network_id', 'sites.parameters as site_parameters',
			// curateRelease reads studio_slug, so select the studio's slug rather than repeating the site's
			'studios.name as studio_name', 'studios.slug as studio_slug', 'studios.url as studio_url',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description',
		)
		.whereNotExists((builder) => {
			// apply tag filters
			builder
				.select('*')
				.from('tags_associated')
				.leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
				.whereIn('tags.slug', finalFilter)
				.where('tags_associated.domain', 'releases')
				.whereRaw('tags_associated.target_id = releases.id');
		})
		.andWhere('releases.date', '>', after)
		.andWhere('releases.date', '<=', before)
		.orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }])
		.limit(limit);
}
|
||||
|
||||
/**
 * Build the curated representation of a release row, loading its actors, tags and media.
 *
 * @param {Object} release - Row from `releases` including the site_*, studio_* and network_*
 *   columns added by commonQuery.
 * @returns {Promise<Object>} Curated release with nested actors, tags, media, site, studio and network.
 */
async function curateRelease(release) {
	// the three lookups are independent, so run them in parallel
	const [actors, tags, media] = await Promise.all([
		knex('actors_associated')
			.select(
				'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate',
				'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
				'media.thumbnail as avatar',
			)
			.where({ release_id: release.id })
			.leftJoin('actors', 'actors.id', 'actors_associated.actor_id')
			.leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
			.leftJoin('media', (builder) => {
				builder
					.on('media.target_id', 'actors.id')
					.andOnVal('media.domain', 'actors')
					.andOnVal('media.index', '0');
			})
			.orderBy('actors.gender'),
		knex('tags_associated')
			.select('tags.name', 'tags.slug')
			.where({
				domain: 'releases',
				target_id: release.id,
			})
			.leftJoin('tags', 'tags.id', 'tags_associated.tag_id')
			.orderBy('tags.priority', 'desc'),
		knex('media')
			.where({
				target_id: release.id,
				domain: 'releases',
			})
			.orderBy(['role', 'index']),
	]);

	const curatedRelease = {
		id: release.id,
		type: release.type,
		title: release.title,
		date: release.date,
		dateAdded: release.created_at,
		description: release.description,
		url: release.url,
		shootId: release.shoot_id,
		entryId: release.entry_id,
		actors: actors.map(actor => ({
			id: actor.id,
			slug: actor.slug,
			name: actor.name,
			gender: actor.gender,
			birthdate: actor.birthdate,
			// ages are null when a date is missing, instead of NaN or an age relative to today
			age: actor.birthdate ? moment().diff(actor.birthdate, 'years') : null,
			ageThen: actor.birthdate && release.date ? moment(release.date).diff(actor.birthdate, 'years') : null,
			avatar: actor.avatar,
			origin: actor.birth_country_alpha2
				? {
					country: {
						name: actor.birth_country_alias,
						alpha2: actor.birth_country_alpha2,
					},
				}
				: null,
		})),
		director: release.director,
		tags,
		duration: release.duration,
		photos: media.filter(item => item.role === 'photo'),
		poster: media.find(item => item.role === 'poster'),
		covers: media.filter(item => item.role === 'cover'),
		trailer: media.find(item => item.role === 'trailer'),
		site: {
			id: release.site_id,
			name: release.site_name,
			independent: !!release.site_parameters?.independent,
			slug: release.site_slug,
			url: release.site_url,
		},
		studio: release.studio_id
			? {
				id: release.studio_id,
				name: release.studio_name,
				slug: release.studio_slug,
				url: release.studio_url,
			}
			: null,
		network: {
			id: release.network_id,
			name: release.network_name,
			description: release.network_description,
			slug: release.network_slug,
			url: release.network_url,
		},
	};

	return curatedRelease;
}
|
||||
|
||||
/**
 * Curate a list of release rows in parallel.
 *
 * @param {Object[]} releases - Rows as accepted by curateRelease.
 * @returns {Promise<Object[]>} Curated releases in input order.
 */
function curateReleases(releases) {
	return Promise.all(releases.map(release => curateRelease(release)));
}
|
||||
|
||||
/**
 * Replace a release's fallback site with the site matching its scraped channel.
 *
 * Releases whose site is not a fallback are returned unchanged, unless the channel is
 * marked `force`.
 *
 * @param {Object} release - Scraped release; `channel` is a name/slug string or an object
 *   with `name`, `slug` and optionally `force`.
 * @returns {Promise<Object>} The release, with `site` replaced when a channel site was matched.
 * @throws {Error} When a channel site is required but no channel is given or none matches.
 */
async function attachChannelSite(release) {
	if (!release.site?.isFallback && !release.channel?.force) {
		return release;
	}

	if (!release.channel) {
		throw new Error(`Unable to derive channel site from generic URL: ${release.url}`);
	}

	// a plain string channel serves as both name and slug
	const channelName = release.channel.name || release.channel;
	const channelSlug = release.channel.slug || release.channel;

	const [site] = await fetchSites({
		name: channelName,
		slug: channelSlug,
	});

	if (site) {
		return {
			...release,
			site,
		};
	}

	throw new Error(`Unable to match channel '${channelSlug}' from generic URL: ${release.url}`);
}
|
||||
|
||||
/**
 * Resolve a release's studio reference to the matching row from `studios`.
 *
 * @param {Object} release - Release whose `studio`, if set, is matched against a studio's
 *   name, slug or URL.
 * @returns {Promise<Object>} The release itself when it has no studio; otherwise a copy whose
 *   `studio` is the first matching row (undefined when nothing matches).
 */
async function attachStudio(release) {
	if (!release.studio) {
		return release;
	}

	const studio = await knex('studios')
		.where('name', release.studio)
		.orWhere('slug', release.studio)
		.orWhere('url', release.studio)
		.first();

	return {
		...release,
		studio,
	};
}
|
||||
|
||||
/**
 * Map a scraped release onto the column names of a `releases` row.
 *
 * @param {Object} release - Scraped release with `site` attached and optionally `studio`.
 * @param {number} batchId - Batch identifier recorded as `updated_batch_id`.
 * @param {Object} [existingRelease] - Existing entry, if any; `created_batch_id` is only set
 *   when this is absent.
 * @returns {Promise<Object>} The row to insert or update.
 */
async function curateReleaseEntry(release, batchId, existingRelease) {
	const slug = slugify(release.title, {
		encode: true,
		limit: config.titleSlugLength,
	});

	const curatedRelease = {
		site_id: release.site.id,
		studio_id: release.studio ? release.studio.id : null,
		shoot_id: release.shootId || null,
		entry_id: release.entryId || null,
		type: release.type,
		url: release.url,
		title: release.title,
		slug,
		date: release.date,
		description: release.description,
		// director: release.director,
		duration: release.duration,
		// likes: release.rating && release.rating.likes,
		// dislikes: release.rating && release.rating.dislikes,
		// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
		deep: typeof release.deep === 'boolean' ? release.deep : false,
		deep_url: release.deepUrl,
		updated_batch_id: batchId,
		// only stamp the creation batch on first insert
		...(!existingRelease && { created_batch_id: batchId }),
	};

	return curatedRelease;
}
|
||||
|
||||
/**
 * Fetch and curate releases whose own columns match the query object.
 *
 * @param {Object} [queryObject={}] - Conditions handed to whereOr for the `releases` table.
 * @param {Object} [options={}] - Filter, date window and limit options for commonQuery.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchReleases(queryObject = {}, options = {}) {
	const releases = await knex('releases')
		.modify(commonQuery, options)
		.andWhere(builder => whereOr(queryObject, 'releases', builder));

	return curateReleases(releases);
}
|
||||
|
||||
/**
 * Fetch and curate releases whose site matches the query object.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `sites` table.
 * @param {Object} [options={}] - Filter, date window and limit options for commonQuery.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchSiteReleases(queryObject, options = {}) {
	const releases = await knex('releases')
		.modify(commonQuery, options)
		.where(builder => whereOr(queryObject, 'sites', builder));

	return curateReleases(releases);
}
|
||||
|
||||
/**
 * Fetch and curate releases whose network matches the query object.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `networks` table.
 * @param {Object} [options={}] - Filter, date window and limit options for commonQuery.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchNetworkReleases(queryObject, options = {}) {
	const releases = await knex('releases')
		.modify(commonQuery, options)
		.where(builder => whereOr(queryObject, 'networks', builder));

	return curateReleases(releases);
}
|
||||
|
||||
/**
 * Fetch and curate the releases associated with actors matching the query object.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `actors` table.
 * @param {Object} [options={}] - Filter, date window and limit options for commonQuery.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchActorReleases(queryObject, options = {}) {
	const releases = await knex('actors_associated')
		.leftJoin('releases', 'actors_associated.release_id', 'releases.id')
		.leftJoin('actors', 'actors_associated.actor_id', 'actors.id')
		.select(
			'actors.name as actor_name',
		)
		.modify(commonQuery, options)
		.where(builder => whereOr(queryObject, 'actors', builder));

	return curateReleases(releases);
}
|
||||
|
||||
/**
 * Fetch and curate the releases associated with tags matching the query object.
 *
 * @param {Object} queryObject - Conditions handed to whereOr for the `tags` table.
 * @param {Object} [options={}] - Filter, date window and limit options for commonQuery.
 * @returns {Promise<Object[]>} Curated releases.
 */
async function fetchTagReleases(queryObject, options = {}) {
	const releases = await knex('tags_associated')
		.leftJoin('releases', 'tags_associated.target_id', 'releases.id')
		.leftJoin('tags', 'tags_associated.tag_id', 'tags.id')
		.select(
			'tags.name as tag_name',
		)
		.modify(commonQuery, options)
		// tags_associated is keyed by domain; only release associations are relevant here
		.where('tags_associated.domain', 'releases')
		.where(builder => whereOr(queryObject, 'tags', builder));

	return curateReleases(releases);
}
|
||||
|
||||
/**
 * Collect the unique actors across releases, keyed by actor slug.
 *
 * @param {Object[]} releases - Releases whose `actors` entries are name strings or profile
 *   objects with a `name` and optionally an `avatar`.
 * @returns {Object} Map of slug to `{ name, slug, releaseIds: Set, avatars, ...profile }`.
 */
function accumulateActors(releases) {
	return releases.reduce((acc, release) => {
		if (!Array.isArray(release.actors)) return acc;

		release.actors.forEach((actor) => {
			// entries are either a plain name or a profile object; skip anything else instead of throwing
			const rawName = typeof actor === 'string' ? actor : actor?.name;

			if (typeof rawName !== 'string') return;

			const actorName = rawName.trim();
			const actorSlug = slugify(actorName);

			if (!actorSlug) return;

			if (!acc[actorSlug]) {
				acc[actorSlug] = {
					name: actorName,
					slug: actorSlug,
					releaseIds: new Set(),
					avatars: [],
				};
			}

			acc[actorSlug].releaseIds.add(release.id);

			if (typeof actor === 'object') {
				// actor input contains profile info; keep the bookkeeping fields authoritative
				acc[actorSlug] = {
					...acc[actorSlug],
					...actor,
					name: actorName,
					slug: actorSlug,
					releaseIds: acc[actorSlug].releaseIds,
					avatars: acc[actorSlug].avatars,
				};
			}

			if (actor.avatar) {
				// avatars without an explicit copyright default to the release's network name
				const avatar = Array.isArray(actor.avatar)
					? actor.avatar.map(avatarX => ({
						src: avatarX.src || avatarX,
						copyright: avatarX.copyright === undefined ? capitalize(release.site?.network?.name) : avatarX.copyright,
					}))
					: {
						src: actor.avatar.src || actor.avatar,
						copyright: actor.avatar.copyright === undefined ? capitalize(release.site?.network?.name) : actor.avatar.copyright,
					};

				acc[actorSlug].avatars = acc[actorSlug].avatars.concat([avatar]); // don't flatten fallbacks
			}
		});

		return acc;
	}, {});
}
|
||||
|
||||
/**
 * Store the media attached to a batch of stored releases and associate it with them.
 *
 * Does nothing unless `argv.media` is set. Each asset role is only processed when both
 * its category flag (`argv.images` / `argv.videos`) and its own flag are set.
 *
 * @param {Object[]} releases - stored releases; `id`, `poster`, `covers`, `photos`, `trailer` and `teaser` are read
 * @returns {Promise<void>}
 */
async function storeReleaseAssets(releases) {
	if (!argv.media) {
		return;
	}

	// [role, category flag, role flag, media picker]; roles are processed one after another in this order
	const assetTypes = [
		['poster', 'images', 'posters', release => [release.poster]],
		['cover', 'images', 'covers', release => release.covers],
		['photo', 'images', 'photos', release => pluckItems(release.photos)],
		['trailer', 'videos', 'trailers', release => [release.trailer]],
		['teaser', 'videos', 'teasers', release => [release.teaser]],
	];

	for (const [role, categoryFlag, roleFlag, pickMedia] of assetTypes) {
		if (argv[categoryFlag] && argv[roleFlag]) {
			const mediaByReleaseId = Object.fromEntries(releases.map(release => [release.id, pickMedia(release)]));
			const storedMedia = await storeMedia(Object.values(mediaByReleaseId).flat(), 'release', role);

			// storeMedia may yield nothing, in which case there is nothing to associate
			if (storedMedia) await associateMedia(mediaByReleaseId, storedMedia, 'release', role);
		}
	}
}
|
||||
|
||||
async function updateReleasesSearch(releaseIds) {
|
||||
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
|
||||
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
|
||||
|
||||
const documents = await knex.raw(`
|
||||
const documents = await knex.raw(`
|
||||
SELECT
|
||||
releases.id AS release_id,
|
||||
TO_TSVECTOR(
|
||||
@@ -391,117 +391,117 @@ async function updateReleasesSearch(releaseIds) {
|
||||
GROUP BY releases.id, sites.name, sites.slug, sites.alias, sites.url, networks.name, networks.slug, networks.url;
|
||||
`, releaseIds && [releaseIds]);
|
||||
|
||||
if (documents.rows?.length > 0) {
|
||||
const query = knex('releases_search').insert(documents.rows).toString();
|
||||
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
||||
}
|
||||
if (documents.rows?.length > 0) {
|
||||
const query = knex('releases_search').insert(documents.rows).toString();
|
||||
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Store a single scraped release, or update it when it already exists and `argv.redownload` is set.
 *
 * @param {Object} release - scraped release; requires `site` and `entryId`
 * @param {*} batchId - batch identifier passed on to `curateReleaseEntry`
 * @returns {Promise<Object|null>} the inserted row, the pre-existing row, or null when the entry ID is missing
 * @throws {Error} when the release has no site
 */
async function storeRelease(release, batchId) {
	if (!release.site) {
		throw new Error(`Missing site, unable to store "${release.title}" (${release.url})`);
	}

	if (!release.entryId) {
		logger.warn(`Missing entry ID, unable to store "${release.title}" (${release.url})`);
		return null;
	}

	const existingRelease = await knex('releases')
		.where({
			entry_id: release.entryId,
			site_id: release.site.id,
		})
		.first();

	const curatedRelease = await curateReleaseEntry(release, batchId, existingRelease);

	if (existingRelease) {
		if (!argv.redownload) {
			return existingRelease;
		}

		const [updatedRelease] = await knex('releases')
			.where('id', existingRelease.id)
			.update({
				...existingRelease,
				...curatedRelease,
			})
			.returning('*');

		// the updated row has the same ID as the existing one, so tags are associated exactly once
		await associateTags(release, existingRelease.id);

		if (updatedRelease) {
			logger.info(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);
		}

		// NOTE(review): the pre-update row is returned, as before; confirm callers do not need the updated columns
		return existingRelease;
	}

	const [releaseEntry] = await knex('releases')
		.insert(curatedRelease)
		.returning('*');

	await associateTags(release, releaseEntry.id);

	logger.info(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);

	return releaseEntry;
}
|
||||
|
||||
/**
 * Store a list of scraped releases under a fresh batch, then associate actors,
 * refresh the search documents and store the release media.
 *
 * A release that fails to store is logged and left out of the result.
 *
 * @param {Object[]} releases - scraped releases
 * @returns {Promise<{releases: Object[], actors: Object}>} stored releases (with `id` and `slug`) and accumulated actors
 */
async function storeReleases(releases) {
	const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

	const storeResults = await Promise.map(releases, async (release) => {
		try {
			const releaseWithChannelSite = await attachChannelSite(release);
			const releaseWithStudio = await attachStudio(releaseWithChannelSite);
			const storedRelease = await storeRelease(releaseWithStudio, batchId);

			return storedRelease && {
				id: storedRelease.id,
				slug: storedRelease.slug,
				...releaseWithChannelSite,
			};
		} catch (error) {
			logger.error(error);

			return null;
		}
	}, {
		concurrency: 10,
	});

	// drop releases that could not be stored
	const storedReleases = storeResults.filter(Boolean);

	logger.info(`Stored ${storedReleases.length} new releases`);

	const actors = accumulateActors(storedReleases);

	// actors need to be stored before generating search
	await associateActors(actors, storedReleases);

	await Promise.all([
		updateReleasesSearch(storedReleases.map(release => release.id)),
		storeReleaseAssets(storedReleases),
	]);

	if (argv.withProfiles && Object.keys(actors).length > 0) {
		await scrapeBasicActors();
	}

	return {
		releases: storedReleases,
		actors,
	};
}
|
||||
|
||||
module.exports = {
|
||||
fetchReleases,
|
||||
fetchActorReleases,
|
||||
fetchSiteReleases,
|
||||
fetchNetworkReleases,
|
||||
fetchTagReleases,
|
||||
storeRelease,
|
||||
storeReleases,
|
||||
updateReleasesSearch,
|
||||
fetchReleases,
|
||||
fetchActorReleases,
|
||||
fetchSiteReleases,
|
||||
fetchNetworkReleases,
|
||||
fetchTagReleases,
|
||||
storeRelease,
|
||||
storeReleases,
|
||||
updateReleasesSearch,
|
||||
};
|
||||
|
||||
@@ -3,18 +3,18 @@
|
||||
const knex = require('./knex');
|
||||
|
||||
/**
 * Fetch rows from the `releases` table.
 *
 * @param {number} [limit=100] - maximum number of rows
 * @returns {Promise<Object[]>} release rows
 */
async function fetchReleases(limit = 100) {
	return knex('releases').limit(limit);
}
|
||||
|
||||
/**
 * Search releases through the `search_releases` database function.
 *
 * @param {string} query - search query, passed as a bound parameter
 * @param {number} [limit=100] - maximum number of rows
 * @returns {Promise<Object[]>} matching rows
 */
async function searchReleases(query, limit = 100) {
	const { rows } = await knex.raw('SELECT * FROM search_releases(?) LIMIT ?;', [query, limit]);

	return rows;
}
|
||||
|
||||
module.exports = {
|
||||
fetchReleases,
|
||||
searchReleases,
|
||||
fetchReleases,
|
||||
searchReleases,
|
||||
};
|
||||
|
||||
@@ -1,199 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
const argv = require('./argv');
|
||||
const include = require('./utils/argv-include')(argv);
|
||||
const knex = require('./knex');
|
||||
const scrapers = require('./scrapers/scrapers');
|
||||
const { findSiteByUrl } = require('./sites');
|
||||
const { findNetworkByUrl } = require('./networks');
|
||||
const { storeReleases } = require('./releases');
|
||||
|
||||
/**
 * Resolve the site a release belongs to.
 *
 * Order of preference: the site already attached to the release, the site matching the URL,
 * and finally the network matching the URL, presented as a site flagged with `isFallback`.
 *
 * @param {string} [url] - release URL
 * @param {Object} [release] - release that may already carry a `site`
 * @returns {Promise<Object|null>} site, fallback network site, or null when nothing matches
 */
async function findSite(url, release) {
	if (release?.site) return release.site;
	if (!url) return null;

	const site = await findSiteByUrl(url);

	if (site) {
		return site;
	}

	const network = await findNetworkByUrl(url);

	if (!network) {
		return null;
	}

	// no site matches the URL; let the network stand in for the site
	return {
		...network,
		network,
		isFallback: true,
	};
}
|
||||
|
||||
/**
 * Scrape a single release (scene or movie) from a URL or from a pre-scraped release.
 *
 * @param {string|Object} [source] - release URL, or a pre-scraped release with a `url`
 * @param {Object|null} [basicRelease=null] - basic release info to extend when `source` is a URL
 * @param {string} [type='scene'] - 'scene' uses the scraper's fetchScene, anything else fetchMovie
 * @param {*} [beforeFetchLatest] - value passed through to the scraper
 * @returns {Promise<Object|null>} the release merged with the scraped details and `site`, or
 *   null when a known release cannot be fetched individually by the scraper
 * @throws {Error} when no site or scraper can be found, or the scraper cannot fetch this type from a bare URL
 */
async function scrapeRelease(source, basicRelease = null, type = 'scene', beforeFetchLatest) {
	// profile scraper may return either URLs or pre-scraped scenes
	const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined;
	const url = sourceIsUrlOrEmpty ? source : source?.url;
	const release = sourceIsUrlOrEmpty ? basicRelease : source;

	const site = basicRelease?.site || await findSite(url, release);

	if (!site) {
		throw new Error(`Could not find site for ${url} in database`);
	}

	if (!argv.deep && release) {
		return {
			...release,
			site,
		};
	}

	// a site without a network must end in the missing-scraper error below, not a TypeError
	const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network?.slug];

	if (!scraper) {
		throw new Error(`Could not find scraper for ${url}`);
	}

	if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
		if (release) {
			logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
			return null;
		}

		throw new Error(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
	}

	if (!release) {
		logger.info(`Scraping release from ${url}`);
	}

	const scrapedRelease = type === 'scene'
		? await scraper.fetchScene(url, site, release, beforeFetchLatest, include)
		: await scraper.fetchMovie(url, site, release, beforeFetchLatest, include);

	return {
		...release,
		...scrapedRelease,
		...(scrapedRelease && release?.tags && {
			// concat(undefined) would append an undefined tag when the scraper returns no tags
			tags: release.tags.concat(scrapedRelease.tags || []),
		}),
		site,
	};
}
|
||||
|
||||
/**
 * Collect the movies referenced by stored scenes, scrape and store them,
 * and link every movie to its scenes in `releases_movies`.
 *
 * @param {Object[]} releases - stored scenes; `movie` may be a URL or an object with a `url`
 * @returns {Promise<Object[]>} the scraped movies; empty when `argv.withMovies` is off or no scene has a movie
 */
async function accumulateMovies(releases) {
	if (!argv.withMovies) return [];

	// one entry per movie URL, collecting the IDs of all scenes that reference it
	const moviesByUrl = new Map();

	releases.forEach((release) => {
		if (!release.movie) return;

		const movie = release.movie.url ? release.movie : { url: release.movie };

		if (!moviesByUrl.has(movie.url)) {
			moviesByUrl.set(movie.url, {
				...movie,
				type: 'movie',
				sceneIds: [],
			});
		}

		moviesByUrl.get(movie.url).sceneIds.push(release.id);
	});

	const scrapedMovies = await Promise.map([...moviesByUrl.values()], async movie => scrapeRelease(movie, null, 'movie'));
	const movies = scrapedMovies.filter(Boolean); // scrapeRelease yields null when the scraper cannot fetch movies

	if (movies.length === 0) {
		// nothing to store; avoid creating an empty batch and an empty insert
		return movies;
	}

	const { releases: storedMovies } = await storeReleases(movies);

	const movieAssociations = storedMovies.flatMap(movie => movie.sceneIds.map(sceneId => ({
		movie_id: movie.id,
		scene_id: sceneId,
	})));

	if (movieAssociations.length > 0) {
		await knex('releases_movies').insert(movieAssociations);
	}

	return movies;
}
|
||||
|
||||
/**
 * Scrape a list of release sources and, when `argv.save` is set, store them along with their movies.
 *
 * @param {Array<string|Object>} sources - release URLs or pre-scraped releases
 * @param {string} [type='scene'] - release type assigned to every scraped release
 * @returns {Promise<Object[]>} stored releases when saving, otherwise the scraped releases
 */
async function scrapeReleases(sources, type = 'scene') {
	const scrapeResults = await Promise.map(sources, async source => scrapeRelease(source, null, type), {
		concurrency: 5,
	});

	const curatedReleases = scrapeResults
		.filter(Boolean)
		.map(scrapedRelease => ({ ...scrapedRelease, type }));

	if (!argv.save) {
		return curatedReleases;
	}

	const { releases: storedReleases } = await storeReleases(curatedReleases);

	await accumulateMovies(storedReleases);

	if (storedReleases.length > 0) {
		// one URL per line for the stored releases
		logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
	}

	return storedReleases;
}
|
||||
|
||||
/**
 * Scrape the given sources as scenes.
 *
 * @param {Array<string|Object>} sources - scene URLs or pre-scraped scenes
 * @returns {Promise<Object[]>} result of `scrapeReleases` for type 'scene'
 */
async function scrapeScenes(sources) {
	return scrapeReleases(sources, 'scene');
}
|
||||
|
||||
/**
 * Scrape the given sources as movies.
 *
 * @param {Array<string|Object>} sources - movie URLs or pre-scraped movies
 * @returns {Promise<Object[]>} result of `scrapeReleases` for type 'movie'
 */
async function scrapeMovies(sources) {
	return scrapeReleases(sources, 'movie');
}
|
||||
|
||||
/**
 * Follow every base release to its own page and merge in the details found there.
 *
 * Releases without a URL (or a path and site) are returned untouched. A failed scrape
 * is logged and the release is returned with `deep: false`.
 *
 * @param {Object[]} baseReleases - releases from an overview page
 * @param {*} beforeFetchLatest - value passed through to `scrapeRelease`
 * @returns {Promise<Object[]>} releases in the same order, extended where possible
 */
async function deepFetchReleases(baseReleases, beforeFetchLatest) {
	return Promise.map(baseReleases, async (release) => {
		const canBeFollowed = release.url || (release.path && release.site);

		if (!canBeFollowed) {
			return release;
		}

		try {
			const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest);

			if (!fullRelease) {
				logger.warn(`Release scraper returned empty result for ${release.url}`);

				return release;
			}

			return {
				...release,
				...fullRelease,
				deep: true,
			};
		} catch (error) {
			logger.error(`Failed to scrape ${release.url}: ${error}`);

			return {
				...release,
				deep: false,
			};
		}
	}, {
		concurrency: 2,
	});
}
|
||||
|
||||
module.exports = {
|
||||
deepFetchReleases,
|
||||
scrapeMovies,
|
||||
scrapeRelease,
|
||||
scrapeReleases,
|
||||
scrapeScenes,
|
||||
};
|
||||
@@ -1,184 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const Promise = require('bluebird');
|
||||
const moment = require('moment');
|
||||
|
||||
const argv = require('./argv');
|
||||
const include = require('./utils/argv-include')(argv);
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const { fetchIncludedSites } = require('./sites');
|
||||
const scrapers = require('./scrapers/scrapers');
|
||||
const { deepFetchReleases } = require('./scrape-releases');
|
||||
const { storeReleases } = require('./releases');
|
||||
|
||||
/**
 * Translate `argv.after` into the oldest date releases are scraped for.
 *
 * `argv.after` is either a date ('YYYY-MM-DD' or 'DD-MM-YYYY') or a time distance such as "1 month".
 *
 * @returns {Date} lower date limit in UTC
 */
function getAfterDate() {
	if (/\d{2,4}-\d{2}-\d{2,4}/.test(argv.after)) {
		// using date
		return moment
			.utc(argv.after, ['YYYY-MM-DD', 'DD-MM-YYYY'])
			.toDate();
	}

	// using time distance (e.g. "1 month"); tolerate surrounding and repeated whitespace
	const [amount, unit] = argv.after.trim().split(/\s+/);

	return moment
		.utc()
		.subtract(Number(amount), unit)
		.toDate();
}
|
||||
|
||||
/**
 * Determine which entry IDs on a scraped page are already known.
 *
 * @param {Object[]} latestReleases - releases from the current page; `entryId` is looked up in the database
 * @param {Object[]} accReleases - releases accumulated from previous pages in this run
 * @returns {Promise<Set<string>>} stringified entry IDs that count as duplicates
 */
async function findDuplicateReleaseIds(latestReleases, accReleases) {
	// NOTE(review): the lookup matches on entry_id only, not on site; confirm entry IDs cannot collide across sites
	const storedReleases = await knex('releases')
		.whereIn('entry_id', latestReleases.map(({ entryId }) => entryId));

	// include accumulated releases as duplicates to prevent an infinite
	// loop when the next page contains the same releases as the previous
	return new Set([
		...storedReleases.map(release => String(release.entry_id)),
		...accReleases.map(release => String(release.entryId)),
	]);
}
|
||||
|
||||
/**
 * Page through a site's latest releases, collecting those that are not stored yet
 * and fall within the requested date range or count.
 *
 * Recurses to the next page while the current page yields unique releases and either the
 * page is still within range (or, for undated pages, the null-date limit is not exceeded)
 * or the `argv.last` count has not been met.
 *
 * @param {Object} scraper - site scraper; `fetchLatest` is required for any result
 * @param {Object} site - site being scraped; attached to every release
 * @param {*} beforeFetchLatest - value passed through to the scraper
 * @param {Object[]} accSiteReleases - releases accumulated from other sites, passed through to the scraper
 * @param {Date} [afterDate] - lower date limit, defaults to `getAfterDate()`
 * @param {Object[]} [accReleases=[]] - unique releases accumulated from previous pages
 * @param {number} [page=argv.page] - page to fetch
 * @returns {Promise<Object[]>} unique releases
 */
async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteReleases, afterDate = getAfterDate(), accReleases = [], page = argv.page) {
	if (!argv.latest || !scraper.fetchLatest) {
		return [];
	}

	const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases, include);

	if (!Array.isArray(latestReleases)) {
		logger.warn(`Scraper returned ${latestReleases || 'null'} when fetching latest from '${site.name}' on '${site.network.name}'`);
		return accReleases;
	}

	if (latestReleases.length === 0) {
		return accReleases;
	}

	const oldestReleaseOnPage = latestReleases[latestReleases.length - 1].date;

	// when redownloading, stored releases are not duplicates, but releases accumulated in this run
	// still are; otherwise a site that repeats its last page would be paged through forever
	const duplicateReleaseIds = argv.redownload
		? new Set(accReleases.map(release => String(release.entryId)))
		: await findDuplicateReleaseIds(latestReleases, accReleases);

	const uniqueReleases = latestReleases
		.map(release => ({ ...release, site }))
		.filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
			&& (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit

	logger.verbose(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);

	const allReleases = accReleases.concat(uniqueReleases);

	const pageWithinRange = oldestReleaseOnPage
		? moment(oldestReleaseOnPage).isAfter(afterDate)
		: allReleases.length <= argv.nullDateLimit;

	const lastCountUnmet = argv.last && allReleases.length < argv.last;

	if (uniqueReleases.length > 0 && (pageWithinRange || lastCountUnmet)) {
		// oldest release on page is newer than specified date range, or latest count has not yet been met, fetch next page
		return scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteReleases, afterDate, allReleases, page + 1);
	}

	if (argv.last && allReleases.length >= argv.last) {
		// cap on the accumulated total, not just the current page
		return allReleases.slice(0, argv.last);
	}

	if (oldestReleaseOnPage) {
		return allReleases;
	}

	return allReleases.slice(0, argv.nullDateLimit);
}
|
||||
|
||||
/**
 * Fetch the first page of upcoming releases for a site.
 *
 * @param {Object} scraper - site scraper; `fetchUpcoming` is optional
 * @param {Object} site - site being scraped; attached to every release
 * @param {*} beforeFetchLatest - value passed through to the scraper
 * @returns {Promise<Object[]>} upcoming releases flagged with `upcoming: true`; empty when
 *   upcoming releases are not requested, not supported, or the scraper returns no list
 */
async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) {
	if (!argv.upcoming || !scraper.fetchUpcoming) {
		return [];
	}

	const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest, include);

	// anything but a list (null, an error string) counts as no upcoming releases
	if (!Array.isArray(upcomingReleases)) {
		return [];
	}

	return upcomingReleases.map(release => ({ ...release, site, upcoming: true }));
}
|
||||
|
||||
/**
 * Scrape the latest and upcoming releases of a site, optionally following every release for details.
 *
 * @param {Object} scraper - site scraper; `beforeFetchLatest` is optional
 * @param {Object} site - site being scraped
 * @param {Object[]} accSiteReleases - releases accumulated from other sites, passed through to the scraper
 * @returns {Promise<Object[]>} latest followed by upcoming releases
 */
async function scrapeSiteReleases(scraper, site, accSiteReleases) {
	const beforeFetchLatest = await scraper.beforeFetchLatest?.(site, accSiteReleases);

	const [newReleases, upcomingReleases] = await Promise.all([
		scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteReleases), // fetch basic release info from scene overview
		scrapeUpcomingReleases(scraper, site, beforeFetchLatest), // fetch basic release info from upcoming overview
	]);

	if (argv.upcoming) {
		logger.info(`${site.name}: ${argv.latest ? `Found ${newReleases.length}` : 'Ignoring'} latest releases, ${upcomingReleases.length} upcoming releases`);
	}

	const baseReleases = [...newReleases, ...upcomingReleases];

	if (argv.deep) {
		// follow URL for every release
		return deepFetchReleases(baseReleases, beforeFetchLatest);
	}

	return baseReleases;
}
|
||||
|
||||
/**
 * Scrape the releases of one site with the scraper registered for the site or its network.
 *
 * Never rejects: ignored sites, missing scrapers and scrape failures are logged and yield an empty list.
 *
 * @param {Object} site - site to scrape
 * @param {Object} network - network the site is scraped under
 * @param {Object[]} [accSiteReleases=[]] - releases accumulated from previously scraped sites
 * @returns {Promise<Object[]>} scraped releases with `site` attached
 */
async function scrapeSite(site, network, accSiteReleases = []) {
	if (site.parameters?.ignore) {
		logger.warn(`Ignoring ${network.name}: ${site.name}`);
		return [];
	}

	// prefer the network attached to the site; fall back to the one the site is scraped under
	const networkSlug = (site.network || network)?.slug;
	const scraper = scrapers.releases[site.slug] || scrapers.releases[networkSlug];

	if (!scraper) {
		logger.warn(`No scraper found for '${site.name}' (${site.slug})`);
		return [];
	}

	try {
		const siteReleases = await scrapeSiteReleases(scraper, site, accSiteReleases);

		return siteReleases.map(release => ({ ...release, site }));
	} catch (error) {
		logger.error(`${site.name}: Failed to scrape releases: ${error.message}`);

		return [];
	}
}
|
||||
|
||||
/**
 * Scrape all included sites, network by network, and store the releases when `argv.save` is set.
 *
 * Networks flagged `parameters.sequential` scrape their sites one at a time, handing every
 * site the releases gathered from the sites before it; other networks scrape sites concurrently.
 *
 * @returns {Promise<void>}
 */
async function scrapeSites() {
	const networks = await fetchIncludedSites();

	const scrapedNetworks = await Promise.map(networks, async (network) => {
		if (network.parameters?.sequential) {
			logger.info(`Scraping '${network.name}' sequentially`);

			let accSiteReleases = [];

			for (const site of network.sites) {
				// each site must finish before the next starts, as it receives the releases found so far
				const siteReleases = await scrapeSite(site, network, accSiteReleases); // eslint-disable-line no-await-in-loop

				accSiteReleases = accSiteReleases.concat(siteReleases);
			}

			return accSiteReleases;
		}

		return Promise.map(network.sites, async site => scrapeSite(site, network), {
			concurrency: network.parameters?.concurrency || 2,
		});
	}, {
		// 5 networks at a time
		concurrency: 5,
	});

	// sequential networks yield a flat list, concurrent networks a list per site
	const releases = scrapedNetworks.flat(2);

	if (argv.inspect) {
		console.log(releases);
	}

	if (argv.save) {
		await storeReleases(releases);
	}
}
|
||||
|
||||
module.exports = scrapeSites;
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,37 +3,37 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
/**
 * Strip the site prefix from release titles on sites that put it before a ':' or '|'.
 *
 * Only applies to the 'bubblegumdungeon' and 'ladygonzo' sites; other releases are returned as-is.
 * A title without a separator is kept (trimmed) rather than causing a crash.
 *
 * @param {Object} release - scraped release
 * @param {Object} site - site the release belongs to; `slug` is read
 * @returns {Object} the release, with a cleaned title where applicable
 */
function curateRelease(release, site) {
	const hasPrefixedTitle = ['bubblegumdungeon', 'ladygonzo'].includes(site.slug);

	if (!hasPrefixedTitle || !release?.title) {
		return release;
	}

	// the part between the first and second separator is the actual title
	const [, titleAfterPrefix] = release.title.split(/:|\|/);

	return {
		...release,
		title: (titleAfterPrefix ?? release.title).trim(),
	};
}
|
||||
|
||||
/**
 * Fetch a scene through the shared scraper and apply this network's title curation.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site the scene belongs to
 * @param {Object} [release] - basic release info passed through to the scraper
 * @returns {Promise<Object>} curated scene
 */
async function networkFetchScene(url, site, release) {
	const scene = await fetchScene(url, site, release);

	return curateRelease(scene, site);
}
|
||||
|
||||
/**
 * Fetch a page of latest releases through the shared API scraper and curate their titles.
 *
 * @param {Object} site - site to fetch releases for
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]|*>} curated releases; a non-list API result is passed through
 *   unchanged so the caller can report it
 */
async function fetchLatest(site, page = 1) {
	const releases = await fetchApiLatest(site, page, false);

	if (!Array.isArray(releases)) {
		return releases;
	}

	return releases.map(release => curateRelease(release, site));
}
|
||||
|
||||
/**
 * Fetch a page of upcoming releases through the shared API scraper and curate their titles.
 *
 * @param {Object} site - site to fetch releases for
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]|*>} curated releases; a non-list API result is passed through
 *   unchanged so the caller can handle it
 */
async function fetchUpcoming(site, page = 1) {
	const releases = await fetchApiUpcoming(site, page, false);

	if (!Array.isArray(releases)) {
		return releases;
	}

	return releases.map(release => curateRelease(release, site));
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming,
|
||||
fetchLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -3,47 +3,47 @@
|
||||
const { fetchLatest, fetchScene } = require('./julesjordan');
|
||||
|
||||
/**
 * Derive actors from the scene title when the scene lists none, and tag scenes
 * with more than one actor, or a threesome title, as 'mff'.
 *
 * The input is not modified; a shallow copy is returned.
 *
 * @param {Object} scene - scraped scene; `title`, `actors` and `tags` are read
 * @returns {Object} scene copy with `actors` and `tags` filled in where applicable
 */
function extractActors(scene) {
	if (!scene) {
		return scene;
	}

	const release = { ...scene };
	const title = scene.title;

	if (title && (!scene.actors || scene.actors.length === 0)) {
		const introActorMatches = title.match(/(?:presents|introduces|features|welcomes) (\w+ \w+)/i);
		const introTwoActorMatches = title.match(/(?:presents|introduces|features|welcomes) (?:(\w+)|(\w+ \w+)) and (\w+ \w+)/i);
		const returnActorMatches = title.match(/(?:(^\w+)|(\w+ \w+))(?:,| (?:return|visit|pov|give|suck|lick|milk|love|enjoy|service|is))/i);
		const returnTwoActorMatches = title.match(/(\w+ \w+) and (?:(\w+)|(\w+ \w+)) (?:return|visit|give|suck|lick|milk|love|enjoy|service|are)/i);

		// the most specific pattern wins; the first element is the full match, the rest are candidate names
		const rawActors = (introTwoActorMatches || introActorMatches || returnTwoActorMatches || returnActorMatches)?.slice(1);

		// drop unmatched groups and candidates that are clearly descriptions rather than names
		const actors = rawActors?.filter(actor => actor
			&& !/swallow|\bcum|fuck|suck|give|giving|take|takes|taking|head|teen|babe|cute|beaut|naughty|teacher|nanny|adorable|brunette|blonde|bust|audition|from|\band\b|\bto\b/i.test(actor));

		if (actors) {
			release.actors = actors;
		}
	}

	if (release.actors?.length > 1 || /threesome|threeway/.test(title)) {
		const tags = scene.tags || [];

		// do not add the tag a second time
		release.tags = tags.includes('mff') ? [...tags] : [...tags, 'mff'];
	}

	return release;
}
|
||||
|
||||
/**
 * Fetch the latest scenes through the shared scraper and derive actors from their titles.
 *
 * @param {Object} site - site to fetch scenes for
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]|*>} scenes with actors filled in; a non-list result is passed through unchanged
 */
async function fetchLatestWrap(site, page = 1) {
	const latest = await fetchLatest(site, page);

	if (!Array.isArray(latest)) {
		return latest;
	}

	return latest.map(scene => extractActors(scene));
}
|
||||
|
||||
/**
 * Fetch a scene through the shared scraper and derive actors from its title.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site the scene belongs to
 * @returns {Promise<Object|*>} scene with actors filled in; an empty scraper result is passed through unchanged
 */
async function fetchSceneWrap(url, site) {
	const scene = await fetchScene(url, site);

	return scene ? extractActors(scene) : scene;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchScene: fetchSceneWrap,
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchScene: fetchSceneWrap,
|
||||
};
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
const { get, geta, ctxa } = require('../utils/q');
|
||||
|
||||
function extractActors(actorString) {
|
||||
return actorString
|
||||
return actorString
|
||||
?.replace(/.*:|\(.*\)|\d+(-|\s)year(-|\s)old|nurses?|tangled/ig, '') // remove Patient:, (date) and other nonsense
|
||||
.split(/\band\b|\bvs\b|\/|,|&/ig)
|
||||
.map(actor => actor.trim())
|
||||
@@ -12,120 +12,120 @@ function extractActors(actorString) {
|
||||
}
|
||||
|
||||
function matchActors(actorString, models) {
|
||||
return models
|
||||
.filter(model => new RegExp(model.name, 'i')
|
||||
.test(actorString));
|
||||
return models
|
||||
.filter(model => new RegExp(model.name, 'i')
|
||||
.test(actorString));
|
||||
}
|
||||
|
||||
function scrapeLatest(scenes, site, models) {
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
|
||||
const pathname = qu.url('a.itemimg').slice(1);
|
||||
[release.entryId] = pathname.split('/').slice(-1);
|
||||
release.url = `${site.url}${pathname}`;
|
||||
const pathname = qu.url('a.itemimg').slice(1);
|
||||
[release.entryId] = pathname.split('/').slice(-1);
|
||||
release.url = `${site.url}${pathname}`;
|
||||
|
||||
release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true);
|
||||
release.description = qu.q('.mas_longdescription', true);
|
||||
release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);
|
||||
release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true);
|
||||
release.description = qu.q('.mas_longdescription', true);
|
||||
release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);
|
||||
|
||||
const actorString = qu.q('.mas_description', true);
|
||||
const actors = matchActors(actorString, models);
|
||||
if (actors.length > 0) release.actors = actors;
|
||||
else release.actors = extractActors(actorString);
|
||||
const actorString = qu.q('.mas_description', true);
|
||||
const actors = matchActors(actorString, models);
|
||||
if (actors.length > 0) release.actors = actors;
|
||||
else release.actors = extractActors(actorString);
|
||||
|
||||
const posterPath = qu.img('.itemimg img');
|
||||
release.poster = `${site.url}/${posterPath}`;
|
||||
const posterPath = qu.img('.itemimg img');
|
||||
release.poster = `${site.url}/${posterPath}`;
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ html, qu }, url, site, models) {
|
||||
const release = { url };
|
||||
const release = { url };
|
||||
|
||||
[release.entryId] = url.split('/').slice(-1);
|
||||
release.title = qu.q('.mas_title', true);
|
||||
release.description = qu.q('.mas_longdescription', true);
|
||||
release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
[release.entryId] = url.split('/').slice(-1);
|
||||
release.title = qu.q('.mas_title', true);
|
||||
release.description = qu.q('.mas_longdescription', true);
|
||||
release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
|
||||
const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, '');
|
||||
const actors = matchActors(actorString, models);
|
||||
if (actors.length > 0) release.actors = actors;
|
||||
else release.actors = extractActors(actorString);
|
||||
const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, '');
|
||||
const actors = matchActors(actorString, models);
|
||||
if (actors.length > 0) release.actors = actors;
|
||||
else release.actors = extractActors(actorString);
|
||||
|
||||
release.tags = qu.all('.tags a', true);
|
||||
release.tags = qu.all('.tags a', true);
|
||||
|
||||
release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`);
|
||||
release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`);
|
||||
|
||||
const posterIndex = 'splash:';
|
||||
const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4);
|
||||
if (poster) release.poster = `${site.url}/${poster}`;
|
||||
const posterIndex = 'splash:';
|
||||
const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4);
|
||||
if (poster) release.poster = `${site.url}/${poster}`;
|
||||
|
||||
const trailerIndex = html.indexOf('video/mp4');
|
||||
const trailer = html.slice(html.indexOf('/content', trailerIndex), html.indexOf('.mp4', trailerIndex) + 4);
|
||||
if (trailer) release.trailer = { src: `${site.url}${trailer}` };
|
||||
const trailerIndex = html.indexOf('video/mp4');
|
||||
const trailer = html.slice(html.indexOf('/content', trailerIndex), html.indexOf('.mp4', trailerIndex) + 4);
|
||||
if (trailer) release.trailer = { src: `${site.url}${trailer}` };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function extractModels({ el }, site) {
|
||||
const models = ctxa(el, '.item');
|
||||
const models = ctxa(el, '.item');
|
||||
|
||||
return models.map(({ qu }) => {
|
||||
const actor = { gender: 'female' };
|
||||
return models.map(({ qu }) => {
|
||||
const actor = { gender: 'female' };
|
||||
|
||||
const avatar = qu.q('.itemimg img');
|
||||
actor.avatar = `${site.url}/${avatar.src}`;
|
||||
actor.name = avatar.alt
|
||||
.split(':').slice(-1)[0]
|
||||
.replace(/xtreme girl|nurse/ig, '')
|
||||
.trim();
|
||||
const avatar = qu.q('.itemimg img');
|
||||
actor.avatar = `${site.url}/${avatar.src}`;
|
||||
actor.name = avatar.alt
|
||||
.split(':').slice(-1)[0]
|
||||
.replace(/xtreme girl|nurse/ig, '')
|
||||
.trim();
|
||||
|
||||
const actorPath = qu.url('.itemimg');
|
||||
actor.url = `${site.url}${actorPath.slice(1)}`;
|
||||
const actorPath = qu.url('.itemimg');
|
||||
actor.url = `${site.url}${actorPath.slice(1)}`;
|
||||
|
||||
return actor;
|
||||
});
|
||||
return actor;
|
||||
});
|
||||
}
|
||||
|
||||
async function fetchModels(site, page = 1, accModels = []) {
|
||||
const url = `${site.url}/?models/${page}`;
|
||||
const res = await get(url);
|
||||
const url = `${site.url}/?models/${page}`;
|
||||
const res = await get(url);
|
||||
|
||||
if (res.ok) {
|
||||
const models = extractModels(res.item, site);
|
||||
const nextPage = res.item.qa('.pagenumbers', true)
|
||||
.map(pageX => Number(pageX))
|
||||
.filter(Boolean) // remove << and >>
|
||||
.includes(page + 1);
|
||||
if (res.ok) {
|
||||
const models = extractModels(res.item, site);
|
||||
const nextPage = res.item.qa('.pagenumbers', true)
|
||||
.map(pageX => Number(pageX))
|
||||
.filter(Boolean) // remove << and >>
|
||||
.includes(page + 1);
|
||||
|
||||
if (nextPage) {
|
||||
return fetchModels(site, page + 1, accModels.concat(models));
|
||||
}
|
||||
if (nextPage) {
|
||||
return fetchModels(site, page + 1, accModels.concat(models));
|
||||
}
|
||||
|
||||
return accModels.concat(models, { name: 'Dr. Gray' });
|
||||
}
|
||||
return accModels.concat(models, { name: 'Dr. Gray' });
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1, models) {
|
||||
const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
|
||||
const res = await geta(url, '.item');
|
||||
const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
|
||||
const res = await geta(url, '.item');
|
||||
|
||||
return res.ok ? scrapeLatest(res.items, site, models) : res.status;
|
||||
return res.ok ? scrapeLatest(res.items, site, models) : res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, release, beforeFetchLatest) {
|
||||
const models = beforeFetchLatest || await fetchModels(site);
|
||||
const res = await get(url);
|
||||
const models = beforeFetchLatest || await fetchModels(site);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, url, site, models) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, url, site, models) : res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
beforeFetchLatest: fetchModels,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
beforeFetchLatest: fetchModels,
|
||||
};
|
||||
|
||||
@@ -5,141 +5,141 @@ const { get, getAll, initAll, extractDate } = require('../utils/qu');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
function getFallbacks(source) {
|
||||
return [
|
||||
source.replace('-1x.jpg', '-4x.jpg'),
|
||||
source.replace('-1x.jpg', '-3x.jpg'),
|
||||
source.replace('-1x.jpg', '-2x.jpg'),
|
||||
source,
|
||||
];
|
||||
return [
|
||||
source.replace('-1x.jpg', '-4x.jpg'),
|
||||
source.replace('-1x.jpg', '-3x.jpg'),
|
||||
source.replace('-1x.jpg', '-2x.jpg'),
|
||||
source,
|
||||
];
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, site) {
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
|
||||
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
|
||||
release.url = qu.url('a');
|
||||
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
|
||||
release.url = qu.url('a');
|
||||
|
||||
release.title = qu.q('h5 a', true);
|
||||
release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY');
|
||||
release.title = qu.q('h5 a', true);
|
||||
release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY');
|
||||
|
||||
release.actors = qu.q('h3', true).replace(/featuring:\s?/i, '').split(', ');
|
||||
release.actors = qu.q('h3', true).replace(/featuring:\s?/i, '').split(', ');
|
||||
|
||||
const photoCount = qu.q('.stdimage', 'cnt');
|
||||
[release.poster, ...release.photos] = Array.from({ length: Number(photoCount) }, (value, index) => {
|
||||
const source = qu.img('.stdimage', `src${index}_1x`, site.url);
|
||||
const photoCount = qu.q('.stdimage', 'cnt');
|
||||
[release.poster, ...release.photos] = Array.from({ length: Number(photoCount) }, (value, index) => {
|
||||
const source = qu.img('.stdimage', `src${index}_1x`, site.url);
|
||||
|
||||
return getFallbacks(source);
|
||||
});
|
||||
return getFallbacks(source);
|
||||
});
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ html, qu }, url) {
|
||||
const release = { url };
|
||||
const release = { url };
|
||||
|
||||
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
|
||||
release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
|
||||
|
||||
release.title = qu.q('h2', true);
|
||||
release.description = qu.q('p', true);
|
||||
release.title = qu.q('h2', true);
|
||||
release.description = qu.q('p', true);
|
||||
|
||||
release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/);
|
||||
release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/);
|
||||
|
||||
release.actors = qu.all('h5:not(.video_categories) a').map(actor => ({
|
||||
name: qu.q(actor, null, true),
|
||||
url: qu.url(actor, null),
|
||||
}));
|
||||
release.actors = qu.all('h5:not(.video_categories) a').map(actor => ({
|
||||
name: qu.q(actor, null, true),
|
||||
url: qu.url(actor, null),
|
||||
}));
|
||||
|
||||
release.tags = qu.all('.video_categories a', true);
|
||||
release.tags = qu.all('.video_categories a', true);
|
||||
|
||||
release.duration = qu.dur('.video_categories + p');
|
||||
release.duration = qu.dur('.video_categories + p');
|
||||
|
||||
const poster = qu.img('a img');
|
||||
const poster = qu.img('a img');
|
||||
|
||||
release.poster = getFallbacks(poster);
|
||||
release.photos = qu.imgs('.featured-video img', 'src0_1x').map(source => getFallbacks(source));
|
||||
release.poster = getFallbacks(poster);
|
||||
release.photos = qu.imgs('.featured-video img', 'src0_1x').map(source => getFallbacks(source));
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ el, qu }) {
|
||||
const profile = {};
|
||||
const profile = {};
|
||||
|
||||
const bio = Array.from(qu.q('.widget-content').childNodes).reduce((acc, node, index, nodes) => {
|
||||
const nextNode = nodes[index + 1];
|
||||
const bio = Array.from(qu.q('.widget-content').childNodes).reduce((acc, node, index, nodes) => {
|
||||
const nextNode = nodes[index + 1];
|
||||
|
||||
if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) {
|
||||
acc[slugify(node.textContent, '_')] = nextNode.textContent.trim();
|
||||
}
|
||||
if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) {
|
||||
acc[slugify(node.textContent, '_')] = nextNode.textContent.trim();
|
||||
}
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
|
||||
if (bio.age) profile.age = Number(bio.age);
|
||||
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
|
||||
if (bio.age) profile.age = Number(bio.age);
|
||||
|
||||
if (bio.height && /\d{3}/.test(bio.height)) profile.height = Number(bio.height.match(/\d+/)[0]);
|
||||
if (bio.height && /\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height);
|
||||
if (bio.height && /\d{3}/.test(bio.height)) profile.height = Number(bio.height.match(/\d+/)[0]);
|
||||
if (bio.height && /\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height);
|
||||
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
|
||||
if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase();
|
||||
if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase();
|
||||
|
||||
if (bio.birth_location) profile.birthPlace = bio.birth_location;
|
||||
if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single;
|
||||
if (bio.birth_location) profile.birthPlace = bio.birth_location;
|
||||
if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single;
|
||||
|
||||
if (bio.eye_color) profile.eyes = bio.eye_color;
|
||||
if (bio.eye_color) profile.eyes = bio.eye_color;
|
||||
|
||||
const avatar = qu.img('.tac img');
|
||||
profile.avatar = getFallbacks(avatar);
|
||||
const avatar = qu.img('.tac img');
|
||||
profile.avatar = getFallbacks(avatar);
|
||||
|
||||
profile.releases = scrapeAll(initAll(el, '.featured-video'));
|
||||
profile.releases = scrapeAll(initAll(el, '.featured-video'));
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page) {
|
||||
const url = `${site.url}/tour/categories/movies_${page}_d.html`;
|
||||
const res = await getAll(url, '.featured-video');
|
||||
const url = `${site.url}/tour/categories/movies_${page}_d.html`;
|
||||
const res = await getAll(url, '.featured-video');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, site);
|
||||
}
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, site);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url, '.page-content .row');
|
||||
const res = await get(url, '.page-content .row');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, site);
|
||||
}
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, site);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug, site) {
|
||||
const actorSlug = slugify(actorName, '');
|
||||
const url = `${site.url}/tour/models/${actorSlug}.html`;
|
||||
const res = await get(url, '.page-content .row');
|
||||
const actorSlug = slugify(actorName, '');
|
||||
const url = `${site.url}/tour/models/${actorSlug}.html`;
|
||||
const res = await get(url, '.page-content .row');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item);
|
||||
}
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
async function networkFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'babes');
|
||||
return fetchProfile(actorName, 'babes');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -6,144 +6,144 @@ const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
function scrapeAll(scenes, site) {
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
|
||||
release.title = qu.q('h3 a', true);
|
||||
release.url = qu.url('h3 a');
|
||||
release.title = qu.q('h3 a', true);
|
||||
release.url = qu.url('h3 a');
|
||||
|
||||
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
release.duration = qu.dur('.item-meta li:nth-child(2)');
|
||||
release.description = qu.q('.description', true);
|
||||
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
release.duration = qu.dur('.item-meta li:nth-child(2)');
|
||||
release.description = qu.q('.description', true);
|
||||
|
||||
release.actors = qu.all('a[href*="/models"]', true);
|
||||
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||
release.actors = qu.all('a[href*="/models"]', true);
|
||||
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||
|
||||
[release.poster, ...release.photos] = qu.all('.item-thumbs img')
|
||||
.map(source => [
|
||||
source.getAttribute('src0_3x'),
|
||||
source.getAttribute('src0_2x'),
|
||||
source.getAttribute('src0_1x'),
|
||||
]
|
||||
.filter(Boolean)
|
||||
.map(fallback => (/^http/.test(fallback) ? fallback : `${site.url}${fallback}`)));
|
||||
[release.poster, ...release.photos] = qu.all('.item-thumbs img')
|
||||
.map(source => [
|
||||
source.getAttribute('src0_3x'),
|
||||
source.getAttribute('src0_2x'),
|
||||
source.getAttribute('src0_1x'),
|
||||
]
|
||||
.filter(Boolean)
|
||||
.map(fallback => (/^http/.test(fallback) ? fallback : `${site.url}${fallback}`)));
|
||||
|
||||
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
||||
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ html, qu }, url, site) {
|
||||
const release = { url };
|
||||
const release = { url };
|
||||
|
||||
release.title = qu.q('.item-episode h4 a', true);
|
||||
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
release.duration = qu.dur('.item-meta li:nth-child(2)');
|
||||
release.description = qu.q('.description', true);
|
||||
release.title = qu.q('.item-episode h4 a', true);
|
||||
release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
release.duration = qu.dur('.item-meta li:nth-child(2)');
|
||||
release.description = qu.q('.description', true);
|
||||
|
||||
release.actors = qu.all('.item-episode a[href*="/models"]', true);
|
||||
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||
release.actors = qu.all('.item-episode a[href*="/models"]', true);
|
||||
if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||
|
||||
const posterPath = html.match(/poster="(.*.jpg)"/)?.[1];
|
||||
const trailerPath = html.match(/video src="(.*.mp4)"/)?.[1];
|
||||
const posterPath = html.match(/poster="(.*.jpg)"/)?.[1];
|
||||
const trailerPath = html.match(/video src="(.*.mp4)"/)?.[1];
|
||||
|
||||
if (posterPath) {
|
||||
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
|
||||
release.poster = [
|
||||
poster.replace('-1x', '-3x'),
|
||||
poster.replace('-1x', '-2x'),
|
||||
poster,
|
||||
];
|
||||
}
|
||||
if (posterPath) {
|
||||
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
|
||||
release.poster = [
|
||||
poster.replace('-1x', '-3x'),
|
||||
poster.replace('-1x', '-2x'),
|
||||
poster,
|
||||
];
|
||||
}
|
||||
|
||||
if (trailerPath) {
|
||||
const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;
|
||||
release.trailer = { src: trailer };
|
||||
}
|
||||
if (trailerPath) {
|
||||
const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;
|
||||
release.trailer = { src: trailer };
|
||||
}
|
||||
|
||||
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
||||
release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
|
||||
const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
|
||||
const res = await get(url);
|
||||
const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
|
||||
const res = await get(url);
|
||||
|
||||
if (!res.ok) return [];
|
||||
if (!res.ok) return [];
|
||||
|
||||
const quReleases = initAll(res.item.el, '.item-episode');
|
||||
const releases = scrapeAll(quReleases, site);
|
||||
const quReleases = initAll(res.item.el, '.item-episode');
|
||||
const releases = scrapeAll(quReleases, site);
|
||||
|
||||
const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);
|
||||
const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);
|
||||
|
||||
if (nextPage) {
|
||||
return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
|
||||
}
|
||||
if (nextPage) {
|
||||
return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
|
||||
}
|
||||
|
||||
return accScenes.concat(releases);
|
||||
return accScenes.concat(releases);
|
||||
}
|
||||
|
||||
async function scrapeProfile({ qu }, site, withScenes) {
|
||||
const profile = {};
|
||||
const profile = {};
|
||||
|
||||
const bio = qu.all('.stats li', true).reduce((acc, row) => {
|
||||
const [key, value] = row.split(':');
|
||||
return { ...acc, [slugify(key, '_')]: value.trim() };
|
||||
}, {});
|
||||
const bio = qu.all('.stats li', true).reduce((acc, row) => {
|
||||
const [key, value] = row.split(':');
|
||||
return { ...acc, [slugify(key, '_')]: value.trim() };
|
||||
}, {});
|
||||
|
||||
if (bio.height) profile.height = feetInchesToCm(bio.height);
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
if (bio.height) profile.height = feetInchesToCm(bio.height);
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
profile.avatar = [
|
||||
qu.q('.profile-pic img', 'src0_3x'),
|
||||
qu.q('.profile-pic img', 'src0_2x'),
|
||||
qu.q('.profile-pic img', 'src0_1x'),
|
||||
].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));
|
||||
profile.avatar = [
|
||||
qu.q('.profile-pic img', 'src0_3x'),
|
||||
qu.q('.profile-pic img', 'src0_2x'),
|
||||
qu.q('.profile-pic img', 'src0_1x'),
|
||||
].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));
|
||||
|
||||
if (withScenes) {
|
||||
const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];
|
||||
if (withScenes) {
|
||||
const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];
|
||||
|
||||
if (actorId) {
|
||||
profile.releases = await fetchActorReleases(actorId, site);
|
||||
}
|
||||
}
|
||||
if (actorId) {
|
||||
profile.releases = await fetchActorReleases(actorId, site);
|
||||
}
|
||||
}
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.url}/categories/movies/${page}/latest/`;
|
||||
const res = await geta(url, '.item-episode');
|
||||
const url = `${site.url}/categories/movies/${page}/latest/`;
|
||||
const res = await geta(url, '.item-episode');
|
||||
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug, site, include) {
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
|
||||
const resA = await get(`${site.url}/models/${actorSlugA}.html`);
|
||||
const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`);
|
||||
const resA = await get(`${site.url}/models/${actorSlugA}.html`);
|
||||
const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`);
|
||||
|
||||
return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
|
||||
return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -8,99 +8,99 @@ const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
||||
const genderMap = {
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
};
|
||||
|
||||
function getScreenUrl(item, scene) {
|
||||
return `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${item.screenId}.jpg`;
|
||||
return `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${item.screenId}.jpg`;
|
||||
}
|
||||
|
||||
function encodeId(id) {
|
||||
return Buffer
|
||||
.from(id, 'hex')
|
||||
.toString('base64')
|
||||
.replace(/\+/g, '-')
|
||||
.replace(/\//g, '_')
|
||||
.replace(/=/g, ',');
|
||||
return Buffer
|
||||
.from(id, 'hex')
|
||||
.toString('base64')
|
||||
.replace(/\+/g, '-')
|
||||
.replace(/\//g, '_')
|
||||
.replace(/=/g, ',');
|
||||
}
|
||||
|
||||
function decodeId(id) {
|
||||
const restoredId = id
|
||||
.replace(/-/g, '+')
|
||||
.replace(/_/g, '/')
|
||||
.replace(/,/g, '=');
|
||||
const restoredId = id
|
||||
.replace(/-/g, '+')
|
||||
.replace(/_/g, '/')
|
||||
.replace(/,/g, '=');
|
||||
|
||||
return Buffer
|
||||
.from(restoredId, 'base64')
|
||||
.toString('hex');
|
||||
return Buffer
|
||||
.from(restoredId, 'base64')
|
||||
.toString('hex');
|
||||
}
|
||||
|
||||
function scrapeScene(scene, site) {
|
||||
const release = {
|
||||
site,
|
||||
entryId: scene.id,
|
||||
title: scene.name,
|
||||
description: scene.description,
|
||||
tags: scene.genres.concat(scene.actions).map(genre => genre.name),
|
||||
duration: scene.duration,
|
||||
};
|
||||
const release = {
|
||||
site,
|
||||
entryId: scene.id,
|
||||
title: scene.name,
|
||||
description: scene.description,
|
||||
tags: scene.genres.concat(scene.actions).map(genre => genre.name),
|
||||
duration: scene.duration,
|
||||
};
|
||||
|
||||
const slug = slugify(release.title);
|
||||
release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;
|
||||
const slug = slugify(release.title);
|
||||
release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;
|
||||
|
||||
const date = new Date(scene.releaseDate);
|
||||
release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
||||
const date = new Date(scene.releaseDate);
|
||||
release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
||||
|
||||
release.actors = scene.actors.map(actor => ({ name: actor.name, gender: genderMap[actor.gender] }));
|
||||
release.actors = scene.actors.map(actor => ({ name: actor.name, gender: genderMap[actor.gender] }));
|
||||
|
||||
if (scene.is4k) release.tags.push('4k');
|
||||
if (scene.gay) release.tags.push('gay');
|
||||
if (scene.is4k) release.tags.push('4k');
|
||||
if (scene.gay) release.tags.push('gay');
|
||||
|
||||
const defaultPoster = scene.screenshots.find(photo => photo.default === true);
|
||||
const photoset = scene.screenshots.filter(photo => photo.default === false);
|
||||
const defaultPoster = scene.screenshots.find(photo => photo.default === true);
|
||||
const photoset = scene.screenshots.filter(photo => photo.default === false);
|
||||
|
||||
const photos = defaultPoster ? photoset : photoset.slice(1);
|
||||
const poster = defaultPoster || photoset[0];
|
||||
const photos = defaultPoster ? photoset : photoset.slice(1);
|
||||
const poster = defaultPoster || photoset[0];
|
||||
|
||||
release.poster = getScreenUrl(poster, scene);
|
||||
release.photos = photos.map(photo => getScreenUrl(photo, scene));
|
||||
release.poster = getScreenUrl(poster, scene);
|
||||
release.photos = photos.map(photo => getScreenUrl(photo, scene));
|
||||
|
||||
release.trailer = {
|
||||
src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
|
||||
};
|
||||
release.trailer = {
|
||||
src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
|
||||
};
|
||||
|
||||
release.channel = scene.series.name
|
||||
.replace(/[! .]/g, '')
|
||||
.replace('&', 'and');
|
||||
release.channel = scene.series.name
|
||||
.replace(/[! .]/g, '')
|
||||
.replace('&', 'and');
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeLatest(scenes, site) {
|
||||
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
||||
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
from: (page - 1) * 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: {
|
||||
releaseDate: {
|
||||
lte: 'now',
|
||||
},
|
||||
},
|
||||
},
|
||||
/*
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
from: (page - 1) * 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: {
|
||||
releaseDate: {
|
||||
lte: 'now',
|
||||
},
|
||||
},
|
||||
},
|
||||
/*
|
||||
* global fetch
|
||||
{
|
||||
nested: {
|
||||
@@ -122,66 +122,66 @@ async function fetchLatest(site, page = 1) {
|
||||
},
|
||||
},
|
||||
*/
|
||||
{
|
||||
nested: {
|
||||
path: 'series',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'series.id': {
|
||||
operator: 'AND',
|
||||
query: site.parameters.siteId,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
{
|
||||
nested: {
|
||||
path: 'series',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'series.id': {
|
||||
operator: 'AND',
|
||||
query: site.parameters.siteId,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeLatest(res.body.hits.hits, site);
|
||||
return scrapeLatest(res.body.hits.hits, site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const encodedId = new URL(url).pathname.split('/')[2];
|
||||
const entryId = decodeId(encodedId);
|
||||
const encodedId = new URL(url).pathname.split('/')[2];
|
||||
const entryId = decodeId(encodedId);
|
||||
|
||||
const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
||||
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -10,44 +10,44 @@ const slugify = require('../utils/slugify');
|
||||
const { ex } = require('../utils/q');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.echThumb').toArray();
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.echThumb').toArray();
|
||||
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thmb_lnk');
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
|
||||
const entryId = url.split('/')[3].slice(5);
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thmb_lnk');
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
|
||||
const entryId = url.split('/')[3].slice(5);
|
||||
|
||||
const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
|
||||
const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
|
||||
const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
|
||||
const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
|
||||
|
||||
const photoElement = $(element).find('.rollover-image');
|
||||
const poster = `https:${photoElement.attr('data-original')}`;
|
||||
const photoElement = $(element).find('.rollover-image');
|
||||
const poster = `https:${photoElement.attr('data-original')}`;
|
||||
|
||||
const photosUrl = photoElement.attr('data-rollover-url');
|
||||
const photosMaxIndex = photoElement.attr('data-rollover-max-index');
|
||||
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
|
||||
const photosUrl = photoElement.attr('data-rollover-url');
|
||||
const photosMaxIndex = photoElement.attr('data-rollover-max-index');
|
||||
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
|
||||
|
||||
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
|
||||
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
|
||||
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
|
||||
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
duration,
|
||||
poster,
|
||||
photos,
|
||||
rating: null,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
});
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
duration,
|
||||
poster,
|
||||
photos,
|
||||
rating: null,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/* no dates available, breaks database
|
||||
@@ -80,63 +80,63 @@ function scrapeUpcoming(html, site) {
|
||||
*/
|
||||
|
||||
function scrapeScene(html, url, _site) {
|
||||
const { qu } = ex(html, '.playerSection');
|
||||
const release = {};
|
||||
const { qu } = ex(html, '.playerSection');
|
||||
const release = {};
|
||||
|
||||
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
|
||||
[release.entryId] = url.split('/')[3].match(/\d+$/);
|
||||
release.title = qu.q('.ps-vdoHdd h1', true);
|
||||
release.description = qu.q('.vdoDesc', true);
|
||||
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
|
||||
[release.entryId] = url.split('/')[3].match(/\d+$/);
|
||||
release.title = qu.q('.ps-vdoHdd h1', true);
|
||||
release.description = qu.q('.vdoDesc', true);
|
||||
|
||||
release.actors = qu.all('a[href*="/model"]', true);
|
||||
release.tags = qu.all('.vdoTags a', true);
|
||||
release.actors = qu.all('a[href*="/model"]', true);
|
||||
release.tags = qu.all('.vdoTags a', true);
|
||||
|
||||
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
||||
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
||||
|
||||
const poster = qu.img('img#player-overlay-image');
|
||||
release.poster = [
|
||||
poster,
|
||||
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
||||
];
|
||||
const poster = qu.img('img#player-overlay-image');
|
||||
release.poster = [
|
||||
poster,
|
||||
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
||||
];
|
||||
|
||||
release.trailer = { src: qu.trailer() };
|
||||
release.trailer = { src: qu.trailer() };
|
||||
|
||||
// all scenes seem to have 12 album photos available, not always included on the page
|
||||
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
|
||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||
// all scenes seem to have 12 album photos available, not always included on the page
|
||||
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
|
||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||
|
||||
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||
|
||||
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
|
||||
if (channel === 'remaster') release.channel = 'bangbrosremastered';
|
||||
else release.channel = channel;
|
||||
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
|
||||
if (channel === 'remaster') release.channel = 'bangbrosremastered';
|
||||
else release.channel = channel;
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { q } = ex(html);
|
||||
const profile = {};
|
||||
const { q } = ex(html);
|
||||
const profile = {};
|
||||
|
||||
const avatar = q('.profilePic img', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
const avatar = q('.profilePic img', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
|
||||
profile.releases = scrape(html);
|
||||
profile.releases = scrape(html);
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
function scrapeProfileSearch(html, actorName) {
|
||||
const { qu } = ex(html);
|
||||
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
|
||||
const { qu } = ex(html);
|
||||
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
|
||||
|
||||
return actorLink ? `https://bangbros.com${actorLink}` : null;
|
||||
return actorLink ? `https://bangbros.com${actorLink}` : null;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.get(`${site.url}/${page}`);
|
||||
const res = await bhttp.get(`${site.url}/${page}`);
|
||||
|
||||
return scrape(res.body.toString(), site);
|
||||
return scrape(res.body.toString(), site);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -148,43 +148,43 @@ async function fetchUpcoming(site) {
|
||||
*/
|
||||
|
||||
async function fetchScene(url, site, release) {
|
||||
if (!release?.date) {
|
||||
logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
|
||||
}
|
||||
if (!release?.date) {
|
||||
logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
|
||||
}
|
||||
|
||||
const { origin } = new URL(url);
|
||||
const res = await bhttp.get(url);
|
||||
const { origin } = new URL(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
|
||||
}
|
||||
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
|
||||
}
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `https://bangbros.com/search/${actorSlug}`;
|
||||
const res = await bhttp.get(url);
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `https://bangbros.com/search/${actorSlug}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);
|
||||
if (res.statusCode === 200) {
|
||||
const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);
|
||||
|
||||
if (actorUrl) {
|
||||
const actorRes = await bhttp.get(actorUrl);
|
||||
if (actorUrl) {
|
||||
const actorRes = await bhttp.get(actorUrl);
|
||||
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorRes.body.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorRes.body.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
// fetchUpcoming, no dates available
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
// fetchUpcoming, no dates available
|
||||
};
|
||||
|
||||
@@ -5,33 +5,33 @@
|
||||
const { fetchScene, fetchLatest, fetchUpcoming, fetchProfile } = require('./gamma');
|
||||
|
||||
async function fetchSceneWrapper(url, site, baseRelease) {
|
||||
const release = await fetchScene(url, site, baseRelease);
|
||||
const release = await fetchScene(url, site, baseRelease);
|
||||
|
||||
if (site.isFallback && release.channel) {
|
||||
const channelUrl = url.replace('blowpass.com', `${release.channel}.com`);
|
||||
if (site.isNetwork && release.channel) {
|
||||
const channelUrl = url.replace('blowpass.com', `${release.channel}.com`);
|
||||
|
||||
if (['onlyteenblowjobs', 'mommyblowsbest'].includes(release.channel)) {
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'scene/');
|
||||
return release;
|
||||
}
|
||||
if (['onlyteenblowjobs', 'mommyblowsbest'].includes(release.channel)) {
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'scene/');
|
||||
return release;
|
||||
}
|
||||
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'video/');
|
||||
}
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'video/');
|
||||
}
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function getActorReleasesUrl(actorPath, page = 1) {
|
||||
return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
|
||||
return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
|
||||
}
|
||||
|
||||
async function networkFetchProfile(actorName, scraperSlug, site, include) {
|
||||
return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
|
||||
return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
};
|
||||
|
||||
@@ -5,90 +5,90 @@ const bhttp = require('bhttp');
|
||||
const { ex } = require('../utils/q');
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||
const profile = {};
|
||||
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||
const profile = {};
|
||||
|
||||
const bio = qu.all('.infobox tr[valign="top"]')
|
||||
.map(detail => qu.all(detail, 'td', true))
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
||||
const bio = qu.all('.infobox tr[valign="top"]')
|
||||
.map(detail => qu.all(detail, 'td', true))
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
||||
|
||||
|
||||
/* unreliable, see: Syren De Mer
|
||||
/* unreliable, see: Syren De Mer
|
||||
const catlinks = qa('#mw-normal-catlinks a', true);
|
||||
const isTrans = catlinks.some(link => link.match(/shemale|transgender/i));
|
||||
profile.gender = isTrans ? 'transsexual' : 'female';
|
||||
*/
|
||||
|
||||
profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
|
||||
profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
|
||||
|
||||
profile.description = qu.q('#mw-content-text > p', true);
|
||||
profile.description = qu.q('#mw-content-text > p', true);
|
||||
|
||||
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
|
||||
if (bio.Measurements) {
|
||||
const measurements = bio.Measurements
|
||||
.match(/\d+(\w+)?-\d+-\d+/g)
|
||||
if (bio.Measurements) {
|
||||
const measurements = bio.Measurements
|
||||
.match(/\d+(\w+)?-\d+-\d+/g)
|
||||
?.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in'
|
||||
.split('-');
|
||||
|
||||
// account for measuemrents being just e.g. '32EE'
|
||||
if (measurements) {
|
||||
const [bust, waist, hip] = measurements;
|
||||
// account for measuemrents being just e.g. '32EE'
|
||||
if (measurements) {
|
||||
const [bust, waist, hip] = measurements;
|
||||
|
||||
if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included
|
||||
if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included
|
||||
|
||||
profile.waist = Number(waist);
|
||||
profile.hip = Number(hip);
|
||||
}
|
||||
profile.waist = Number(waist);
|
||||
profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
|
||||
}
|
||||
if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
|
||||
}
|
||||
|
||||
if (bio.Bra_cup_size) {
|
||||
const bust = bio.Bra_cup_size.match(/^\d+\w+/);
|
||||
if (bust) [profile.bust] = bust;
|
||||
}
|
||||
if (bio.Bra_cup_size) {
|
||||
const bust = bio.Bra_cup_size.match(/^\d+\w+/);
|
||||
if (bust) [profile.bust] = bust;
|
||||
}
|
||||
|
||||
if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
|
||||
if (bio.Boobs === 'Natural') profile.naturalBoobs = true;
|
||||
if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
|
||||
if (bio.Boobs === 'Natural') profile.naturalBoobs = true;
|
||||
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\d+\.\d+/g).slice(-1)[0]) * 100;
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\d+/g)[1]);
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\d+\.\d+/g).slice(-1)[0]) * 100;
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\d+/g)[1]);
|
||||
|
||||
if (bio.Eye_color) profile.eyes = bio.Eye_color;
|
||||
if (bio.Hair) [profile.hair] = bio.Hair.split(',');
|
||||
if (bio.Eye_color) profile.eyes = bio.Eye_color;
|
||||
if (bio.Hair) [profile.hair] = bio.Hair.split(',');
|
||||
|
||||
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
||||
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
||||
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
||||
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
||||
|
||||
const avatarThumbPath = qu.img('.image img');
|
||||
const avatarThumbPath = qu.img('.image img');
|
||||
|
||||
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
||||
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
||||
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
||||
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
||||
|
||||
profile.avatar = {
|
||||
src: `http://www.boobpedia.com${avatarPath}`,
|
||||
copyright: null,
|
||||
};
|
||||
}
|
||||
profile.avatar = {
|
||||
src: `http://www.boobpedia.com${avatarPath}`,
|
||||
copyright: null,
|
||||
};
|
||||
}
|
||||
|
||||
profile.social = qu.urls('.infobox a.external');
|
||||
profile.social = qu.urls('.infobox a.external');
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = actorName.replace(/\s+/, '_');
|
||||
const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);
|
||||
const actorSlug = actorName.replace(/\s+/, '_');
|
||||
const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString());
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString());
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -11,216 +11,216 @@ const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
const hairMap = {
|
||||
Blonde: 'blonde',
|
||||
Brunette: 'brown',
|
||||
'Black Hair': 'black',
|
||||
Redhead: 'red',
|
||||
Blonde: 'blonde',
|
||||
Brunette: 'brown',
|
||||
'Black Hair': 'black',
|
||||
Redhead: 'red',
|
||||
};
|
||||
|
||||
function scrapeAll(html, site, upcoming) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.release-card.scene').toArray();
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.release-card.scene').toArray();
|
||||
|
||||
return sceneElements.reduce((acc, element) => {
|
||||
const isUpcoming = $(element).find('.icon-upcoming.active').length === 1;
|
||||
return sceneElements.reduce((acc, element) => {
|
||||
const isUpcoming = $(element).find('.icon-upcoming.active').length === 1;
|
||||
|
||||
if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) {
|
||||
return acc;
|
||||
}
|
||||
if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
const sceneLinkElement = $(element).find('a');
|
||||
const sceneLinkElement = $(element).find('a');
|
||||
|
||||
const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const entryId = url.split('/').slice(-3, -2)[0];
|
||||
const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const entryId = url.split('/').slice(-3, -2)[0];
|
||||
|
||||
const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
|
||||
const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();
|
||||
const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
|
||||
const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();
|
||||
|
||||
const likes = Number($(element).find('.label-rating .like-amount').text());
|
||||
const dislikes = Number($(element).find('.label-rating .dislike-amount').text());
|
||||
const likes = Number($(element).find('.label-rating .like-amount').text());
|
||||
const dislikes = Number($(element).find('.label-rating .dislike-amount').text());
|
||||
|
||||
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
|
||||
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
|
||||
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
|
||||
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
|
||||
|
||||
const channel = slugify($(element).find('.collection').attr('title'), '');
|
||||
const channel = slugify($(element).find('.collection').attr('title'), '');
|
||||
|
||||
return acc.concat({
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
channel,
|
||||
site,
|
||||
});
|
||||
}, []);
|
||||
return acc.concat({
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
channel,
|
||||
site,
|
||||
});
|
||||
}, []);
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, _site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const release = {};
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const release = {};
|
||||
|
||||
const videoJson = $('script:contains("window.videoUiOptions")').html();
|
||||
const videoString = videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1);
|
||||
const videoData = JSON.parse(videoString);
|
||||
const videoJson = $('script:contains("window.videoUiOptions")').html();
|
||||
const videoString = videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1);
|
||||
const videoData = JSON.parse(videoString);
|
||||
|
||||
[release.entryId] = url.split('/').slice(-3, -2);
|
||||
release.title = $('.scene-title[itemprop="name"]').text();
|
||||
[release.entryId] = url.split('/').slice(-3, -2);
|
||||
release.title = $('.scene-title[itemprop="name"]').text();
|
||||
|
||||
release.description = $('#scene-description p[itemprop="description"]')
|
||||
.contents()
|
||||
.first()
|
||||
.text()
|
||||
.trim();
|
||||
release.description = $('#scene-description p[itemprop="description"]')
|
||||
.contents()
|
||||
.first()
|
||||
.text()
|
||||
.trim();
|
||||
|
||||
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
|
||||
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
|
||||
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
|
||||
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
|
||||
|
||||
const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
|
||||
const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;
|
||||
const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
|
||||
const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;
|
||||
|
||||
return {
|
||||
name: $(actorElement).attr('title'),
|
||||
avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
|
||||
};
|
||||
}).toArray();
|
||||
return {
|
||||
name: $(actorElement).attr('title'),
|
||||
avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
|
||||
};
|
||||
}).toArray();
|
||||
|
||||
release.actors = actorsFromCards || $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
release.actors = actorsFromCards || $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
|
||||
release.likes = Number($('.label-rating .like').text());
|
||||
release.dislikes = Number($('.label-rating .dislike').text());
|
||||
release.likes = Number($('.label-rating .like').text());
|
||||
release.dislikes = Number($('.label-rating .dislike').text());
|
||||
|
||||
const siteElement = $('.niche-site-logo');
|
||||
// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
|
||||
const siteName = siteElement.attr('title');
|
||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
const siteElement = $('.niche-site-logo');
|
||||
// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
|
||||
const siteName = siteElement.attr('title');
|
||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
|
||||
release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
|
||||
release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
|
||||
|
||||
const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
|
||||
if (posterPath) release.poster = `https:${posterPath}`;
|
||||
const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
|
||||
if (posterPath) release.poster = `https:${posterPath}`;
|
||||
|
||||
if (videoData) {
|
||||
release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
|
||||
src: `https:${path}`,
|
||||
quality: Number(quality.match(/\d{3,}/)[0]),
|
||||
}));
|
||||
}
|
||||
if (videoData) {
|
||||
release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
|
||||
src: `https:${path}`,
|
||||
quality: Number(quality.match(/\d{3,}/)[0]),
|
||||
}));
|
||||
}
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeActorSearch(html, url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const actorLink = document.querySelector(`a[title="${actorName}" i]`);
|
||||
const { document } = new JSDOM(html).window;
|
||||
const actorLink = document.querySelector(`a[title="${actorName}" i]`);
|
||||
|
||||
return actorLink ? actorLink.href : null;
|
||||
return actorLink ? actorLink.href : null;
|
||||
}
|
||||
|
||||
async function fetchActorReleases({ qu, html }, accReleases = []) {
|
||||
const releases = scrapeAll(html);
|
||||
const next = qu.url('.pagination .next a');
|
||||
const releases = scrapeAll(html);
|
||||
const next = qu.url('.pagination .next a');
|
||||
|
||||
if (next) {
|
||||
const url = `https://www.brazzers.com${next}`;
|
||||
const res = await get(url);
|
||||
if (next) {
|
||||
const url = `https://www.brazzers.com${next}`;
|
||||
const res = await get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return fetchActorReleases(res.item, accReleases.concat(releases));
|
||||
}
|
||||
}
|
||||
if (res.ok) {
|
||||
return fetchActorReleases(res.item, accReleases.concat(releases));
|
||||
}
|
||||
}
|
||||
|
||||
return accReleases.concat(releases);
|
||||
return accReleases.concat(releases);
|
||||
}
|
||||
|
||||
async function scrapeProfile(html, url, actorName) {
|
||||
const qProfile = ex(html);
|
||||
const { q, qa } = qProfile;
|
||||
const qProfile = ex(html);
|
||||
const { q, qa } = qProfile;
|
||||
|
||||
const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
|
||||
const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());
|
||||
const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
|
||||
const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());
|
||||
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
profile.description = q('.model-profile-specs p', true);
|
||||
profile.description = q('.model-profile-specs p', true);
|
||||
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
|
||||
if (bio['Birth Location']) profile.birthPlace = bio['Birth Location'];
|
||||
if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase();
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
|
||||
if (bio['Birth Location']) profile.birthPlace = bio['Birth Location'];
|
||||
if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase();
|
||||
|
||||
if (bio.Height) profile.height = heightToCm(bio.Height);
|
||||
if (bio.Weight) profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]);
|
||||
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
|
||||
if (bio.Height) profile.height = heightToCm(bio.Height);
|
||||
if (bio.Weight) profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]);
|
||||
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
|
||||
|
||||
if (bio['Tits Type'] && bio['Tits Type'].match('Natural')) profile.naturalBoobs = true;
|
||||
if (bio['Tits Type'] && bio['Tits Type'].match('Enhanced')) profile.naturalBoobs = false;
|
||||
if (bio['Tits Type'] && bio['Tits Type'].match('Natural')) profile.naturalBoobs = true;
|
||||
if (bio['Tits Type'] && bio['Tits Type'].match('Enhanced')) profile.naturalBoobs = false;
|
||||
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;
|
||||
|
||||
const avatarEl = q('.big-pic-model-container img');
|
||||
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
|
||||
const avatarEl = q('.big-pic-model-container img');
|
||||
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
|
||||
|
||||
profile.releases = await fetchActorReleases(qProfile);
|
||||
profile.releases = await fetchActorReleases(qProfile);
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.get(`${site.url}/page/${page}/`);
|
||||
const res = await bhttp.get(`${site.url}/page/${page}/`);
|
||||
|
||||
return scrapeAll(res.body.toString(), site, false);
|
||||
return scrapeAll(res.body.toString(), site, false);
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
const res = await bhttp.get(`${site.url}/`);
|
||||
const res = await bhttp.get(`${site.url}/`);
|
||||
|
||||
return scrapeAll(res.body.toString(), site, true);
|
||||
return scrapeAll(res.body.toString(), site, true);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const searchUrl = 'https://brazzers.com/pornstars-search/';
|
||||
const searchRes = await bhttp.get(searchUrl, {
|
||||
headers: {
|
||||
Cookie: `textSearch=${encodeURIComponent(actorName)};`,
|
||||
},
|
||||
});
|
||||
const searchUrl = 'https://brazzers.com/pornstars-search/';
|
||||
const searchRes = await bhttp.get(searchUrl, {
|
||||
headers: {
|
||||
Cookie: `textSearch=${encodeURIComponent(actorName)};`,
|
||||
},
|
||||
});
|
||||
|
||||
const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);
|
||||
const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);
|
||||
|
||||
if (actorLink) {
|
||||
const url = `https://brazzers.com${actorLink}`;
|
||||
const res = await bhttp.get(url);
|
||||
if (actorLink) {
|
||||
const url = `https://brazzers.com${actorLink}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return scrapeProfile(res.body.toString(), url, actorName);
|
||||
}
|
||||
return scrapeProfile(res.body.toString(), url, actorName);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -4,139 +4,139 @@ const { get, geta, ctxa, ed } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeAll(scenes, site) {
|
||||
return scenes.map(({ qu }) => {
|
||||
const url = qu.url('.text-thumb a');
|
||||
const { pathname } = new URL(url);
|
||||
const channelUrl = qu.url('.badge');
|
||||
return scenes.map(({ qu }) => {
|
||||
const url = qu.url('.text-thumb a');
|
||||
const { pathname } = new URL(url);
|
||||
const channelUrl = qu.url('.badge');
|
||||
|
||||
if (site?.parameters?.extract && qu.q('.badge', true) !== site.name) {
|
||||
return null;
|
||||
}
|
||||
if (site?.parameters?.extract && qu.q('.badge', true) !== site.name) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const release = {};
|
||||
const release = {};
|
||||
|
||||
release.url = channelUrl ? `${channelUrl}${pathname}` : url;
|
||||
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
|
||||
release.title = qu.q('.text-thumb a', true);
|
||||
release.url = channelUrl ? `${channelUrl}${pathname}` : url;
|
||||
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
|
||||
release.title = qu.q('.text-thumb a', true);
|
||||
|
||||
release.date = qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
||||
release.duration = qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
|
||||
release.date = qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
||||
release.duration = qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
|
||||
|
||||
release.actors = qu.all('.category a', true);
|
||||
release.actors = qu.all('.category a', true);
|
||||
|
||||
release.poster = qu.img('img.video_placeholder, .video-images img');
|
||||
release.teaser = { src: qu.trailer() };
|
||||
release.poster = qu.img('img.video_placeholder, .video-images img');
|
||||
release.teaser = { src: qu.trailer() };
|
||||
|
||||
return release;
|
||||
}).filter(Boolean);
|
||||
return release;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
function scrapeScene({ q, qd, qa }, url, _site, baseRelease) {
|
||||
const release = { url };
|
||||
const release = { url };
|
||||
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
|
||||
|
||||
release.title = q('.trailer-block_title', true);
|
||||
release.description = q('.info-block:nth-child(3) .text', true);
|
||||
release.date = qd('.info-block_data .text', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
release.title = q('.trailer-block_title', true);
|
||||
release.description = q('.info-block:nth-child(3) .text', true);
|
||||
release.date = qd('.info-block_data .text', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
|
||||
const duration = baseRelease?.duration || Number(q('.info-block_data .text', true).match(/(\d+)\s+min/)?.[1]) * 60;
|
||||
if (duration) release.duration = duration;
|
||||
const duration = baseRelease?.duration || Number(q('.info-block_data .text', true).match(/(\d+)\s+min/)?.[1]) * 60;
|
||||
if (duration) release.duration = duration;
|
||||
|
||||
release.actors = qa('.info-block_data a[href*="/models"]', true);
|
||||
release.tags = qa('.info-block a[href*="/categories"]', true);
|
||||
release.actors = qa('.info-block_data a[href*="/models"]', true);
|
||||
release.tags = qa('.info-block a[href*="/categories"]', true);
|
||||
|
||||
const posterEl = q('.update_thumb');
|
||||
const poster = posterEl.getAttribute('src0_3x') || posterEl.getAttribute('src0_2x') || posterEl.dataset.src;
|
||||
const posterEl = q('.update_thumb');
|
||||
const poster = posterEl.getAttribute('src0_3x') || posterEl.getAttribute('src0_2x') || posterEl.dataset.src;
|
||||
|
||||
if (poster && baseRelease?.poster) release.photos = [poster];
|
||||
else if (poster) release.poster = poster;
|
||||
if (poster && baseRelease?.poster) release.photos = [poster];
|
||||
else if (poster) release.poster = poster;
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ q, qa, qtx }) {
|
||||
const profile = {};
|
||||
const profile = {};
|
||||
|
||||
const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
|
||||
const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
|
||||
const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
|
||||
const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)cm\)/)[1]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)[1]);
|
||||
if (bio.race) profile.ethnicity = bio.race;
|
||||
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)cm\)/)[1]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)[1]);
|
||||
if (bio.race) profile.ethnicity = bio.race;
|
||||
|
||||
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
||||
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
||||
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
if (!/\?/.test(bust)) profile.bust = bust;
|
||||
if (!/\?/.test(waist)) profile.waist = waist;
|
||||
if (!/\?/.test(hip)) profile.hip = hip;
|
||||
}
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
if (!/\?/.test(bust)) profile.bust = bust;
|
||||
if (!/\?/.test(waist)) profile.waist = waist;
|
||||
if (!/\?/.test(hip)) profile.hip = hip;
|
||||
}
|
||||
|
||||
if (bio.hair) profile.hair = bio.hair;
|
||||
if (bio.eyes) profile.eyes = bio.eyes;
|
||||
if (bio.hair) profile.hair = bio.hair;
|
||||
if (bio.eyes) profile.eyes = bio.eyes;
|
||||
|
||||
if (/various/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
else if (/none/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
else if (bio.tattoos) {
|
||||
profile.hasTattoos = true;
|
||||
profile.tattoos = bio.tattoos;
|
||||
}
|
||||
if (/various/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
else if (/none/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
else if (bio.tattoos) {
|
||||
profile.hasTattoos = true;
|
||||
profile.tattoos = bio.tattoos;
|
||||
}
|
||||
|
||||
if (/various/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
else if (/none/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
else if (bio.piercings) {
|
||||
profile.hasPiercings = true;
|
||||
profile.piercings = bio.piercings;
|
||||
}
|
||||
if (/various/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
else if (/none/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
else if (bio.piercings) {
|
||||
profile.hasPiercings = true;
|
||||
profile.piercings = bio.piercings;
|
||||
}
|
||||
|
||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
|
||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
|
||||
|
||||
const avatar = q('.model-img img');
|
||||
profile.avatar = avatar.getAttribute('src0_3x') || avatar.getAttribute('src0_2x') || avatar.dataset.src;
|
||||
const avatar = q('.model-img img');
|
||||
profile.avatar = avatar.getAttribute('src0_3x') || avatar.getAttribute('src0_2x') || avatar.dataset.src;
|
||||
|
||||
const releases = qa('.video-thumb');
|
||||
profile.releases = scrapeAll(ctxa(releases));
|
||||
const releases = qa('.video-thumb');
|
||||
profile.releases = scrapeAll(ctxa(releases));
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = site.parameters?.extract
|
||||
? `https://cherrypimps.com/categories/movies_${page}.html`
|
||||
: `${site.url}/categories/movies_${page}.html`;
|
||||
const res = await geta(url, 'div.video-thumb');
|
||||
const url = site.parameters?.extract
|
||||
? `https://cherrypimps.com/categories/movies_${page}.html`
|
||||
: `${site.url}/categories/movies_${page}.html`;
|
||||
const res = await geta(url, 'div.video-thumb');
|
||||
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, release) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const actorSlug2 = slugify(actorName, '');
|
||||
const actorSlug = slugify(actorName);
|
||||
const actorSlug2 = slugify(actorName, '');
|
||||
|
||||
const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraperSlug)
|
||||
? [`https://${scraperSlug}.com/models/${actorSlug}.html`, `https://${scraperSlug}.com/models/${actorSlug2}.html`]
|
||||
: [`https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug}.html`, `https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug2}.html`];
|
||||
const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraperSlug)
|
||||
? [`https://${scraperSlug}.com/models/${actorSlug}.html`, `https://${scraperSlug}.com/models/${actorSlug2}.html`]
|
||||
: [`https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug}.html`, `https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug2}.html`];
|
||||
|
||||
const res = await get(url);
|
||||
if (res.ok) return scrapeProfile(res.item);
|
||||
const res = await get(url);
|
||||
if (res.ok) return scrapeProfile(res.item);
|
||||
|
||||
const res2 = await get(url2);
|
||||
return res2.ok ? scrapeProfile(res2.item) : res2.status;
|
||||
const res2 = await get(url2);
|
||||
return res2.ok ? scrapeProfile(res2.item) : res2.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -7,182 +7,182 @@ const slugify = require('../utils/slugify');
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
function scrapeAll(html, site, origin) {
|
||||
return exa(html, '.card.m-1:not(.pornstar-card)').map(({ q, qa, qd }) => {
|
||||
const release = {};
|
||||
return exa(html, '.card.m-1:not(.pornstar-card)').map(({ q, qa, qd }) => {
|
||||
const release = {};
|
||||
|
||||
release.title = q('a', 'title');
|
||||
release.url = `${site?.url || origin || 'https://ddfnetwork.com'}${q('a', 'href')}`;
|
||||
[release.entryId] = release.url.split('/').slice(-1);
|
||||
release.title = q('a', 'title');
|
||||
release.url = `${site?.url || origin || 'https://ddfnetwork.com'}${q('a', 'href')}`;
|
||||
[release.entryId] = release.url.split('/').slice(-1);
|
||||
|
||||
release.date = qd('small[datetime]', 'YYYY-MM-DD HH:mm:ss', null, 'datetime');
|
||||
release.actors = qa('.card-subtitle a', true).filter(Boolean);
|
||||
release.date = qd('small[datetime]', 'YYYY-MM-DD HH:mm:ss', null, 'datetime');
|
||||
release.actors = qa('.card-subtitle a', true).filter(Boolean);
|
||||
|
||||
const duration = parseInt(q('.card-info div:nth-child(2) .card-text', true), 10) * 60;
|
||||
if (duration) release.duration = duration;
|
||||
const duration = parseInt(q('.card-info div:nth-child(2) .card-text', true), 10) * 60;
|
||||
if (duration) release.duration = duration;
|
||||
|
||||
release.poster = q('img').dataset.src;
|
||||
release.poster = q('img').dataset.src;
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, _site) {
|
||||
const { qu } = ex(html);
|
||||
const release = {};
|
||||
const { qu } = ex(html);
|
||||
const release = {};
|
||||
|
||||
[release.entryId] = url.split('/').slice(-1);
|
||||
[release.entryId] = url.split('/').slice(-1);
|
||||
|
||||
release.title = qu.meta('itemprop=name');
|
||||
release.description = qu.q('.descr-box p', true);
|
||||
release.date = qu.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
|
||||
release.title = qu.meta('itemprop=name');
|
||||
release.description = qu.q('.descr-box p', true);
|
||||
release.date = qu.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
|
||||
|| qu.date('.title-border:nth-child(2) p', 'MM.DD.YYYY');
|
||||
|
||||
release.actors = qu.all('.pornstar-card > a', 'title');
|
||||
release.tags = qu.all('.tags-tab .tags a', true);
|
||||
release.actors = qu.all('.pornstar-card > a', 'title');
|
||||
release.tags = qu.all('.tags-tab .tags a', true);
|
||||
|
||||
release.duration = parseInt(qu.q('.icon-video-red + span', true), 10) * 60;
|
||||
release.likes = Number(qu.q('.icon-like-red + span', true));
|
||||
release.duration = parseInt(qu.q('.icon-video-red + span', true), 10) * 60;
|
||||
release.likes = Number(qu.q('.icon-like-red + span', true));
|
||||
|
||||
release.poster = qu.poster();
|
||||
release.photos = qu.urls('.photo-slider-guest .card a');
|
||||
release.poster = qu.poster();
|
||||
release.photos = qu.urls('.photo-slider-guest .card a');
|
||||
|
||||
release.trailer = qu.all('source[type="video/mp4"]').map(trailer => ({
|
||||
src: trailer.src,
|
||||
quality: Number(trailer.attributes.res.value),
|
||||
}));
|
||||
release.trailer = qu.all('source[type="video/mp4"]').map(trailer => ({
|
||||
src: trailer.src,
|
||||
quality: Number(trailer.attributes.res.value),
|
||||
}));
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchActorReleases(urls) {
|
||||
// DDF Network and DDF Network Stream list all scenes, exclude
|
||||
const sources = urls.filter(url => !/ddfnetwork/.test(url));
|
||||
// DDF Network and DDF Network Stream list all scenes, exclude
|
||||
const sources = urls.filter(url => !/ddfnetwork/.test(url));
|
||||
|
||||
const releases = await Promise.all(sources.map(async (url) => {
|
||||
const { html } = await get(url);
|
||||
const releases = await Promise.all(sources.map(async (url) => {
|
||||
const { html } = await get(url);
|
||||
|
||||
return scrapeAll(html, null, new URL(url).origin);
|
||||
}));
|
||||
return scrapeAll(html, null, new URL(url).origin);
|
||||
}));
|
||||
|
||||
// DDF cross-releases scenes between sites, filter duplicates by entryId
|
||||
return Object.values(releases
|
||||
.flat()
|
||||
.sort((releaseA, releaseB) => releaseB.date - releaseA.date) // sort by date so earliest scene remains
|
||||
.reduce((acc, release) => ({ ...acc, [release.entryId]: release }), {}));
|
||||
// DDF cross-releases scenes between sites, filter duplicates by entryId
|
||||
return Object.values(releases
|
||||
.flat()
|
||||
.sort((releaseA, releaseB) => releaseB.date - releaseA.date) // sort by date so earliest scene remains
|
||||
.reduce((acc, release) => ({ ...acc, [release.entryId]: release }), {}));
|
||||
}
|
||||
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { qu } = ex(html);
|
||||
const { qu } = ex(html);
|
||||
|
||||
const keys = qu.all('.about-title', true).map(key => slugify(key, '_'));
|
||||
const values = qu.all('.about-info').map((el) => {
|
||||
if (el.children.length > 0) {
|
||||
return Array.from(el.children, child => child.textContent.trim()).join(', ');
|
||||
}
|
||||
const keys = qu.all('.about-title', true).map(key => slugify(key, '_'));
|
||||
const values = qu.all('.about-info').map((el) => {
|
||||
if (el.children.length > 0) {
|
||||
return Array.from(el.children, child => child.textContent.trim()).join(', ');
|
||||
}
|
||||
|
||||
return el.textContent.trim();
|
||||
});
|
||||
return el.textContent.trim();
|
||||
});
|
||||
|
||||
const bio = keys.reduce((acc, key, index) => {
|
||||
if (values[index] === '-') return acc;
|
||||
const bio = keys.reduce((acc, key, index) => {
|
||||
if (values[index] === '-') return acc;
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[key]: values[index],
|
||||
};
|
||||
}, {});
|
||||
return {
|
||||
...acc,
|
||||
[key]: values[index],
|
||||
};
|
||||
}, {});
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
profile.description = qu.q('.description-box', true);
|
||||
profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');
|
||||
profile.description = qu.q('.description-box', true);
|
||||
profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');
|
||||
|
||||
if (bio.nationality) profile.nationality = bio.nationality;
|
||||
if (bio.nationality) profile.nationality = bio.nationality;
|
||||
|
||||
if (bio.bra_size) [profile.bust] = bio.bra_size.match(/\d+\w+/);
|
||||
if (bio.waist) profile.waist = Number(bio.waist.match(/\d+/)[0]);
|
||||
if (bio.hips) profile.hip = Number(bio.hips.match(/\d+/)[0]);
|
||||
if (bio.bra_size) [profile.bust] = bio.bra_size.match(/\d+\w+/);
|
||||
if (bio.waist) profile.waist = Number(bio.waist.match(/\d+/)[0]);
|
||||
if (bio.hips) profile.hip = Number(bio.hips.match(/\d+/)[0]);
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.match(/\d{2,}/)[0]);
|
||||
if (bio.height) profile.height = Number(bio.height.match(/\d{2,}/)[0]);
|
||||
|
||||
if (bio.tit_style && /Enhanced/.test(bio.tit_style)) profile.naturalBoobs = false;
|
||||
if (bio.tit_style && /Natural/.test(bio.tit_style)) profile.naturalBoobs = true;
|
||||
if (bio.tit_style && /Enhanced/.test(bio.tit_style)) profile.naturalBoobs = false;
|
||||
if (bio.tit_style && /Natural/.test(bio.tit_style)) profile.naturalBoobs = true;
|
||||
|
||||
if (bio.body_art && /Tattoo/.test(bio.body_art)) profile.hasTattoos = true;
|
||||
if (bio.body_art && /Piercing/.test(bio.body_art)) profile.hasPiercings = true;
|
||||
if (bio.body_art && /Tattoo/.test(bio.body_art)) profile.hasTattoos = true;
|
||||
if (bio.body_art && /Piercing/.test(bio.body_art)) profile.hasPiercings = true;
|
||||
|
||||
if (bio.hair_style) profile.hair = bio.hair_style.split(',')[0].trim().toLowerCase();
|
||||
if (bio.eye_color) profile.eyes = bio.eye_color.match(/\w+/)[0].toLowerCase();
|
||||
if (bio.hair_style) profile.hair = bio.hair_style.split(',')[0].trim().toLowerCase();
|
||||
if (bio.eye_color) profile.eyes = bio.eye_color.match(/\w+/)[0].toLowerCase();
|
||||
|
||||
if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]);
|
||||
if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]);
|
||||
|
||||
const avatarEl = qu.q('.pornstar-details .card-img-top');
|
||||
if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`;
|
||||
const avatarEl = qu.q('.pornstar-details .card-img-top');
|
||||
if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`;
|
||||
|
||||
profile.releases = await fetchActorReleases(qu.urls('.find-me-tab li a'));
|
||||
profile.releases = await fetchActorReleases(qu.urls('.find-me-tab li a'));
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = site.parameters?.native
|
||||
? `${site.url}/videos/search/latest/ever/allsite/-/${page}`
|
||||
: `https://ddfnetwork.com/videos/search/latest/ever/${new URL(site.url).hostname}/-/${page}`;
|
||||
const url = site.parameters?.native
|
||||
? `${site.url}/videos/search/latest/ever/allsite/-/${page}`
|
||||
: `https://ddfnetwork.com/videos/search/latest/ever/${new URL(site.url).hostname}/-/${page}`;
|
||||
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeAll(res.body.toString(), site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeAll(res.body.toString(), site);
|
||||
}
|
||||
|
||||
return res.statusCode;
|
||||
return res.statusCode;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
// DDF's main site moved to Porn World
|
||||
// const res = await bhttp.get(`https://ddfnetwork.com${new URL(url).pathname}`);
|
||||
const res = await bhttp.get(url);
|
||||
// DDF's main site moved to Porn World
|
||||
// const res = await bhttp.get(`https://ddfnetwork.com${new URL(url).pathname}`);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const resSearch = await bhttp.post('https://ddfnetwork.com/search/ajax',
|
||||
{
|
||||
type: 'hints',
|
||||
word: actorName,
|
||||
},
|
||||
{
|
||||
decodeJSON: true,
|
||||
headers: {
|
||||
'x-requested-with': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
const resSearch = await bhttp.post('https://ddfnetwork.com/search/ajax',
|
||||
{
|
||||
type: 'hints',
|
||||
word: actorName,
|
||||
},
|
||||
{
|
||||
decodeJSON: true,
|
||||
headers: {
|
||||
'x-requested-with': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
|
||||
if (resSearch.statusCode !== 200 || Array.isArray(resSearch.body.list)) {
|
||||
return null;
|
||||
}
|
||||
if (resSearch.statusCode !== 200 || Array.isArray(resSearch.body.list)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!resSearch.body.list.pornstarsName || resSearch.body.list.pornstarsName.length === 0) {
|
||||
return null;
|
||||
}
|
||||
if (!resSearch.body.list.pornstarsName || resSearch.body.list.pornstarsName.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const [actor] = resSearch.body.list.pornstarsName;
|
||||
const url = `https://ddfnetwork.com${actor.href}`;
|
||||
const [actor] = resSearch.body.list.pornstarsName;
|
||||
const url = `https://ddfnetwork.com${actor.href}`;
|
||||
|
||||
const resActor = await bhttp.get(url);
|
||||
const resActor = await bhttp.get(url);
|
||||
|
||||
if (resActor.statusCode !== 200) {
|
||||
return null;
|
||||
}
|
||||
if (resActor.statusCode !== 200) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return scrapeProfile(resActor.body.toString(), url, actorName);
|
||||
return scrapeProfile(resActor.body.toString(), url, actorName);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
async function networkFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'digitalplayground', 'modelprofile');
|
||||
return fetchProfile(actorName, 'digitalplayground', 'modelprofile');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -7,136 +7,136 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
async function getPhotos(albumUrl) {
|
||||
const res = await bhttp.get(albumUrl);
|
||||
const html = res.body.toString();
|
||||
const { document } = new JSDOM(html).window;
|
||||
const res = await bhttp.get(albumUrl);
|
||||
const html = res.body.toString();
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const lastPhotoPage = Array.from(document.querySelectorAll('.preview-image-container a')).slice(-1)[0].href;
|
||||
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
|
||||
const lastPhotoPage = Array.from(document.querySelectorAll('.preview-image-container a')).slice(-1)[0].href;
|
||||
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
|
||||
|
||||
const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
|
||||
const pageUrl = `https://blacksonblondes.com${lastPhotoPage.replace(/\d+.jpg/, `${(index + 1).toString().padStart(3, '0')}.jpg`)}`;
|
||||
const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
|
||||
const pageUrl = `https://blacksonblondes.com${lastPhotoPage.replace(/\d+.jpg/, `${(index + 1).toString().padStart(3, '0')}.jpg`)}`;
|
||||
|
||||
return {
|
||||
url: pageUrl,
|
||||
extract: ({ qu }) => qu.q('.scenes-module img', 'src'),
|
||||
};
|
||||
});
|
||||
return {
|
||||
url: pageUrl,
|
||||
extract: ({ qu }) => qu.q('.scenes-module img', 'src'),
|
||||
};
|
||||
});
|
||||
|
||||
return photoUrls;
|
||||
return photoUrls;
|
||||
}
|
||||
|
||||
/**
 * Extract the releases that belong to `site` from a Dogfart network overview page.
 *
 * The overview lists updates for several sites; an entry is kept only when
 * `www.<.help-block text>` (lower-cased) equals the host of `site.url`.
 *
 * @param {string} html - Markup of the network's scenes overview page.
 * @param {{ url: string, slug: string }} site - Site the releases are scraped for.
 * @returns {Object[]} One release per matching entry, carrying url, entryId,
 *   title, actors, poster, teaser and the given site.
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const siteHost = new URL(site.url).host;

	return Array.from(document.querySelectorAll('.recent-updates'))
		// entries with another host belong to a different dogfart site
		.filter(element => `www.${element.querySelector('.help-block').textContent.toLowerCase()}` === siteHost)
		.map((element) => {
			const sceneLinkElement = element.querySelector('.thumbnail');
			const url = `https://dogfartnetwork.com${sceneLinkElement.href}`;
			const { pathname } = new URL(url);
			const entryId = `${site.slug}_${pathname.split('/')[4]}`;

			// the listing has no separate actor field; the title is the list of actor names
			const title = element.querySelector('.scene-title').textContent;
			const actors = title.split(/[,&]|\band\b/).map(actor => actor.trim());

			const poster = `https:${element.querySelector('img').src}`;
			const teaser = sceneLinkElement.dataset.preview_clip_url;

			return {
				url,
				entryId,
				title,
				actors,
				poster,
				teaser: {
					src: teaser,
				},
				site,
			};
		});
}
|
||||
|
||||
/**
 * Scrape a single Dogfart scene page into a release object.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL the page was fetched from; its origin and path
 *   are used for the release URL, the entry ID and the photo pagination URL.
 * @param {Object} site - Site the scene is scraped for; passed through to
 *   `getPhotos` and returned on the release.
 * @returns {Promise<Object>} Release with entryId, url, title, description,
 *   actors, date, duration (seconds), poster, photos, trailer, tags, rating,
 *   site and channel.
 */
async function scrapeScene(html, url, site) {
	const { document } = new JSDOM(html).window;

	const title = document.querySelector('.description-title').textContent;
	const actors = Array.from(document.querySelectorAll('.more-scenes a')).map(({ textContent }) => textContent);

	// Prefer the meta description; fall back to the visible description block
	// when the meta tag is missing or empty.
	const metaDescription = document.querySelector('meta[itemprop="description"]')?.content;
	const description = metaDescription
		|| document.querySelector('.description')
			.textContent
			.replace(/[ \t\n]{2,}/g, ' ')
			.replace('...read more', '')
			.trim();

	const channel = document.querySelector('.site-name').textContent.split('.')[0].toLowerCase();
	const { origin, pathname } = new URL(url);
	// second-to-last path segment, i.e. the last one before a trailing slash
	const entryId = `${channel}_${pathname.split('/').slice(-2)[0]}`;

	const date = new Date(document.querySelector('meta[itemprop="uploadDate"]').content);

	// the page shows mm:ss at the end of the second info paragraph; prefix hours for moment
	const runtime = document
		.querySelectorAll('.extra-info p')[1]
		.textContent
		.match(/\d+:\d+$/)[0];
	const duration = moment.duration(`00:${runtime}`).asSeconds();

	const trailerElement = document.querySelector('.html5-video');
	const poster = `https:${trailerElement.dataset.poster}`;
	const { trailer } = trailerElement.dataset;

	// the last pagination link is handed to getPhotos together with the scene URL
	const lastPhotosUrl = Array.from(document.querySelectorAll('.pagination a')).slice(-1)[0].href;
	const photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, site, url);

	// the page rating is halved and floored before it is stored as stars
	const ratingText = document.querySelector('span[itemprop="average"]')?.textContent
		|| document.querySelector('span[itemprop="ratingValue"]')?.textContent;
	const stars = Math.floor(Number(ratingText) / 2);
	const tags = Array.from(document.querySelectorAll('.scene-details .categories a')).map(({ textContent }) => textContent);

	return {
		entryId,
		url: `${origin}${pathname}`,
		title,
		description,
		actors,
		date,
		duration,
		poster,
		photos,
		trailer: {
			src: trailer,
		},
		tags,
		rating: {
			stars,
		},
		site,
		channel,
	};
}
|
||||
|
||||
/**
 * Fetch one page of the network-wide scenes overview and scrape the releases
 * that belong to `site`.
 *
 * @param {Object} site - Site to filter the network overview by.
 * @param {number} [page=1] - Overview page number.
 * @returns {Promise<Object[]>} Result of `scrapeLatest` for the fetched page.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`https://dogfartnetwork.com/tour/scenes/?p=${page}`);

	return scrapeLatest(res.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site the scene is scraped for.
 * @returns {Promise<Object>} Result of `scrapeScene` for the fetched page.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared MindGeek scraper, using the
 * fixed arguments 'fakehub' and 'modelprofile'.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<*>} Whatever the shared `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'fakehub', 'modelprofile');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -1,115 +1,115 @@
|
||||
'use strict';
|
||||
|
||||
const {
|
||||
fetchLatest,
|
||||
fetchApiLatest,
|
||||
fetchUpcoming,
|
||||
fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchApiProfile,
|
||||
scrapeAll,
|
||||
fetchLatest,
|
||||
fetchApiLatest,
|
||||
fetchUpcoming,
|
||||
fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchApiProfile,
|
||||
scrapeAll,
|
||||
} = require('./gamma');
|
||||
const { get } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Derive the actor list of a release from its title.
 *
 * The first "solo" (case-insensitive) is removed from the title, which is
 * then split on commas and the word "and"; each part is trimmed.
 *
 * @param {{ title: string }} release - Release whose title lists the actors.
 * @returns {Object} Shallow copy of the release with `actors` set.
 */
function extractLowArtActors(release) {
	const actors = release.title
		.replace(/solo/i, '')
		.split(/,|\band\b/ig)
		.map(actor => actor.trim());

	return {
		...release,
		actors,
	};
}
|
||||
|
||||
/**
 * Fetch the latest releases of a site, via the API when the site is
 * configured for it and via the regular scraper otherwise.
 *
 * For the 'lowartfilms' site the actors are derived from each release title.
 *
 * @param {Object} site - Site to fetch; `site.parameters.api` selects the API path.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<Object[]>} Releases for the requested page.
 */
async function networkFetchLatest(site, page = 1) {
	if (site.parameters?.api) return fetchApiLatest(site, page, false);

	const releases = await fetchLatest(site, page);

	if (site.slug === 'lowartfilms') {
		return releases.map(release => extractLowArtActors(release));
	}

	return releases;
}
|
||||
|
||||
/**
 * Fetch a scene through the shared scraper; for the 'lowartfilms' site the
 * actors are derived from the release title.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site the scene is scraped for.
 * @returns {Promise<Object>} The scraped release.
 */
async function networkFetchScene(url, site) {
	const release = await fetchScene(url, site);

	if (site.slug === 'lowartfilms') {
		return extractLowArtActors(release);
	}

	return release;
}
|
||||
|
||||
/**
 * Fetch upcoming releases of a site, via the API when the site is configured
 * for it and via the regular scraper otherwise.
 *
 * @param {Object} site - Site to fetch; `site.parameters.api` selects the API path.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<*>} Result of the selected upcoming fetcher.
 */
async function networkFetchUpcoming(site, page = 1) {
	if (site.parameters?.api) return fetchApiUpcoming(site, page, true);

	return fetchUpcoming(site, page);
}
|
||||
|
||||
/**
 * Build the peternorth.com videos listing URL for an actor.
 *
 * @param {string} actorPath - Actor path segment, inserted verbatim into the URL.
 * @param {number} [page=1] - Listing page number.
 * @returns {string} The listing URL.
 */
function getActorReleasesUrl(actorPath, page = 1) {
	return `https://www.peternorth.com/en/videos/All-Categories/0${actorPath}/All-Dvds/0/latest/${page}`;
}
|
||||
|
||||
/**
 * Look an actor up on a site's pornstars page and scrape the releases listed
 * on the actor's page.
 *
 * The actor is located by comparing the slugified text of each
 * `option[value*="/pornstar"]` with the slugified actor name.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @param {string} siteSlug - Domain name without TLD, e.g. 'silviasaint'.
 * @returns {Promise<{ releases: * }|null>} The scraped releases, or null when
 *   a request fails or the actor is not listed.
 */
async function fetchClassicProfile(actorName, siteSlug) {
	const actorSlug = slugify(actorName);

	const url = `https://${siteSlug}.com/en/pornstars`;
	const pornstarsRes = await get(url);

	if (!pornstarsRes.ok) return null;

	const actorPath = pornstarsRes.item.qa('option[value*="/pornstar"]')
		.find(el => slugify(el.textContent) === actorSlug)
		?.value;

	if (!actorPath) return null;

	const actorUrl = `https://${siteSlug}.com${actorPath}`;
	const res = await get(actorUrl);

	if (!res.ok) return null;

	const releases = scrapeAll(res.item, null, `https://www.${siteSlug}.com`, false);

	return { releases };
}
|
||||
|
||||
/**
 * Build an actor profile from several Fame Digital sites.
 *
 * Profiles are requested from devilsfilm and roccosiffredi; when scenes are
 * requested, peternorth, silviasaint and silverstonedvd are queried as well.
 * Profile fields are merged with devilsfilm taking precedence over
 * roccosiffredi, which takes precedence over peternorth. The releases of all
 * sources are concatenated.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @param {string} scraperSlug - Unused here; kept for the common scraper signature.
 * @param {Object} site - Unused here; kept for the common scraper signature.
 * @param {{ scenes?: boolean }} [include] - Set `scenes` to also collect releases.
 * @returns {Promise<Object|null>} Merged profile with `releases`, or null.
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	const includeScenes = Boolean(include?.scenes);

	// not all Fame Digital sites offer Gamma actors
	const [devils, rocco, peter, silvia, silverstone] = await Promise.all([
		fetchApiProfile(actorName, 'devilsfilm', true),
		fetchApiProfile(actorName, 'roccosiffredi'),
		includeScenes ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : [],
		includeScenes ? fetchClassicProfile(actorName, 'silviasaint') : [],
		includeScenes ? fetchClassicProfile(actorName, 'silverstonedvd') : [],
	]);

	// NOTE(review): when scenes are not requested `peter` is the `[]`
	// placeholder, which is truthy, so this branch is then always taken.
	// Confirm whether a null result was intended in that case.
	if (devils || rocco || peter) {
		const releases = [].concat(
			devils?.releases || [],
			rocco?.releases || [],
			peter?.releases || [],
			silvia?.releases || [],
			silverstone?.releases || [],
		);

		return {
			...peter,
			...rocco,
			...devils,
			releases,
		};
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: networkFetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming: networkFetchUpcoming,
|
||||
fetchLatest: networkFetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming: networkFetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -4,7 +4,7 @@ const { fetchLatest, fetchUpcoming, fetchScene } = require('./gamma');
|
||||
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -5,89 +5,89 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Scrape an actor profile from a freeones.nl profile page.
 *
 * Bio values are read from the page's `/babes` filter links: the text between
 * square brackets in each link is the key, the text after `=` is the value.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Object} Profile with name and whichever bio fields the page provides.
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// measurements are parsed up to the first comma, same as further below
	const toNumber = measurement => Number(String(measurement).split(',')[0]);

	const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), el => decodeURI(el.href)).reduce((acc, item) => {
		const keyMatch = item.match(/\[\w+\]/);

		if (!keyMatch) return acc;

		const key = keyMatch[0].slice(1, -1);
		const [, value] = item.split('=');

		// both hip and waist link to 'waist', assume biggest value is hip
		if (key === 'waist' && acc.waist) {
			// compare numerically; as strings '90' would sort above '100'
			if (toNumber(acc.waist) > toNumber(value)) {
				acc.hip = acc.waist;
				acc.waist = value;

				return acc;
			}

			acc.hip = value;

			return acc;
		}

		acc[key] = value;

		return acc;
	}, {});

	if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();

	// only join the parts that are present, so a missing part does not render as 'undefined'
	const birthPlace = [bio.placeOfBirth, bio.country].filter(Boolean).join(', ');
	if (birthPlace) profile.birthPlace = birthPlace;

	profile.eyes = bio.eyeColor;
	profile.hair = bio.hairColor;
	profile.ethnicity = bio.ethnicity;

	profile.bust = bio.bra;
	if (bio.waist) profile.waist = toNumber(bio.waist);
	if (bio.hip) profile.hip = toNumber(bio.hip);

	if (bio.height) profile.height = toNumber(bio.height);
	if (bio.weight) profile.weight = toNumber(bio.weight);

	profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);

	// skip the avatar when the image is missing or is the site's placeholder
	const avatar = document.querySelector('.profile-image-large img')?.src;
	if (avatar && !avatar.match('placeholder')) profile.avatar = { src: avatar, copyright: null };

	return profile;
}
|
||||
|
||||
/**
 * Return the link target of the first `a.image-link` on a search results page.
 *
 * @param {string} html - Markup of the search results page.
 * @returns {string|null} The link's href, or null when there is no such link
 *   or its href is empty.
 */
function scrapeSearch(html) {
	const { document } = new JSDOM(html).window;

	return document.querySelector('a.image-link')?.href || null;
}
|
||||
|
||||
/**
 * Fetch an actor profile from freeones.nl.
 *
 * The profile page is first requested under the slug derived from the name
 * (lower-cased, whitespace replaced by dashes). When that does not return
 * status 200, the site search is queried and the first result is tried.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<Object|null>} The scraped profile, or null when no
 *   profile page could be retrieved.
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

	const res = await bhttp.get(`https://freeones.nl/${actorSlug}/profile`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	// encode the name so spaces, '&' and '#' cannot break the query string
	const searchRes = await bhttp.get(`https://freeones.nl/babes?q=${encodeURIComponent(actorName)}`);
	const actorPath = scrapeSearch(searchRes.body.toString());

	if (!actorPath) return null;

	const actorRes = await bhttp.get(`https://freeones.nl${actorPath}/profile`);

	if (actorRes.statusCode === 200) {
		return scrapeProfile(actorRes.body.toString(), actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Scrape the summary bio from a freeones.com actor front page.
 *
 * The bio is a definition list; each `dt` text is paired with the `dd` text
 * at the same index.
 *
 * @param {string} html - Markup of the actor front page.
 * @param {string} url - URL of the front page (not used by the scraper).
 * @param {string} name - Name stored on the resulting profile.
 * @returns {Promise<{ profile: Object, url: string }>} The partial profile and
 *   the URL of the full biography page taken from the '.seemore a' link.
 */
async function scrapeProfileFrontpage(html, url, name) {
	const { document } = new JSDOM(html).window;
	const bioEl = document.querySelector('.dashboard-bio-list');

	const bioUrl = `https:${document.querySelector('.seemore a').href}`;

	const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
	const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());

	const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

	const profile = {
		name,
		gender: 'female',
	};

	const birthdateString = bio['Date of Birth:'];
	const measurementsString = bio['Measurements:'];

	const birthCityString = bio['Place of Birth:'];
	const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;

	const birthCountryString = bio['Country of Origin:'];
	const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;

	const piercingsString = bio['Piercings:'];
	const tattoosString = bio['Tattoos:'];

	// drop a trailing ' (…)' suffix; splitting keeps the full string when there is none
	if (birthdateString && birthdateString !== 'Unknown (add)') profile.birthdate = moment.utc(birthdateString.split(' (')[0], 'MMMM D, YYYY').toDate();
	if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));

	if (bio['Fake Boobs:']) profile.naturalBoobs = bio['Fake Boobs:'] === 'No';
	profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;

	// the rows are optional; skip them rather than throw on a missing value
	if (bio['Hair Color:']) profile.hair = bio['Hair Color:'].toLowerCase();
	if (bio['Eye Color:']) profile.eyes = bio['Eye Color:'].toLowerCase();

	if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
	if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');

	if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
	if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;

	profile.social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);

	return {
		profile,
		url: bioUrl,
	};
}
|
||||
|
||||
/**
 * Scrape the full biography page of a freeones.com actor and merge it over
 * the profile scraped from the front page.
 *
 * The biography is a table; the text of each first cell is paired with the
 * text of the second cell at the same index.
 *
 * @param {string} html - Markup of the biography page.
 * @param {Object} [frontpageProfile] - Profile scraped from the front page;
 *   its fields are kept unless the biography page provides a value.
 * @param {string} url - URL of the biography page (not used by the scraper).
 * @param {string} name - Name stored on the resulting profile.
 * @returns {Promise<Object>} The merged profile.
 */
async function scrapeProfileBio(html, frontpageProfile, url, name) {
	const { document } = new JSDOM(html).window;
	const bioEl = document.querySelector('#biographyTable');

	const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
	const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());

	const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

	const profile = {
		...frontpageProfile,
		name,
		gender: 'female',
	};

	const birthdateString = bio['Date of Birth:'];
	const measurementsString = bio['Measurements:'];

	const birthCityString = bio['Place of Birth:'];
	const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;

	const birthCountryString = bio['Country of Origin:'];
	const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;

	const piercingsString = bio['Piercings:'];
	const tattoosString = bio['Tattoos:'];

	// drop a trailing ' (…)' suffix; splitting keeps the full string when there is none
	if (birthdateString && birthdateString !== 'Unknown') profile.birthdate = moment.utc(birthdateString.split(' (')[0], 'MMMM D, YYYY').toDate();
	if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));

	// guard and lookup must use the same key, including the trailing colon
	if (bio['Fake boobs:']) profile.naturalBoobs = bio['Fake boobs:'] === 'No';
	profile.ethnicity = bio['Ethnicity:'];

	profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;

	// the rows below are optional; keep the front page value rather than throw
	if (bio['Hair Color:']) profile.hair = bio['Hair Color:'].toLowerCase();
	if (bio['Eye Color:']) profile.eyes = bio['Eye Color:'].toLowerCase();

	const heightMatch = bio['Height:']?.match(/\d+/);
	const weightMatch = bio['Weight:']?.match(/\d+/);

	if (heightMatch) profile.height = Number(heightMatch[0]);
	if (weightMatch) profile.weight = Number(weightMatch[0]);

	if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
	if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');

	if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
	if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;

	profile.social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);

	return profile;
}
|
||||
|
||||
/**
 * Fetch an actor profile from freeones.com.
 *
 * The front page is requested under the name with spaces replaced by
 * underscores; when that does not return status 200, the same slug with a
 * '_Babe' suffix is tried. The biography page linked from the front page is
 * then fetched and merged over the front page profile.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<Object|null>} The merged profile, or null when neither
 *   front page could be retrieved.
 */
async function fetchProfile(actorName) {
	// replace every space; a string pattern would only replace the first one
	const slug = actorName.replace(/ /g, '_');
	const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;

	const resFrontpage = await bhttp.get(frontpageUrl);

	if (resFrontpage.statusCode === 200) {
		// the front page scraper returns { profile, url }
		const { url, profile } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
		const resBio = await bhttp.get(url);

		return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
	}

	// apparently some actors are appended 'Babe' as their surname...
	const fallbackSlug = `${slug}_Babe`;
	const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`;
	const resFallback = await bhttp.get(fallbackUrl);

	if (resFallback.statusCode === 200) {
		const { url, profile } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName);
		const resBio = await bhttp.get(url);

		return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -4,93 +4,93 @@ const { get, geta, ctxa } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Scrape a list of scene tiles into releases.
 *
 * @param {Array<{ el: Object, qu: Object }>} scenes - Scene contexts; `el` is
 *   the tile element and `qu` its query helper.
 * @returns {Object[]} One release per scene with entryId, url, title,
 *   description, date, duration, actors and, when available, poster sources.
 */
function scrapeAll(scenes) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		// prefer the tile's data-setid; otherwise take the third dash-separated part of the thumbnail id
		release.entryId = el.dataset.setid || qu.q('.update_thumb', 'id').match(/\w+-\w+-(\d+)-\d+/)[1];
		release.url = qu.url('.title');

		release.title = qu.q('.title', true);
		release.description = qu.q('.title', 'title');

		release.date = qu.date('.video-data > span:last-child', 'YYYY-MM-DD');
		release.duration = qu.dur('.video-data > span');

		release.actors = qu.all('.update_models a', true);

		// list the '-2x' variant first, the '-1x' thumbnail second;
		// a tile without a thumbnail source gets no poster instead of throwing
		const poster = qu.q('.update_thumb', 'src0_1x');

		if (poster) {
			release.poster = [
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release from a scene detail page.
 *
 * @param {Object} context - query helpers for the page: `q`, `qa`, `qd` and `qtx`
 * @param {string} url - scene URL, copied onto the release
 * @param {Object} [_site] - unused
 * @returns {Object} the scraped release
 * @throws {TypeError} when the player image has no id of the form
 *   `<word>-<word>-<digits>-<digits>`
 */
function scrapeScene({ q, qa, qd, qtx }, url, _site) {
	const release = { url };

	release.entryId = q('#image_parent img', 'id').match(/\w+-\w+-(\d+)-\d+/)[1];

	release.title = q('.trailer_title', true);
	release.description = qtx('.text p');
	release.date = qd('span[data-dateadded]', 'YYYY-MM-DD', null, 'data-dateadded');

	release.actors = qa('.update_models a', true);
	release.tags = qa('.video-info a[href*="/categories"]', true);

	const poster = q('#image_parent img', 'src0_1x');

	// only derive poster sources when the image actually exposes a 1x source
	if (poster) {
		release.poster = [
			poster.replace('-1x', '-2x'),
			poster,
		];
	}

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * @param {Object} context - `el` is the page element, `q` and `qtx` are its query helpers
 * @returns {Object} profile with `avatar` (2x source first, then 1x), `releases`, and
 *   `description` when the page has a bio
 */
function scrapeProfile({ el, q, qtx }) {
	const profile = {};

	const description = qtx('.model-bio');
	if (description) profile.description = description;

	profile.avatar = [
		q('.model-image img', 'src0_2x'),
		q('.model-image img', 'src0_1x'),
	];

	// the scenes listed on the model page are scraped like a regular listing
	profile.releases = scrapeAll(ctxa(el, '.update'));

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the latest-updates listing and scrape its scene tiles.
 *
 * @param {Object} site - site whose `url` is the listing base
 * @param {number} [page=1] - listing page number
 * @returns {Promise<Object[]|*>} the scraped releases, or `res.status` when the request is not ok
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/categories/movies_${page}_d.html`;
	const res = await geta(url, '.latest-updates .update');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL
 * @param {Object} site - passed through to the scraper
 * @returns {Promise<Object|*>} the scraped release, or `res.status` when the request is not ok
 */
async function fetchScene(url, site) {
	const res = await get(url, '.content-wrapper');

	return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape the model page of an actor.
 *
 * @param {string} actorName - actor name, slugified with an empty delimiter for the URL
 * @param {string} scraperSlug - `povperverts` is served from the .net domain; any other
 *   slug is used as the .com domain name
 * @returns {Promise<Object|*>} the scraped profile, or `res.status` when the request is not ok
 */
async function fetchProfile(actorName, scraperSlug) {
	const actorSlug = slugify(actorName, '');

	const url = scraperSlug === 'povperverts'
		? `https://povperverts.net/models/${actorSlug}.html`
		: `https://${scraperSlug}.com/models/${actorSlug}.html`;

	const res = await get(url);

	return res.ok ? scrapeProfile(res.item, actorName) : res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
|
||||
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -8,404 +8,403 @@ const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
/**
 * Build a case-insensitive expression matching any channel of the site's network.
 *
 * Only the `hushpass` and `interracialpass` networks are handled; every other network
 * yields null. A channel contributes its `parameters.match` verbatim when set
 * (presumably a regular expression fragment; verify against the seed data), otherwise
 * its name as a literal.
 *
 * @param {Object} site - site with `network.slug` and `network.id`
 * @returns {Promise<RegExp|null>} the channel expression, or null when the network is not
 *   handled or has no usable channels
 */
async function getChannelRegExp(site) {
	if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;

	const sites = await knex('sites').where('network_id', site.network.id);

	const patterns = sites
		.map(channel => channel.parameters?.match
			// names are literals, keep characters such as + or . from acting as operators
			|| channel.name?.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
		.filter(Boolean);

	// an empty alternation would match every string
	if (patterns.length === 0) return null;

	return new RegExp(patterns.join('|'), 'i');
}
|
||||
|
||||
/**
 * Derive an entry ID from a release's date and title.
 *
 * @param {Object} release - release with optional `date` and `title`
 * @returns {string|null} `<slugified YYYY-MM-DD date>-<slugified title>`, or null when
 *   either the date or the title is missing
 */
function deriveEntryId(release) {
	if (release.date && release.title) {
		return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
	}

	return null;
}
|
||||
|
||||
/**
 * Resolve the poster sources and extra photos for a scene.
 *
 * A poster path containing `400.jpg` is not used as poster; in that case, or when no
 * path is given, the poster of the base release is returned instead.
 *
 * @param {string|null|undefined} posterPath - poster path relative to the media base
 * @param {Object} site - `parameters.media` is used as base when set, else `url`
 * @param {Object} [baseRelease] - release scraped earlier, its `poster` is kept as a photo
 * @returns {Array} `[poster, photos]`: `poster` is a list of sources (original first, then
 *   the `-2x` and `-3x` variants), or the base release poster, or null
 */
function extractPoster(posterPath, site, baseRelease) {
	// the dot is escaped so only a literal `400.jpg` is rejected
	if (posterPath && !/400\.jpg/.test(posterPath)) {
		const poster = `${site.parameters?.media || site.url}${posterPath}`;

		// paths without a `-1x` marker would otherwise yield the same source three times
		const posterSources = [...new Set([
			poster,
			// upscaled
			poster.replace('-1x', '-2x'),
			poster.replace('-1x', '-3x'),
		])];

		return [posterSources, baseRelease?.poster ? [baseRelease.poster] : []];
	}

	return [baseRelease?.poster || null, []];
}
|
||||
|
||||
/**
 * Collect the 3x, 2x and 1x sources of an image, largest first, as absolute URLs.
 *
 * @param {Function} q - query helper, called as `q(el, selector, attribute)` when `el` is
 *   given and as `q(selector, attribute)` otherwise
 * @param {string} selector - selector of the image element
 * @param {Object} site - `parameters.media` is used as URL base when set, else `url`
 * @param {Object} [el] - element to query within
 * @returns {string[]} the sources that are present, each prefixed with the base
 */
function getImageWithFallbacks(q, selector, site, el) {
	const base = site.parameters?.media || site.url;

	return ['src0_3x', 'src0_2x', 'src0_1x']
		.map(attribute => (el ? q(el, selector, attribute) : q(selector, attribute)))
		.filter(Boolean)
		.map(src => `${base}${src}`);
}
|
||||
|
||||
/**
 * Convert the scene tiles of a default-layout listing into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - one query context per tile
 * @param {Object} site - used to resolve poster URLs
 * @returns {Object[]} one release per tile; the entry ID is derived from date and title
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		// the title attribute is preferred over the link text
		release.title = qu.q('h3 a', 'title') || qu.q('h3 a', true);
		release.url = qu.url('h3 a');

		release.date = qu.date('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
		release.duration = qu.dur('.modeldata p');

		if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];

		release.poster = getImageWithFallbacks(qu.q, '.modelimg img', site);

		release.entryId = deriveEntryId(release);

		return release;
	});
}
|
||||
|
||||
/**
 * Convert the scene tiles of a T1-layout listing into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - one query context per tile
 * @param {Object} site - used to resolve poster URLs; with `parameters.accFilter` set,
 *   releases whose entry ID is among the accumulated releases are dropped
 * @param {Object[]} [accSiteReleases] - releases scraped earlier, compared by `entryId`
 * @returns {Object[]} the releases that were not filtered out
 */
function scrapeAllT1(scenes, site, accSiteReleases) {
	// built once instead of re-mapping the accumulated releases for every scene
	const accEntryIds = new Set((accSiteReleases || []).map(accRelease => accRelease.entryId));

	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h4 a', 'title') || qu.q('h4 a', true);
		release.url = qu.url('h4 a');

		release.date = qu.date('.more-info-div', 'MMM D, YYYY');
		release.duration = qu.dur('.more-info-div');

		if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];

		const posterPath = qu.q('.img-div img', 'src0_1x') || qu.img('img.video_placeholder');

		if (posterPath) {
			const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`;

			// largest variant first
			release.poster = [
				poster.replace('-1x', '-3x'),
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		release.entryId = deriveEntryId(release);

		if (site.parameters?.accFilter && accEntryIds.has(release.entryId)) {
			// filter out releases that were already scraped from a categorized site
			return null;
		}

		return release;
	}).filter(Boolean);
}
|
||||
|
||||
/**
 * Convert the scene tiles of a tour-layout listing into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - one query context per tile
 * @returns {Object[]} one release per tile; the entry ID is derived from date and title
 */
function scrapeAllTour(scenes) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h4 a', true);
		release.url = qu.url('a');
		release.date = qu.date('.tour_update_models + span', 'YYYY-MM-DD');

		release.actors = qu.all('.tour_update_models a', true);

		release.poster = qu.img('a img');

		release.entryId = deriveEntryId(release);

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release from a default-layout scene page.
 *
 * @param {Object} context - `html` is the raw page source, `qu` the query helpers
 * @param {Object} site - used to resolve poster and trailer URLs
 * @param {string} url - scene URL, copied onto the release
 * @param {Object} [baseRelease] - release scraped earlier, its poster is kept as fallback
 * @returns {Object} the scraped release; `trailer` and `stars` are only set when found
 */
function scrapeScene({ html, qu }, site, url, baseRelease) {
	const release = { url };

	release.title = qu.q('.centerwrap h2', true);
	release.description = qu.q('.videocontent p', true);

	release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
	release.duration = qu.dur('.videodetails .date');

	release.actors = qu.all('.modelname a', true);

	const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	// take the matched string itself rather than interpolating the match array
	const trailerPath = html.match(/\/trailers\/.*\.mp4/)?.[0];
	if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };

	// the rating paragraph is optional
	const stars = qu.q('.modelrates + p', true)?.match(/\d.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	release.entryId = deriveEntryId(release);

	return release;
}
|
||||
|
||||
/**
 * Build a release from a T1-layout scene page.
 *
 * @param {Object} context - `html` is the raw page source, `qu` the query helpers
 * @param {Object} site - used to resolve avatar, poster and trailer URLs
 * @param {string} url - scene URL, copied onto the release and used as trailer referer
 * @param {Object} [baseRelease] - release scraped earlier, its poster is kept as fallback
 * @param {RegExp|null} [channelRegExp] - when given, the first tag it matches is
 *   assigned as the release's channel
 * @returns {Object} the scraped release
 */
function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
	const release = { url };

	release.title = qu.q('.trailer-section-head .section-title', true);
	release.description = qu.text('.row .update-info-block');

	release.date = qu.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = qu.dur('.update-info-row:nth-child(2)');

	release.actors = qu.all('.models-list-thumbs a').map(el => ({
		name: qu.q(el, 'span', true),
		avatar: getImageWithFallbacks(qu.q, 'img', site, el),
	}));

	release.tags = qu.all('.tags a', true);

	const posterPath = qu.q('.player-thumb img', 'src0_1x');
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];

	if (trailer) {
		// relative trailer paths are resolved against the media base
		const src = /^http/.test(trailer) ? trailer : `${site.parameters?.media || site.url}${trailer}`;

		release.trailer = { src, referer: url };
	}

	// the rating element is optional
	const stars = qu.q('.update-rating', true)?.match(/\d.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	if (channelRegExp) {
		const channel = release.tags.find(tag => channelRegExp.test(tag));

		if (channel) {
			release.channel = {
				force: true,
				slug: slugify(channel, ''),
			};
		}
	}

	release.entryId = deriveEntryId(release);

	return release;
}
|
||||
|
||||
/**
 * Build a release from a tour-layout scene page or from a scene block on a tour-layout
 * profile page.
 *
 * @param {Object} context - `qu` holds the query helpers; `html` is the raw source and is
 *   only consulted for the trailer when present
 * @param {Object} site - used to resolve relative trailer paths
 * @param {string} [url] - scene URL, copied onto the release when given
 * @returns {Object} the scraped release; `date`, `entryId`, `poster`, `photos` and
 *   `trailer` are only set when found
 */
function scrapeSceneTour({ html, qu }, site, url) {
	const release = {};

	if (url) release.url = url;
	release.title = qu.q('.update_title, .video-title', true);
	release.description = qu.q('.latest_update_description, .video-summary', true);

	const date = qu.date('.availdate, .update_date', 'YYYY-MM-DD');
	if (date) release.date = date;

	release.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
	release.tags = qu.all('.update_tags a, .tour_update_tags a', true);

	// the second image is preferred as poster, the first one serves as fallback
	const [photo, poster, ...photos] = qu.imgs('.update_image img:not(.play_icon_overlay)');
	if (poster || photo) release.poster = poster || photo;

	// don't use first photo when already used as fallback poster
	const gallery = poster ? [photo, ...photos] : photos;
	if (gallery.length > 0) release.photos = gallery;

	if (release.date) release.entryId = deriveEntryId(release);

	const trailerCode = qu.q('.update_image a', 'onclick');
	const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html?.match(/\/trailer\/.*\.mp4/)?.[0];

	if (trailerPath && /^http/.test(trailerPath)) release.trailer = { src: trailerPath };
	else if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a default-layout model page.
 *
 * The `.stats p` lines are read as `key: value` pairs; lines without a value are skipped.
 *
 * @param {Object} context - `el` is the page element, `qu` the query helpers
 * @param {Object} site - used to resolve the avatar and passed on to the scene scraper
 * @returns {Object} profile with `avatar`, `releases` and whichever of `bust`, `waist`,
 *   `hip`, `age` and `height` the page provides
 */
function scrapeProfile({ el, qu }, site) {
	const profile = {};

	const bio = qu.texts('.stats p').reduce((acc, info) => {
		const [key, value] = info.split(':');

		// a line without colon has no value to trim
		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.age) profile.age = Number(bio.age);
	if (bio.height) profile.height = feetInchesToCm(bio.height);

	profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);

	const qReleases = ctxa(el, '.modelFeatures .modelfeature');
	profile.releases = scrapeAll(qReleases, site);

	return profile;
}
|
||||
|
||||
/**
 * Build an actor profile from a T1-layout model page.
 *
 * The detail paragraphs are read as `key: value` pairs; lines without a value are skipped.
 *
 * @param {Object} context - `el` is the page element, `qu` the query helpers
 * @param {Object} site - used to resolve the avatar and passed on to the scene scraper
 * @returns {Object} profile with `avatar`, `releases` and whichever of `bust`, `waist`,
 *   `hip`, `description`, `age` and `height` the page provides
 */
function scrapeProfileT1({ el, qu }, site) {
	const profile = {};

	const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
		const [key, value] = info.split(':');

		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.fun_fact) profile.description = bio.fun_fact;
	if (bio.age) profile.age = Number(bio.age);

	const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);
	const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g);

	// a three-digit metric height also matches the per-digit imperial expression,
	// so the imperial conversion must only run when no metric height was found
	if (heightMetric) profile.height = Number(heightMetric[1]);
	else if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));

	profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site);

	const qReleases = ctxa(el, '.item-video');
	profile.releases = scrapeAllT1(qReleases, site);

	return profile;
}
|
||||
|
||||
/**
 * Build an actor profile from a tour-layout model page.
 *
 * The `.model_bio` lines are read as `key: value` pairs; lines without a value are skipped.
 *
 * @param {Object} context - `el` is the page element, `qu` the query helpers
 * @param {Object} site - used to resolve the avatar and attached to every release
 * @returns {Object} profile with `avatar`, `releases` and whichever bio fields the page
 *   provides
 */
function scrapeProfileTour({ el, qu }, site) {
	const profile = {};

	const bio = qu.texts('.model_bio').reduce((acc, info) => {
		const [key, value] = info.split(':');

		// a line without colon has no value to trim
		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
	if (bio.birthplace) profile.birthPlace = bio.birthplace;
	if (bio.fun_fact) profile.description = bio.fun_fact;

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;

	// only the leading 2-3 digit number is used; values without one are ignored rather than stored as NaN
	const height = bio.height?.match(/^\d{2,3}/)?.[0];
	const weight = bio.weight?.match(/^\d{2,3}/)?.[0];

	if (height) profile.height = Number(height);
	if (weight) profile.weight = Number(weight);

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
	if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;

	if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
	if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
	if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
	if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;

	if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());

	profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);

	const qReleases = ctxa(el, '.update_block');
	profile.releases = qReleases.map((qRelease) => {
		const url = qRelease.qu.url('.update_image a[href]');
		const release = scrapeSceneTour(qRelease, site);

		// links to the signup or join page are not scene URLs
		if (!/\/(signup|join)/i.test(url)) release.url = url;
		release.entryId = deriveEntryId(release);
		release.site = site;

		return release;
	});

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of a site's latest releases and scrape it with the scraper matching
 * the site's layout.
 *
 * The URL is `parameters.latest` formatted with the page number when set, else the
 * T1 listing when `parameters.t1` is set, else the default listing.
 *
 * @param {Object} site - site with `url` and optional `parameters`
 * @param {number} [page=1] - listing page number
 * @param {*} [_beforeFetchLatest] - unused
 * @param {Object[]} [accSiteReleases] - releases scraped earlier, passed to the scraper
 * @returns {Promise<Object[]|*>} the scraped releases, or `res.status` when the request is not ok
 */
async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases) {
	const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
		|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
		|| `${site.url}/categories/movies_${page}_d.html`;

	const res = await geta(url, '.modelfeature, .item-video, .updateItem');

	if (!res.ok) return res.status;
	if (site.parameters?.t1) return scrapeAllT1(res.items, site, accSiteReleases);
	if (site.parameters?.tour) return scrapeAllTour(res.items, site, accSiteReleases);

	return scrapeAll(res.items, site, accSiteReleases);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it with the scraper matching the site's layout.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site with optional `parameters`
 * @param {Object} [baseRelease] - release scraped earlier, passed to the scraper
 * @param {RegExp|null} [beforeFetchLatest] - channel expression prepared earlier; when
 *   falsy it is looked up again for the site
 * @returns {Promise<Object|*>} the scraped release, or `res.status` when the request is not ok
 */
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
	const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
	const res = await get(url);

	if (!res.ok) return res.status;
	if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
	if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);

	return scrapeScene(res.item, site, url, baseRelease);
}
|
||||
|
||||
/**
 * Fetch an actor's model page and scrape it with the scraper matching the site's layout.
 *
 * The page is requested with the name slugified without delimiter first; when that
 * request is not ok, the slug with the default delimiter is tried.
 *
 * @param {string} actorName - actor name
 * @param {string} scraperSlug - unused
 * @param {Object} site - site with `url` and optional `parameters`; `parameters.profile`
 *   is a URL template formatted with the slug
 * @returns {Promise<Object|*>} the scraped profile, or `res.status` of the last request
 *   when no request is ok
 */
async function fetchProfile(actorName, scraperSlug, site) {
	const actorSlugA = slugify(actorName, '');
	const actorSlugB = slugify(actorName);

	const t1 = site.parameters?.t1 ? 't1/' : '';

	const getProfileUrl = actorSlug => (site.parameters?.profile
		? util.format(site.parameters.profile, actorSlug)
		: `${site.url}/${t1}models/${actorSlug}.html`);

	const res1 = await get(getProfileUrl(actorSlugA));

	// when both slugs are identical the fallback would request the same URL again
	const res = res1.ok || actorSlugA === actorSlugB
		? res1
		: await get(getProfileUrl(actorSlugB));

	if (!res.ok) return res.status;
	if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
	if (site.parameters?.tour) return scrapeProfileTour(res.item, site);

	return scrapeProfile(res.item, site);
}
|
||||
|
||||
module.exports = {
|
||||
beforeFetchLatest: getChannelRegExp,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
beforeFetchLatest: getChannelRegExp,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
const { fetchProfile } = require('./mindgeek');
|
||||
|
||||
async function networkFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'iconmale');
|
||||
return fetchProfile(actorName, 'iconmale');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -4,104 +4,104 @@ const bhttp = require('bhttp');
|
||||
const { get, exa, ed } = require('../utils/q');
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const scenes = site.slug === 'paintoy'
|
||||
? exa(html, '#articleTable table[cellspacing="2"]')
|
||||
: exa(html, 'body > table');
|
||||
const scenes = site.slug === 'paintoy'
|
||||
? exa(html, '#articleTable table[cellspacing="2"]')
|
||||
: exa(html, 'body > table');
|
||||
|
||||
return scenes.map(({ qu }) => {
|
||||
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
||||
const release = {};
|
||||
return scenes.map(({ qu }) => {
|
||||
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
||||
const release = {};
|
||||
|
||||
const titleEl = qu.q('.galleryTitleText, .articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');
|
||||
const titleEl = qu.q('.galleryTitleText, .articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');
|
||||
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
|
||||
if (date) {
|
||||
release.title = title.trim();
|
||||
release.date = date;
|
||||
} else {
|
||||
// title should contain date instead, not applicable in brief mode
|
||||
release.title = title.slice(title.indexOf(':') + 1).trim();
|
||||
release.date = ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY');
|
||||
}
|
||||
if (date) {
|
||||
release.title = title.trim();
|
||||
release.date = date;
|
||||
} else {
|
||||
// title should contain date instead, not applicable in brief mode
|
||||
release.title = title.slice(title.indexOf(':') + 1).trim();
|
||||
release.date = ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY');
|
||||
}
|
||||
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
|
||||
const description = qu.q('.articleCopyText', true);
|
||||
if (description) release.description = description.slice(0, description.lastIndexOf('('));
|
||||
const description = qu.q('.articleCopyText', true);
|
||||
if (description) release.description = description.slice(0, description.lastIndexOf('('));
|
||||
|
||||
const duration = qu.dur('.articleCopyText a:nth-child(2)');
|
||||
if (duration) release.duration = duration;
|
||||
const duration = qu.dur('.articleCopyText a:nth-child(2)');
|
||||
if (duration) release.duration = duration;
|
||||
|
||||
release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
|
||||
release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
|
||||
|
||||
const cover = qu.img('a img');
|
||||
release.covers = [[
|
||||
cover.replace('_thumbnail', ''),
|
||||
cover,
|
||||
]];
|
||||
const cover = qu.img('a img');
|
||||
release.covers = [[
|
||||
cover.replace('_thumbnail', ''),
|
||||
cover,
|
||||
]];
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ qu }, site) {
|
||||
const release = {};
|
||||
const release = {};
|
||||
|
||||
const titleEl = qu.q('.articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
const titleEl = qu.q('.articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
|
||||
release.title = title.trim();
|
||||
release.description = qu.q('.articleCopyText', true);
|
||||
release.title = title.trim();
|
||||
release.description = qu.q('.articleCopyText', true);
|
||||
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
|
||||
release.duration = qu.dur('.articlePostDateText a:nth-child(2)');
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
|
||||
release.duration = qu.dur('.articlePostDateText a:nth-child(2)');
|
||||
|
||||
const [cover, ...photos] = qu.imgs('img[src*="images"]');
|
||||
release.covers = [cover];
|
||||
release.photos = photos;
|
||||
const [cover, ...photos] = qu.imgs('img[src*="images"]');
|
||||
release.covers = [cover];
|
||||
release.photos = photos;
|
||||
|
||||
release.poster = qu.poster();
|
||||
release.poster = qu.poster();
|
||||
|
||||
const trailer = qu.trailer();
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
const trailer = qu.trailer();
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = site.slug === 'paintoy' // paintoy's site is partially broken, use front page
|
||||
? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
|
||||
: `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
||||
const url = site.slug === 'paintoy' // paintoy's site is partially broken, use front page
|
||||
? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
|
||||
: `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
||||
|
||||
const res = await bhttp.get(url, {
|
||||
type: 'brief',
|
||||
page,
|
||||
});
|
||||
const res = await bhttp.get(url, {
|
||||
type: 'brief',
|
||||
page,
|
||||
});
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(site.slug === 'paintoy' ? res.body.toString() : res.body.html, site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(site.slug === 'paintoy' ? res.body.toString() : res.body.html, site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, site) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, site) : res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -9,116 +9,116 @@ const slugify = require('../utils/slugify');
|
||||
const { fetchApiLatest, fetchScene } = require('./gamma');
|
||||
|
||||
async function fetchToken(site) {
|
||||
const res = await bhttp.get(site.url);
|
||||
const html = res.body.toString();
|
||||
const res = await bhttp.get(site.url);
|
||||
const html = res.body.toString();
|
||||
|
||||
const time = html.match(/"aet":\d+/)[0].split(':')[1];
|
||||
const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
|
||||
const token = ah.split('').reverse().join('');
|
||||
const time = html.match(/"aet":\d+/)[0].split(':')[1];
|
||||
const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
|
||||
const token = ah.split('').reverse().join('');
|
||||
|
||||
return { time, token };
|
||||
return { time, token };
|
||||
}
|
||||
|
||||
async function fetchActors(entryId, site, { token, time }) {
|
||||
const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
|
||||
const res = await bhttp.get(url);
|
||||
const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200 && res.body.status === true) {
|
||||
return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
|
||||
}
|
||||
if (res.statusCode === 200 && res.body.status === true) {
|
||||
return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchTrailerLocation(entryId, site) {
|
||||
const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
|
||||
const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
|
||||
|
||||
try {
|
||||
const res = await bhttp.get(url, {
|
||||
followRedirects: false,
|
||||
});
|
||||
try {
|
||||
const res = await bhttp.get(url, {
|
||||
followRedirects: false,
|
||||
});
|
||||
|
||||
if (res.statusCode === 302) {
|
||||
return res.headers.location;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
|
||||
}
|
||||
if (res.statusCode === 302) {
|
||||
return res.headers.location;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function scrapeScene(scene, site, tokens) {
|
||||
const release = {
|
||||
entryId: scene.id,
|
||||
title: scene.title,
|
||||
duration: scene.length,
|
||||
site,
|
||||
meta: {
|
||||
tokens, // attach tokens to reduce number of requests required for deep fetching
|
||||
},
|
||||
};
|
||||
const release = {
|
||||
entryId: scene.id,
|
||||
title: scene.title,
|
||||
duration: scene.length,
|
||||
site,
|
||||
meta: {
|
||||
tokens, // attach tokens to reduce number of requests required for deep fetching
|
||||
},
|
||||
};
|
||||
|
||||
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
|
||||
release.date = new Date(scene.sites.collection[scene.id].publishDate);
|
||||
release.poster = scene._resources.primary[0].url;
|
||||
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
|
||||
release.date = new Date(scene.sites.collection[scene.id].publishDate);
|
||||
release.poster = scene._resources.primary[0].url;
|
||||
|
||||
if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
|
||||
if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
|
||||
if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
|
||||
if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
|
||||
|
||||
const [actors, trailer] = await Promise.all([
|
||||
fetchActors(release.entryId, site, tokens),
|
||||
fetchTrailerLocation(release.entryId, site),
|
||||
]);
|
||||
const [actors, trailer] = await Promise.all([
|
||||
fetchActors(release.entryId, site, tokens),
|
||||
fetchTrailerLocation(release.entryId, site),
|
||||
]);
|
||||
|
||||
release.actors = actors;
|
||||
if (trailer) release.trailer = { src: trailer, quality: 1080 };
|
||||
release.actors = actors;
|
||||
if (trailer) release.trailer = { src: trailer, quality: 1080 };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeLatest(scenes, site, tokens) {
|
||||
return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
|
||||
return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchApiLatest(site, page);
|
||||
}
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchApiLatest(site, page);
|
||||
}
|
||||
|
||||
const { time, token } = await fetchToken(site);
|
||||
const { time, token } = await fetchToken(site);
|
||||
|
||||
// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
|
||||
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
|
||||
const res = await bhttp.get(url);
|
||||
// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
|
||||
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeLatest(res.body.response.collection, site, { time, token });
|
||||
}
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeLatest(res.body.response.collection, site, { time, token });
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchNetworkScene(url, site, release) {
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchScene(url, site, release);
|
||||
}
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchScene(url, site, release);
|
||||
}
|
||||
|
||||
const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
|
||||
const { pathname } = new URL(url);
|
||||
const entryId = pathname.split('/')[2];
|
||||
const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
|
||||
const { pathname } = new URL(url);
|
||||
const entryId = pathname.split('/')[2];
|
||||
|
||||
const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
|
||||
const res = await bhttp.get(apiUrl);
|
||||
const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
|
||||
const res = await bhttp.get(apiUrl);
|
||||
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeScene(res.body.response.collection[0], site, { time, token });
|
||||
}
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeScene(res.body.response.collection[0], site, { time, token });
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene: fetchNetworkScene,
|
||||
fetchLatest,
|
||||
fetchScene: fetchNetworkScene,
|
||||
};
|
||||
|
||||
@@ -3,83 +3,83 @@
|
||||
const { get, initAll } = require('../utils/qu');
|
||||
|
||||
function scrapeLatest(scenes, dates, site) {
|
||||
return scenes.map(({ qu }, index) => {
|
||||
const release = {};
|
||||
return scenes.map(({ qu }, index) => {
|
||||
const release = {};
|
||||
|
||||
const path = qu.url('a');
|
||||
release.url = `${site.url}/visitors/${path}`;
|
||||
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
|
||||
const path = qu.url('a');
|
||||
release.url = `${site.url}/visitors/${path}`;
|
||||
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
|
||||
|
||||
if (dates && dates[index]) {
|
||||
release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
|
||||
}
|
||||
if (dates && dates[index]) {
|
||||
release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
|
||||
}
|
||||
|
||||
release.description = qu.q('tbody tr:nth-child(3) font', true);
|
||||
release.description = qu.q('tbody tr:nth-child(3) font', true);
|
||||
|
||||
const infoLine = qu.q('font[color="#663366"]', true);
|
||||
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
|
||||
const infoLine = qu.q('font[color="#663366"]', true);
|
||||
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
|
||||
|
||||
const poster = qu.img('img[src*="photos/"][width="400"]');
|
||||
release.poster = `${site.url}/visitors/${poster}`;
|
||||
release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);
|
||||
const poster = qu.img('img[src*="photos/"][width="400"]');
|
||||
release.poster = `${site.url}/visitors/${poster}`;
|
||||
release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ qu }, url, site) {
|
||||
const release = { url };
|
||||
const release = { url };
|
||||
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.match(/videos\/(\w+)_hd_trailer/)[1];
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.match(/videos\/(\w+)_hd_trailer/)[1];
|
||||
|
||||
const actor = qu.q('font[color="#990033"] strong', true);
|
||||
release.actors = [actor];
|
||||
const actor = qu.q('font[color="#990033"] strong', true);
|
||||
release.actors = [actor];
|
||||
|
||||
const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
|
||||
const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');
|
||||
const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
|
||||
const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');
|
||||
|
||||
release.trailer = [
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${hdTrailer}`,
|
||||
quality: 1080,
|
||||
},
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${sdTrailer}`,
|
||||
quality: 270,
|
||||
},
|
||||
];
|
||||
release.trailer = [
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${hdTrailer}`,
|
||||
quality: 1080,
|
||||
},
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${sdTrailer}`,
|
||||
quality: 270,
|
||||
},
|
||||
];
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
|
||||
const res = await get(url);
|
||||
const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
|
||||
const res = await get(url);
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
const { el } = res.item;
|
||||
const { el } = res.item;
|
||||
|
||||
const scenes = initAll(el, 'table[width="880"]');
|
||||
const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');
|
||||
const scenes = initAll(el, 'table[width="880"]');
|
||||
const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');
|
||||
|
||||
return scrapeLatest(scenes, dates, site);
|
||||
return scrapeLatest(scenes, dates, site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, site);
|
||||
}
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, site);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -13,406 +13,406 @@ const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
async function fetchPhotos(url) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return res.body.toString();
|
||||
return res.body.toString();
|
||||
}
|
||||
|
||||
function scrapePhotos(html, type) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.toArray()
|
||||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.toArray()
|
||||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
// high res often available in alternative directories, but not always, provide original as fallback
|
||||
if (type === 'caps') {
|
||||
return [
|
||||
src.replace('capthumbs/', 'caps/'),
|
||||
src,
|
||||
];
|
||||
}
|
||||
// high res often available in alternative directories, but not always, provide original as fallback
|
||||
if (type === 'caps') {
|
||||
return [
|
||||
src.replace('capthumbs/', 'caps/'),
|
||||
src,
|
||||
];
|
||||
}
|
||||
|
||||
return [
|
||||
src.replace('thumbs/', 'photos/'),
|
||||
src.replace('thumbs/', '1600watermarked/'),
|
||||
src.replace('thumbs/', '1280watermarked/'),
|
||||
src.replace('thumbs/', '1024watermarked/'),
|
||||
src,
|
||||
];
|
||||
});
|
||||
return [
|
||||
src.replace('thumbs/', 'photos/'),
|
||||
src.replace('thumbs/', '1600watermarked/'),
|
||||
src.replace('thumbs/', '1280watermarked/'),
|
||||
src.replace('thumbs/', '1024watermarked/'),
|
||||
src,
|
||||
];
|
||||
});
|
||||
|
||||
return photos;
|
||||
return photos;
|
||||
}
|
||||
|
||||
async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
|
||||
const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
|
||||
const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
|
||||
|
||||
logger.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);
|
||||
logger.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);
|
||||
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
// don't add first URL to pages to prevent unnecessary duplicate request
|
||||
const photos = scrapePhotos(html, type);
|
||||
const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));
|
||||
// don't add first URL to pages to prevent unnecessary duplicate request
|
||||
const photos = scrapePhotos(html, type);
|
||||
const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));
|
||||
|
||||
const otherPhotos = pages
|
||||
? await Promise.map(pages, async (pageX) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
const otherPhotos = pages
|
||||
? await Promise.map(pages, async (pageX) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
|
||||
return scrapePhotos(pageHtml, type);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
})
|
||||
: [];
|
||||
return scrapePhotos(pageHtml, type);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
})
|
||||
: [];
|
||||
|
||||
const allPhotos = photos.concat(otherPhotos.flat());
|
||||
const allPhotos = photos.concat(otherPhotos.flat());
|
||||
|
||||
if (allPhotos.length === 0 && type === 'highres') {
|
||||
// photos not available, try for screencaps instead
|
||||
return getPhotosLegacy(entryId, site, 'caps', 1);
|
||||
}
|
||||
if (allPhotos.length === 0 && type === 'highres') {
|
||||
// photos not available, try for screencaps instead
|
||||
return getPhotosLegacy(entryId, site, 'caps', 1);
|
||||
}
|
||||
|
||||
return allPhotos;
|
||||
return allPhotos;
|
||||
}
|
||||
|
||||
async function getPhotos(entryId, site, type = 'highres', page = 1) {
|
||||
const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;
|
||||
const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;
|
||||
|
||||
const res = await bhttp.get(albumUrl);
|
||||
const html = res.body.toString();
|
||||
const res = await bhttp.get(albumUrl);
|
||||
const html = res.body.toString();
|
||||
|
||||
const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
|
||||
const sources = sourceLines.reduce((acc, sourceLine) => {
|
||||
const quality = sourceLine.match(/\["\w+"\]/)[0].slice(2, -2);
|
||||
const sourceStart = sourceLine.match(/\/trial|\/tour|\/content/);
|
||||
const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
|
||||
const sources = sourceLines.reduce((acc, sourceLine) => {
|
||||
const quality = sourceLine.match(/\["\w+"\]/)[0].slice(2, -2);
|
||||
const sourceStart = sourceLine.match(/\/trial|\/tour|\/content/);
|
||||
|
||||
if (!sourceStart) return acc;
|
||||
const source = sourceLine.slice(sourceStart.index, sourceLine.indexOf('.jpg') + 4);
|
||||
if (!sourceStart) return acc;
|
||||
const source = sourceLine.slice(sourceStart.index, sourceLine.indexOf('.jpg') + 4);
|
||||
|
||||
if (!source) return acc;
|
||||
if (!acc[quality]) acc[quality] = [];
|
||||
if (!source) return acc;
|
||||
if (!acc[quality]) acc[quality] = [];
|
||||
|
||||
acc[quality].push(`${site.url}${source}`);
|
||||
acc[quality].push(`${site.url}${source}`);
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
if (type === 'highres') {
|
||||
if (sources['1600'] && sources['1600'].length > 0) return sources['1600'];
|
||||
if (sources['1280'] && sources['1280'].length > 0) return sources['1280'];
|
||||
if (sources['1024'] && sources['1024'].length > 0) return sources['1024'];
|
||||
if (sources.Thumbs && sources.Thumbs.length > 0) return sources.Thumbs;
|
||||
if (type === 'highres') {
|
||||
if (sources['1600'] && sources['1600'].length > 0) return sources['1600'];
|
||||
if (sources['1280'] && sources['1280'].length > 0) return sources['1280'];
|
||||
if (sources['1024'] && sources['1024'].length > 0) return sources['1024'];
|
||||
if (sources.Thumbs && sources.Thumbs.length > 0) return sources.Thumbs;
|
||||
|
||||
// no photos available, try for screencaps instead
|
||||
return getPhotos(entryId, site, 'caps', 1);
|
||||
}
|
||||
// no photos available, try for screencaps instead
|
||||
return getPhotos(entryId, site, 'caps', 1);
|
||||
}
|
||||
|
||||
if (sources.jpg && sources.jpg.length > 0) return sources.jpg;
|
||||
if (sources['Video Cap Thumbs'] && sources['Video Cap Thumbs'].length > 0) return sources['Video Cap Thumbs'];
|
||||
if (sources.jpg && sources.jpg.length > 0) return sources.jpg;
|
||||
if (sources['Video Cap Thumbs'] && sources['Video Cap Thumbs'].length > 0) return sources['Video Cap Thumbs'];
|
||||
|
||||
// no screencaps available either, try legacy scraper just in case
|
||||
return getPhotosLegacy(entryId, site, 'highres', 1);
|
||||
// no screencaps available either, try legacy scraper just in case
|
||||
return getPhotosLegacy(entryId, site, 'highres', 1);
|
||||
}
|
||||
|
||||
function getEntryId(html) {
|
||||
const entryId = html.match(/showtagform\((\d+)\)/);
|
||||
const entryId = html.match(/showtagform\((\d+)\)/);
|
||||
|
||||
if (entryId) {
|
||||
return entryId[1];
|
||||
}
|
||||
if (entryId) {
|
||||
return entryId[1];
|
||||
}
|
||||
|
||||
const setIdIndex = html.indexOf('setid:"');
|
||||
const setIdIndex = html.indexOf('setid:"');
|
||||
|
||||
if (setIdIndex) {
|
||||
return html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/)[0];
|
||||
}
|
||||
if (setIdIndex) {
|
||||
return html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/)[0];
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Build release summaries from a list of scene query contexts.
 *
 * @param {Array<{el: object, qu: object}>} scenes - one context per scene element; `el` is the
 *   element itself, `qu` the query helper bound to it
 * @param {object} site - site record; `site.url` prefixes relative image paths and is used as referer
 * @returns {object[]} one release per context (entryId, url, title, date, actors, poster, photos
 *   and, when a teaser URL is found, teaser)
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		// prefer the set ID on the element, fall back to the ID on the rating box
		release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;

		release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
		release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
		release.date = qu.date('.update_date', 'MM/DD/YYYY');

		release.actors = qu.all('.update_models a', true);

		const dvdPhotos = qu.imgs('.dvd_preview_thumb');
		const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;

		// first image becomes the poster, the remainder the photos
		[release.poster, ...release.photos] = dvdPhotos.length
			? dvdPhotos
			: Array.from({ length: photoCount }, (value, index) => {
				// thumbs are exposed through numbered attributes (src0_1x, src0, ...), plain src as last resort
				const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');

				return src ? {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					referer: site.url,
				} : null;
			}).filter(Boolean);

		const teaserScript = qu.html('script');

		if (teaserScript) {
			const teaserStart = teaserScript.indexOf('http');
			const teaserEnd = teaserScript.indexOf('.mp4');

			// indexOf yields -1 when absent; feeding that to slice() can return stray characters
			if (teaserStart !== -1 && teaserEnd > teaserStart) {
				release.teaser = { src: teaserScript.slice(teaserStart, teaserEnd + 4) };
			}
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape the "coming soon" carousel of an index page.
 *
 * @param {string} html - page markup
 * @param {object} site - site record; `site.url` prefixes relative poster paths
 * @returns {object[]} upcoming releases; `poster` and `teaser` are null when the page lacks them
 */
function scrapeUpcoming(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const scenesElements = $('#coming_soon_carousel').find('.table').toArray();

	return scenesElements.map((element) => {
		const entryId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];

		// detach the child nodes of the second details block; the removed set is kept for the actors
		const details = $(element).find('.update_details_comingsoon')
			.eq(1)
			.children()
			.remove();

		// what remains in the parent after removing its children is used as the title
		const title = details
			.end()
			.text()
			.trim();

		const actors = details
			.text()
			.trim()
			.split(', ');

		// the date text carries a 7-character prefix that is cut off before parsing
		const date = moment
			.utc($(element).find('.update_date_comingsoon').text().slice(7), 'MM/DD/YYYY')
			.toDate();

		const posterPath = $(element).find('a img.thumbs').attr('src');
		let poster = null;

		// a scene without thumbnail should not abort the whole listing
		if (posterPath) {
			poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
		}

		const videoClass = $(element).find('.update_thumbnail div').attr('class');
		const videoScript = $(element).find(`script:contains(${videoClass})`).html();

		// html() returns null when no matching script exists; indexOf returns -1 when the URL is absent
		const teaserStart = videoScript ? videoScript.indexOf('https://') : -1;
		const teaserEnd = videoScript ? videoScript.indexOf('.mp4') : -1;
		const teaserSrc = teaserStart !== -1 && teaserEnd > teaserStart
			? videoScript.slice(teaserStart, teaserEnd + 4)
			: null;

		return {
			url: null,
			entryId,
			title,
			date,
			actors,
			poster,
			teaser: teaserSrc ? { src: teaserSrc } : null,
			rating: null,
			site,
		};
	});
}
|
||||
|
||||
/**
 * Scrape a single scene page.
 *
 * @param {{html: string, qu: object}} context - raw markup and query helper for the page
 * @param {string} url - scene URL, stored on the release
 * @param {object} site - site record; `site.url` prefixes relative media paths, `site.slug` gates trailers
 * @param {object} include - flags; `include.trailer` and `include.photos` enable the respective lookups
 * @returns {Promise<object>} the release
 */
async function scrapeScene({ html, qu }, url, site, include) {
	const release = { url, site };

	release.entryId = getEntryId(html);
	release.title = qu.q('.title_bar_hilite', true);
	release.description = qu.q('.update_description', true);

	release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');

	release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
	release.tags = qu.all('.update_tags a', true);

	// lazy capture, so further quoted strings on the same line are not swallowed into the path
	const posterPath = html.match(/useimage = "(.*?)"/)?.[1];

	if (posterPath) {
		release.poster = {
			src: /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`,
			referer: site.url,
		};
	}

	if (include.trailer && site.slug !== 'manuelferrara') {
		// trailer sources are declared in inline script lines of the form movie["trailer..."][...]
		const trailerLines = html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));

		if (trailerLines.length) {
			release.trailer = trailerLines.map((trailerLine) => {
				const src = trailerLine.match(/path:"([\w:/.&=?%]+)"/)?.[1];
				const quality = trailerLine.match(/movie_height:'(\d+)/)?.[1];

				return src && {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					// a reported height of 558 is stored as 540
					quality: quality && Number(quality.replace('558', '540')),
				};
			}).filter(Boolean);
		}
	}

	if (include.photos) release.photos = await getPhotos(release.entryId, site);

	if (qu.exists('.update_dvds a')) {
		release.movie = {
			url: qu.url('.update_dvds a'),
			title: qu.q('.update_dvds a', true),
		};
	}

	// the character class strips whitespace, '|' and every letter of the "Avg Rating:" label
	const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
	if (stars) release.stars = stars;

	return release;
}
|
||||
|
||||
/**
 * Scrape a DVD page including the scenes listed on it.
 *
 * @param {{el: object, qu: object}} context - page element and its query helper
 * @param {string} url - movie URL, stored on the result
 * @param {object} site - site record, passed on to the scene scraper
 * @returns {object} movie with entryId, title, covers, channel, date and, when available, scenes
 */
function scrapeMovie({ el, qu }, url, site) {
	const movie = { url, site };

	movie.entryId = qu.q('.dvd_details_overview .rating_box').dataset.id;
	movie.title = qu.q('.title_bar span', true);
	movie.covers = qu.urls('#dvd-cover-flip > a');
	movie.channel = slugify(qu.q('.update_date a', true), '');

	const sceneQus = ctxa(el, '.dvd_details');
	const scenes = scrapeAll(sceneQus, site);

	// every scene points back at the movie; oldest scene first
	const curatedScenes = scenes
		?.map(scene => ({ ...scene, movie }))
		.sort((sceneA, sceneB) => sceneA.date - sceneB.date);

	// the movie takes the date of its earliest scene; an empty scene list leaves it undefined
	movie.date = curatedScenes?.[0]?.date;

	return {
		...movie,
		...(curatedScenes && { scenes: curatedScenes }),
	};
}
|
||||
|
||||
/**
 * Scrape an actor profile from a model page.
 *
 * @param {string} html - page markup
 * @param {string} url - page URL (currently unused, kept for the call signature)
 * @param {string} actorName - name stored on the profile
 * @returns {object} profile with name and releases, plus whichever of birthdate, age, height,
 *   bust, waist, hip and avatar could be extracted from the bio
 */
function scrapeProfile(html, url, actorName) {
	const { document } = new JSDOM(html).window;

	// a page without bio block still yields a minimal profile instead of throwing
	const bio = document.querySelector('.model_bio')?.textContent ?? '';
	const avatarEl = document.querySelector('.model_bio_pic img');

	const profile = {
		name: actorName,
	};

	const heightString = bio.match(/\d+ feet \d+ inches/);
	// the "Age:" label is followed by either a two-digit age or a full birth date
	const ageString = bio.match(/Age:\s*(\d{2})/);
	const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
	const measurementsString = bio.match(/\w+-\d+-\d+/);

	if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
	if (ageString) profile.age = Number(ageString[1]);

	if (heightString) profile.height = heightToCm(heightString[0]);

	if (measurementsString) {
		const [bust, waist, hip] = measurementsString[0].split('-');

		// bust stays a string, it may carry a cup letter
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (avatarEl) {
		// candidate sources in the order the attributes are tried
		const avatarSources = ['src0_3x', 'src0_2x', 'src0_1x', 'src0', 'src']
			.map(attribute => avatarEl.getAttribute(attribute))
			.filter(Boolean);

		if (avatarSources.length) profile.avatar = avatarSources;
	}

	profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the latest updates.
 *
 * @param {object} site - site record; `site.parameters.latest` may hold a format string taking the
 *   page number, otherwise the default listing path under `site.url` is used
 * @param {number} [page=1] - listing page
 * @returns {Promise<object[]|*>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.latest
		? util.format(site.parameters.latest, page)
		: `${site.url}/trial/categories/movies_${page}_d.html`;

	const res = await geta(url, '.update_details');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
|
||||
|
||||
/**
 * Fetch the upcoming scenes of a site.
 *
 * @param {object} site - site record; `site.parameters.upcoming === false` disables the lookup,
 *   a string overrides the default index URL
 * @returns {Promise<object[]|number|null>} upcoming releases, the HTTP status code on failure,
 *   or null when upcoming scenes are disabled for the site
 */
async function fetchUpcoming(site) {
	if (site.parameters?.upcoming === false) return null;

	const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeUpcoming(res.body.toString(), site);
	}

	return res.statusCode;
}
|
||||
|
||||
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - scene URL
 * @param {object} site - site record
 * @param {object} baseRelease - unused here, kept for the shared scraper call signature
 * @param {*} preflight - unused here, kept for the shared scraper call signature
 * @param {object} include - flags forwarded to the scene scraper
 * @returns {Promise<object|*>} the release, or the response status when the request failed
 */
async function fetchScene(url, site, baseRelease, preflight, include) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape a DVD page.
 *
 * @param {string} url - movie URL
 * @param {object} site - site record
 * @returns {Promise<object|*>} the movie, or the response status when the request failed
 */
async function fetchMovie(url, site) {
	const res = await get(url);

	return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}
|
||||
|
||||
/**
 * Look up an actor profile, trying the hyphenated slug first and the concatenated slug second.
 *
 * @param {string} actorName - actor name as stored
 * @returns {Promise<object|null>} the scraped profile, or null when neither URL responds with 200
 */
async function fetchProfile(actorName) {
	const actorSlugA = slugify(actorName, '-');
	const actorSlugB = slugify(actorName, '');

	const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
	const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;

	const resA = await bhttp.get(urlA);

	if (resA.statusCode === 200) {
		return scrapeProfile(resA.body.toString(), urlA, actorName);
	}

	// both slugs can be identical (e.g. single-word names); do not request the same page twice
	if (urlB === urlA) return null;

	const resB = await bhttp.get(urlB);

	if (resB.statusCode === 200) {
		return scrapeProfile(resB.body.toString(), urlB, actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -7,184 +7,184 @@ const moment = require('moment');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
// two-letter shoot ID prefix -> site slug
const siteMapByKey = {
	PF: 'pornfidelity',
	TF: 'teenfidelity',
	KM: 'kellymadison',
};

// inverse lookup: site slug -> shoot ID prefix
const siteMapBySlug = Object.fromEntries(Object.entries(siteMapByKey).map(([key, slug]) => [slug, key]));
|
||||
|
||||
/**
 * Concatenate the trimmed text of the direct text nodes in a node collection, ignoring elements.
 *
 * @param {Iterable<{nodeType: number, textContent: string}>} parentEl - node collection; callers
 *   in this file pass an element's `childNodes`
 * @returns {string} joined text of all nodes with nodeType 3 (text nodes)
 */
function extractTextNode(parentEl) {
	return Array.from(parentEl)
		.filter(node => node.nodeType === 3)
		.map(node => node.textContent.trim())
		.join('');
}
|
||||
|
||||
/**
 * Scrape the network-wide episode overview, keeping only episodes of the requested site.
 *
 * @param {string} html - overview markup
 * @param {object} site - site record; `site.slug` selects the episodes, `site.url` builds their URLs
 * @returns {object[]} releases belonging to the site
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	return Array.from(document.querySelectorAll('.episode'), (scene) => {
		const release = { site };

		release.shootId = scene.querySelector('.card-meta .text-right').textContent.trim();

		// the first two word characters of the shoot ID identify the site; an unrecognised ID is skipped below
		const siteId = release.shootId.match(/\w{2}/)?.[0];
		const siteSlug = siteMapByKey[siteId];

		if (site.slug !== siteSlug) {
			// using generic network overview, scene is not from the site we want
			return null;
		}

		const durationEl = scene.querySelector('.content a');

		// the trailing number of the episode link is the entry ID
		[release.entryId] = durationEl.href.match(/\d+$/);
		release.url = `${site.url}/episodes/${release.entryId}`;

		release.title = scene.querySelector('h5 a').textContent.trim();

		const dateEl = scene.querySelector('.card-meta .text-left').childNodes;
		const dateString = extractTextNode(dateEl);

		release.date = moment.utc(dateString, ['MMM D', 'MMM D, YYYY']).toDate();
		release.actors = Array.from(scene.querySelectorAll('.models a'), el => el.textContent);

		const durationString = durationEl.textContent.match(/\d+ min/);
		if (durationString) release.duration = Number(durationString[0].match(/\d+/)[0]) * 60;

		release.poster = scene.querySelector('.card-img-top').dataset.src;

		// an episode card without preview video should not abort the whole page
		const teaserSrc = scene.querySelector('video')?.src;
		if (teaserSrc) release.teaser = { src: teaserSrc };

		return release;
	}).filter(Boolean);
}
|
||||
|
||||
/**
 * Scrape an episode page.
 *
 * @param {string} html - page markup
 * @param {string} url - episode URL; supplies the entry ID when no base release is given
 * @param {object} site - site record, stored on the release
 * @param {object} [baseRelease] - release from the overview; when it already has a poster, the
 *   poster found here is stored as a photo instead
 * @returns {object} the release
 */
function scrapeScene(html, url, site, baseRelease) {
	const { document } = new JSDOM(html).window;
	const release = { url, site };

	const titleEl = document.querySelector('.card-header.row h4').childNodes;
	const titleString = extractTextNode(titleEl);

	if (!baseRelease) [release.entryId] = url.match(/\d+/);

	release.title = titleString
		.replace('Trailer: ', '')
		.replace(/- \w+ #\d+$/, '')
		.trim();

	// the title ends in "<channel> #<episode>"
	release.channel = titleString.match(/\w+ #\d+$/)[0].match(/\w+/)[0].toLowerCase();

	const episode = titleString.match(/#\d+$/)[0];
	const siteKey = siteMapBySlug[release.channel];

	release.shootId = `${siteKey} ${episode}`;
	release.description = document.querySelector('p.card-text').textContent.trim();

	const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes;
	const dateString = extractTextNode(dateEl);

	release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate();
	release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent);

	const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent;
	const durationString = durationRaw.match(/\d+:\d+/)[0];

	release.duration = moment.duration(`00:${durationString}`).asSeconds();

	const bodyHtml = document.body.innerHTML;
	const trailerStart = bodyHtml.indexOf('player.updateSrc');

	// without a player call there is nothing to slice; -1 would otherwise select from the end of the body
	const trailerString = trailerStart === -1
		? ''
		: bodyHtml.slice(trailerStart, bodyHtml.indexOf(');', trailerStart));

	// lazy match with an escaped dot: one URL per source, ending at the literal ".mp4"
	const trailers = trailerString.match(/https:\/\/.*?\.mp4/g) || [];
	const resolutions = (trailerString.match(/res: '\d+'/g) || []).map((res) => {
		const resolution = Number(res.match(/\d+/)[0]);

		return resolution === 4000 ? 2160 : resolution; // 4k is not 4000 pixels high
	});

	if (trailers.length) {
		// sources and resolutions are paired by position
		release.trailer = trailers.map((trailer, index) => ({
			src: trailer,
			quality: resolutions[index],
		}));
	}

	const posterPrefix = html.indexOf('poster:');

	if (posterPrefix !== -1) {
		const posterStart = html.indexOf('http', posterPrefix);
		const posterEnd = html.indexOf('.jpg', posterPrefix);

		if (posterStart !== -1 && posterEnd > posterStart) {
			const poster = html.slice(posterStart, posterEnd + 4);

			if (baseRelease?.poster) release.photos = [poster];
			else release.poster = poster;
		}
	}

	return release;
}
|
||||
|
||||
/**
 * Scrape an actor profile from a model page.
 *
 * @param {string} html - page markup
 * @param {string} actorName - name stored on the profile
 * @returns {object} profile with name plus whichever bio fields and avatar the page provides
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// first column holds the labels (last character dropped), second column the values
	const bioKeys = Array.from(document.querySelectorAll('table.table td:nth-child(1)'), el => el.textContent.slice(0, -1));
	const bioValues = Array.from(document.querySelectorAll('table.table td:nth-child(2)'), el => el.textContent);
	const bio = Object.fromEntries(bioKeys.map((key, index) => [key, bioValues[index]]));

	// NOTE(review): all three measurements are kept as strings here
	if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio.Birthplace) profile.birthPlace = bio.Birthplace;

	// a height value without digits is skipped rather than throwing on the destructuring
	const heightParts = bio.Height ? bio.Height.match(/\d+/g) : null;

	if (heightParts) {
		const [feet, inches] = heightParts;
		profile.height = feetInchesToCm(feet, inches);
	}

	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

	// first image whose source mentions 'model' is used as avatar
	const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.match('model'));

	if (avatarEl) profile.avatar = avatarEl.src;

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the episode overview and scrape the episodes of the given site.
 *
 * @param {object} site - site record, passed on to the scraper
 * @param {number} [page=1] - overview page
 * @returns {Promise<object[]|null>} releases, or null when the request did not succeed
 */
async function fetchLatest(site, page = 1) {
	const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
	const res = await bhttp.get(url, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	// the endpoint answers with a body carrying a status flag and the markup
	if (res.statusCode === 200 && res.body.status === 'success') {
		return scrapeLatest(res.body.html, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch and scrape an episode page; the request always goes to www.kellymadison.com using the
 * path of the given URL.
 *
 * @param {string} url - episode URL
 * @param {object} site - site record
 * @param {object} [baseRelease] - release from the overview, forwarded to the scraper
 * @returns {Promise<object|null>} the release, or null when the page could not be retrieved
 */
async function fetchScene(url, site, baseRelease) {
	const { pathname } = new URL(url);

	const res = await bhttp.get(`https://www.kellymadison.com${pathname}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	// same convention as the other fetchers in this module: do not scrape error pages
	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), url, site, baseRelease);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch and scrape an actor profile.
 *
 * @param {string} actorName - actor name; lowercased with whitespace runs replaced by hyphens to
 *   form the model page slug
 * @returns {Promise<object|null>} the profile, or null when the page does not respond with 200
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
	const res = await bhttp.get(`https://www.kellymadison.com/models/${actorSlug}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -5,116 +5,116 @@ const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Scrape a shoot listing page.
 *
 * @param {string} html - listing markup
 * @param {object} site - site record, stored on every release
 * @returns {object[]} one release per listed shoot
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.shoot-list .shoot').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('.shoot-thumb-title a');
		const href = sceneLinkElement.attr('href');
		const url = `https://kink.com${href}`;
		// third path segment of the link serves as both shoot ID and entry ID
		const shootId = href.split('/')[2];
		const title = sceneLinkElement.text().trim();

		const poster = $(element).find('.adimage').attr('src');
		const photos = $(element).find('.rollover .roll-image').map((photoIndex, photoElement) => $(photoElement).attr('data-imagesrc')).toArray();

		const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
		const actors = $(element).find('.shoot-thumb-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
		// the rating attribute is divided by 10 to obtain stars
		const stars = $(element).find('.average-rating').attr('data-rating') / 10;

		const timestamp = $(element).find('.video span').text();
		const timestampComponents = timestamp.split(':'); // fix mixed hh:mm:ss and mm:ss format
		const duration = moment.duration(timestampComponents.length > 2 ? timestamp : `0:${timestamp}`).asSeconds();

		return {
			url,
			shootId,
			entryId: shootId,
			title,
			actors,
			date,
			photos,
			poster,
			rating: {
				stars,
			},
			duration,
			site,
		};
	});
}
|
||||
|
||||
async function scrapeScene(html, url, shootId, ratingRes, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
|
||||
const title = $('h1.shoot-title span.favorite-button').attr('data-title');
|
||||
const actorsRaw = $('.shoot-info p.starring');
|
||||
// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
|
||||
const title = $('h1.shoot-title span.favorite-button').attr('data-title');
|
||||
const actorsRaw = $('.shoot-info p.starring');
|
||||
|
||||
const photos = $('.gallery .thumb img').map((photoIndex, photoElement) => $(photoElement).attr('data-image-file')).toArray();
|
||||
const trailerVideo = $('.player span[data-type="trailer-src"]').attr('data-url');
|
||||
const trailerPoster = $('.player video#kink-player').attr('poster');
|
||||
const photos = $('.gallery .thumb img').map((photoIndex, photoElement) => $(photoElement).attr('data-image-file')).toArray();
|
||||
const trailerVideo = $('.player span[data-type="trailer-src"]').attr('data-url');
|
||||
const trailerPoster = $('.player video#kink-player').attr('poster');
|
||||
|
||||
const date = moment.utc($(actorsRaw)
|
||||
.prev()
|
||||
.text()
|
||||
.trim()
|
||||
.replace('Date: ', ''),
|
||||
'MMMM DD, YYYY')
|
||||
.toDate();
|
||||
const date = moment.utc($(actorsRaw)
|
||||
.prev()
|
||||
.text()
|
||||
.trim()
|
||||
.replace('Date: ', ''),
|
||||
'MMMM DD, YYYY')
|
||||
.toDate();
|
||||
|
||||
const actors = $(actorsRaw).find('span.names a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
const description = $('.shoot-info .description').text().trim();
|
||||
const actors = $(actorsRaw).find('span.names a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
const description = $('.shoot-info .description').text().trim();
|
||||
|
||||
const { average: stars } = ratingRes.body;
|
||||
const { average: stars } = ratingRes.body;
|
||||
|
||||
const siteName = $('.shoot-logo a').attr('href').split('/')[2];
|
||||
const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
const siteName = $('.shoot-logo a').attr('href').split('/')[2];
|
||||
const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
|
||||
const tags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const channel = siteSlug;
|
||||
const tags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const channel = siteSlug;
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
entryId: shootId,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
description,
|
||||
photos,
|
||||
poster: trailerPoster,
|
||||
trailer: {
|
||||
src: trailerVideo,
|
||||
quality: 480,
|
||||
},
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
tags,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
entryId: shootId,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
description,
|
||||
photos,
|
||||
poster: trailerPoster,
|
||||
trailer: {
|
||||
src: trailerVideo,
|
||||
quality: 480,
|
||||
},
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
tags,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.get(`${site.url}/latest/page/${page}`);
|
||||
const res = await bhttp.get(`${site.url}/latest/page/${page}`);
|
||||
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const shootId = new URL(url).pathname.split('/')[2];
|
||||
const shootId = new URL(url).pathname.split('/')[2];
|
||||
|
||||
const [res, ratingRes] = await Promise.all([
|
||||
bhttp.get(url),
|
||||
bhttp.get(`https://kink.com/api/ratings/${shootId}`),
|
||||
]);
|
||||
const [res, ratingRes] = await Promise.all([
|
||||
bhttp.get(url),
|
||||
bhttp.get(`https://kink.com/api/ratings/${shootId}`),
|
||||
]);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
|
||||
return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -8,201 +8,201 @@ const moment = require('moment');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
|
||||
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
|
||||
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
|
||||
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
|
||||
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
|
||||
|
||||
return { shootId, title };
|
||||
return { shootId, title };
|
||||
}
|
||||
|
||||
function getPoster(posterElement, sceneId) {
|
||||
const posterStyle = posterElement.attr('style');
|
||||
const posterStyle = posterElement.attr('style');
|
||||
|
||||
if (posterStyle) {
|
||||
return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
|
||||
}
|
||||
if (posterStyle) {
|
||||
return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
|
||||
}
|
||||
|
||||
const posterRange = posterElement.attr('data-casting');
|
||||
const posterRangeData = posterRange ? JSON.parse(posterRange) : null;
|
||||
const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];
|
||||
const posterRange = posterElement.attr('data-casting');
|
||||
const posterRangeData = posterRange ? JSON.parse(posterRange) : null;
|
||||
const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];
|
||||
|
||||
if (!posterTimeRange) {
|
||||
return null;
|
||||
}
|
||||
if (!posterTimeRange) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (typeof posterTimeRange === 'number') {
|
||||
// poster time is already a single time value
|
||||
return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
|
||||
}
|
||||
if (typeof posterTimeRange === 'number') {
|
||||
// poster time is already a single time value
|
||||
return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
|
||||
}
|
||||
|
||||
const [max, min] = posterTimeRange.split('-');
|
||||
const posterTime = Math.floor(Math.random() * (Number(max) - Number(min) + 1) + Number(min));
|
||||
const [max, min] = posterTimeRange.split('-');
|
||||
const posterTime = Math.floor(Math.random() * (Number(max) - Number(min) + 1) + Number(min));
|
||||
|
||||
return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
|
||||
return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
|
||||
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.thumbnails > div').toArray();
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.thumbnails > div').toArray();
|
||||
|
||||
return scenesElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thumbnail-title a');
|
||||
const url = sceneLinkElement.attr('href');
|
||||
return scenesElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thumbnail-title a');
|
||||
const url = sceneLinkElement.attr('href');
|
||||
|
||||
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
const entryId = new URL(url).pathname.split('/')[2];
|
||||
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
const entryId = new URL(url).pathname.split('/')[2];
|
||||
|
||||
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
||||
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
||||
|
||||
const sceneId = $(element).attr('data-content');
|
||||
const posterElement = $(element).find('.thumbnail-avatar');
|
||||
const sceneId = $(element).attr('data-content');
|
||||
const posterElement = $(element).find('.thumbnail-avatar');
|
||||
|
||||
const poster = getPoster(posterElement, sceneId);
|
||||
const poster = getPoster(posterElement, sceneId);
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
entryId,
|
||||
title,
|
||||
date,
|
||||
poster,
|
||||
site,
|
||||
};
|
||||
});
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
entryId,
|
||||
title,
|
||||
date,
|
||||
poster,
|
||||
site,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site, useGallery) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const playerObject = $('script:contains("new WatchPage")').html();
|
||||
const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1);
|
||||
const data = playerData && JSON.parse(playerData);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const playerObject = $('script:contains("new WatchPage")').html();
|
||||
const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1);
|
||||
const data = playerData && JSON.parse(playerData);
|
||||
|
||||
const release = { url };
|
||||
const release = { url };
|
||||
|
||||
const originalTitle = $('h1.watchpage-title').text().trim();
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
const originalTitle = $('h1.watchpage-title').text().trim();
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
|
||||
release.shootId = shootId;
|
||||
release.entryId = new URL(url).pathname.split('/')[2];
|
||||
release.shootId = shootId;
|
||||
release.entryId = new URL(url).pathname.split('/')[2];
|
||||
|
||||
release.title = title;
|
||||
release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
|
||||
release.title = title;
|
||||
release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
|
||||
|
||||
const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();
|
||||
const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();
|
||||
|
||||
release.description = $('meta[name="description"]')?.attr('content')?.trim()
|
||||
release.description = $('meta[name="description"]')?.attr('content')?.trim()
|
||||
|| (descriptionElement && $(descriptionElement).find('dd').text().trim());
|
||||
|
||||
release.actors = $(actorsElement)
|
||||
.find('a[href*="com/model"]')
|
||||
.map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
release.actors = $(actorsElement)
|
||||
.find('a[href*="com/model"]')
|
||||
.map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
|
||||
release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
|
||||
release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
|
||||
release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
|
||||
const photos = useGallery
|
||||
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
|
||||
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
|
||||
const photos = useGallery
|
||||
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
|
||||
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
|
||||
|
||||
release.photos = photos.map((source) => {
|
||||
// source without parameters sometimes serves larger preview photo
|
||||
const { origin, pathname } = new URL(source);
|
||||
release.photos = photos.map((source) => {
|
||||
// source without parameters sometimes serves larger preview photo
|
||||
const { origin, pathname } = new URL(source);
|
||||
|
||||
return `${origin}${pathname}`;
|
||||
return `${origin}${pathname}`;
|
||||
|
||||
/* disable thumbnail as fallback, usually enough high res photos available
|
||||
/* disable thumbnail as fallback, usually enough high res photos available
|
||||
return [
|
||||
`${origin}${pathname}`,
|
||||
source,
|
||||
];
|
||||
*/
|
||||
});
|
||||
});
|
||||
|
||||
const posterStyle = $('#player').attr('style');
|
||||
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
|
||||
const posterStyle = $('#player').attr('style');
|
||||
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
|
||||
|
||||
release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
|
||||
release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
|
||||
|
||||
if (data) {
|
||||
const qualityMap = {
|
||||
web: 240,
|
||||
vga: 480,
|
||||
hd: 720,
|
||||
'1080p': 1080,
|
||||
};
|
||||
if (data) {
|
||||
const qualityMap = {
|
||||
web: 240,
|
||||
vga: 480,
|
||||
hd: 720,
|
||||
'1080p': 1080,
|
||||
};
|
||||
|
||||
release.trailer = data.clip.qualities.map(trailer => ({
|
||||
src: trailer.src,
|
||||
type: trailer.type,
|
||||
quality: qualityMap[trailer.quality] || trailer.quality,
|
||||
}));
|
||||
}
|
||||
release.trailer = data.clip.qualities.map(trailer => ({
|
||||
src: trailer.src,
|
||||
type: trailer.type,
|
||||
quality: qualityMap[trailer.quality] || trailer.quality,
|
||||
}));
|
||||
}
|
||||
|
||||
const studioName = $('.watchpage-studioname').first().text().trim();
|
||||
release.studio = slugify(studioName, '');
|
||||
const studioName = $('.watchpage-studioname').first().text().trim();
|
||||
release.studio = slugify(studioName, '');
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
const avatarEl = document.querySelector('.model--avatar img[src^="http"]');
|
||||
const entries = Array.from(document.querySelectorAll('.model--description tr'), el => el.textContent.replace(/\n/g, '').split(':'));
|
||||
const avatarEl = document.querySelector('.model--avatar img[src^="http"]');
|
||||
const entries = Array.from(document.querySelectorAll('.model--description tr'), el => el.textContent.replace(/\n/g, '').split(':'));
|
||||
|
||||
const bio = entries
|
||||
.filter(entry => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {});
|
||||
const bio = entries
|
||||
.filter(entry => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {});
|
||||
|
||||
profile.birthPlace = bio.Nationality;
|
||||
profile.birthPlace = bio.Nationality;
|
||||
|
||||
if (bio.Age) profile.age = bio.Age;
|
||||
if (avatarEl) profile.avatar = avatarEl.src;
|
||||
if (bio.Age) profile.age = bio.Age;
|
||||
if (avatarEl) profile.avatar = avatarEl.src;
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.get(`${site.url}/new-videos/${page}`);
|
||||
const res = await bhttp.get(`${site.url}/new-videos/${page}`);
|
||||
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const useGallery = true;
|
||||
const useGallery = true;
|
||||
|
||||
// TODO: fall back on screenshots when gallery is not available
|
||||
const res = useGallery
|
||||
? await bhttp.get(`${url}/gallery#gallery`)
|
||||
: await bhttp.get(`${url}/screenshots#screenshots`);
|
||||
// TODO: fall back on screenshots when gallery is not available
|
||||
const res = useGallery
|
||||
? await bhttp.get(`${url}/gallery#gallery`)
|
||||
: await bhttp.get(`${url}/screenshots#screenshots`);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site, useGallery);
|
||||
return scrapeScene(res.body.toString(), url, site, useGallery);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`);
|
||||
const data = res.body;
|
||||
const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`);
|
||||
const data = res.body;
|
||||
|
||||
const result = data.terms.find(item => item.type === 'model');
|
||||
const result = data.terms.find(item => item.type === 'model');
|
||||
|
||||
if (result) {
|
||||
const bioRes = await bhttp.get(result.url);
|
||||
const html = bioRes.body.toString();
|
||||
if (result) {
|
||||
const bioRes = await bhttp.get(result.url);
|
||||
const html = bioRes.body.toString();
|
||||
|
||||
return scrapeProfile(html, result.url, actorName);
|
||||
}
|
||||
return scrapeProfile(html, result.url, actorName);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
async function networkFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'men', 'modelprofile');
|
||||
return fetchProfile(actorName, 'men', 'modelprofile');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
async function networkFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'devianthardcore');
|
||||
return fetchProfile(actorName, 'devianthardcore');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -8,232 +8,232 @@ const moment = require('moment');
|
||||
const { get } = require('../utils/http');
|
||||
|
||||
const descriptionTags = {
|
||||
'anal cream pie': 'anal creampie',
|
||||
'ass to mouth': 'ass to mouth',
|
||||
'cream pie in her ass': 'anal creampie',
|
||||
'eats ass': 'ass eating',
|
||||
facial: 'facial',
|
||||
gaped: 'gaping',
|
||||
gapes: 'gaping',
|
||||
gape: 'gaping',
|
||||
'rectal cream pie': 'anal creampie',
|
||||
rimming: 'ass eating',
|
||||
'anal cream pie': 'anal creampie',
|
||||
'ass to mouth': 'ass to mouth',
|
||||
'cream pie in her ass': 'anal creampie',
|
||||
'eats ass': 'ass eating',
|
||||
facial: 'facial',
|
||||
gaped: 'gaping',
|
||||
gapes: 'gaping',
|
||||
gape: 'gaping',
|
||||
'rectal cream pie': 'anal creampie',
|
||||
rimming: 'ass eating',
|
||||
};
|
||||
|
||||
function deriveTagsFromDescription(description) {
|
||||
const matches = (description || '').toLowerCase().match(new RegExp(Object.keys(descriptionTags).join('|'), 'g'));
|
||||
const matches = (description || '').toLowerCase().match(new RegExp(Object.keys(descriptionTags).join('|'), 'g'));
|
||||
|
||||
return matches
|
||||
? matches.map(match => descriptionTags[match])
|
||||
: [];
|
||||
return matches
|
||||
? matches.map(match => descriptionTags[match])
|
||||
: [];
|
||||
}
|
||||
|
||||
async function scrapeLatestA(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const sceneElements = document.querySelectorAll('.content-item-large, .content-item');
|
||||
const { document } = new JSDOM(html).window;
|
||||
const sceneElements = document.querySelectorAll('.content-item-large, .content-item');
|
||||
|
||||
return Promise.all(Array.from(sceneElements, async (element) => {
|
||||
const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });
|
||||
return Promise.all(Array.from(sceneElements, async (element) => {
|
||||
const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });
|
||||
|
||||
const titleElement = element.querySelector('h3.title a');
|
||||
const title = titleElement.textContent;
|
||||
const url = titleElement.href;
|
||||
const entryId = url.split('/').slice(-2)[0];
|
||||
const titleElement = element.querySelector('h3.title a');
|
||||
const title = titleElement.textContent;
|
||||
const url = titleElement.href;
|
||||
const entryId = url.split('/').slice(-2)[0];
|
||||
|
||||
const descriptionElement = element.querySelector('.desc');
|
||||
const description = descriptionElement && descriptionElement.textContent.trim();
|
||||
const date = moment(element.querySelector('.date, time').textContent, 'Do MMM YYYY').toDate();
|
||||
const descriptionElement = element.querySelector('.desc');
|
||||
const description = descriptionElement && descriptionElement.textContent.trim();
|
||||
const date = moment(element.querySelector('.date, time').textContent, 'Do MMM YYYY').toDate();
|
||||
|
||||
const actors = Array.from(element.querySelectorAll('h4.models a'), actorElement => actorElement.textContent);
|
||||
const actors = Array.from(element.querySelectorAll('h4.models a'), actorElement => actorElement.textContent);
|
||||
|
||||
const durationString = element.querySelector('.total-time').textContent.trim();
|
||||
// timestamp is sometimes 00:00, sometimes 0:00:00
|
||||
const duration = durationString.split(':').length === 3
|
||||
? moment.duration(durationString).asSeconds()
|
||||
: moment.duration(`00:${durationString}`).asSeconds();
|
||||
const durationString = element.querySelector('.total-time').textContent.trim();
|
||||
// timestamp is sometimes 00:00, sometimes 0:00:00
|
||||
const duration = durationString.split(':').length === 3
|
||||
? moment.duration(durationString).asSeconds()
|
||||
: moment.duration(`00:${durationString}`).asSeconds();
|
||||
|
||||
const ratingElement = element.querySelector('.rating');
|
||||
const stars = ratingElement && ratingElement.dataset.rating;
|
||||
const ratingElement = element.querySelector('.rating');
|
||||
const stars = ratingElement && ratingElement.dataset.rating;
|
||||
|
||||
const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('img'), imageElement => imageElement.src);
|
||||
const secondaryPhotos = $('.thumb-top, .thumb-bottom')
|
||||
.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
|
||||
.toArray()
|
||||
.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
|
||||
const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('img'), imageElement => imageElement.src);
|
||||
const secondaryPhotos = $('.thumb-top, .thumb-bottom')
|
||||
.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
|
||||
.toArray()
|
||||
.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
|
||||
|
||||
const photos = [...primaryPhotos, ...secondaryPhotos];
|
||||
const tags = deriveTagsFromDescription(description);
|
||||
const photos = [...primaryPhotos, ...secondaryPhotos];
|
||||
const tags = deriveTagsFromDescription(description);
|
||||
|
||||
const scene = {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
description,
|
||||
actors,
|
||||
director: 'Mike Adriano',
|
||||
date,
|
||||
duration,
|
||||
tags,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site,
|
||||
};
|
||||
const scene = {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
description,
|
||||
actors,
|
||||
director: 'Mike Adriano',
|
||||
date,
|
||||
duration,
|
||||
tags,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site,
|
||||
};
|
||||
|
||||
return scene;
|
||||
}));
|
||||
return scene;
|
||||
}));
|
||||
}
|
||||
|
||||
async function scrapeLatestB(html) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const sceneElements = document.querySelectorAll('.content-border');
|
||||
const { document } = new JSDOM(html).window;
|
||||
const sceneElements = document.querySelectorAll('.content-border');
|
||||
|
||||
return Promise.all(Array.from(sceneElements, async (element) => {
|
||||
const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });
|
||||
const release = {
|
||||
director: 'Mike Adriano',
|
||||
};
|
||||
return Promise.all(Array.from(sceneElements, async (element) => {
|
||||
const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });
|
||||
const release = {
|
||||
director: 'Mike Adriano',
|
||||
};
|
||||
|
||||
const titleElement = element.querySelector('.content-title-wrap a');
|
||||
release.title = titleElement.title || titleElement.textContent.trim();
|
||||
release.url = titleElement.href;
|
||||
release.entryId = release.url.split('/').slice(-2)[0];
|
||||
const titleElement = element.querySelector('.content-title-wrap a');
|
||||
release.title = titleElement.title || titleElement.textContent.trim();
|
||||
release.url = titleElement.href;
|
||||
release.entryId = release.url.split('/').slice(-2)[0];
|
||||
|
||||
release.description = element.querySelector('.content-description').textContent.trim();
|
||||
release.date = (moment(element.querySelector('.mobile-date').textContent, 'MM/DD/YYYY')
|
||||
release.description = element.querySelector('.content-description').textContent.trim();
|
||||
release.date = (moment(element.querySelector('.mobile-date').textContent, 'MM/DD/YYYY')
|
||||
|| moment(element.querySelector('.date').textContent, 'Do MMM YYYY')).toDate();
|
||||
release.actors = Array.from(element.querySelectorAll('.content-models a'), actorElement => actorElement.textContent);
|
||||
release.actors = Array.from(element.querySelectorAll('.content-models a'), actorElement => actorElement.textContent);
|
||||
|
||||
const durationString = element.querySelector('.total-time').textContent.trim();
|
||||
// timestamp is somethines 00:00, sometimes 0:00:00
|
||||
release.duration = durationString.split(':').length === 3
|
||||
? moment.duration(durationString).asSeconds()
|
||||
: moment.duration(`00:${durationString}`).asSeconds();
|
||||
const durationString = element.querySelector('.total-time').textContent.trim();
|
||||
// timestamp is somethines 00:00, sometimes 0:00:00
|
||||
release.duration = durationString.split(':').length === 3
|
||||
? moment.duration(durationString).asSeconds()
|
||||
: moment.duration(`00:${durationString}`).asSeconds();
|
||||
|
||||
const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('a img'), imageElement => imageElement.src);
|
||||
const secondaryPhotos = $('.thumb-mouseover')
|
||||
.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
|
||||
.toArray()
|
||||
.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
|
||||
const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('a img'), imageElement => imageElement.src);
|
||||
const secondaryPhotos = $('.thumb-mouseover')
|
||||
.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
|
||||
.toArray()
|
||||
.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));
|
||||
|
||||
release.poster = poster;
|
||||
release.photos = [...primaryPhotos, ...secondaryPhotos];
|
||||
release.poster = poster;
|
||||
release.photos = [...primaryPhotos, ...secondaryPhotos];
|
||||
|
||||
release.tags = deriveTagsFromDescription(release.description);
|
||||
return release;
|
||||
}));
|
||||
release.tags = deriveTagsFromDescription(release.description);
|
||||
return release;
|
||||
}));
|
||||
}
|
||||
|
||||
async function scrapeSceneA(html, url) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const element = document.querySelector('.content-page-info');
|
||||
const release = {
|
||||
url,
|
||||
director: 'Mike Adriano',
|
||||
};
|
||||
const { document } = new JSDOM(html).window;
|
||||
const element = document.querySelector('.content-page-info');
|
||||
const release = {
|
||||
url,
|
||||
director: 'Mike Adriano',
|
||||
};
|
||||
|
||||
release.entryId = url.split('/').slice(-2)[0];
|
||||
release.title = element.querySelector('.title').textContent.trim();
|
||||
release.description = element.querySelector('.desc').textContent.trim();
|
||||
release.date = moment(element.querySelector('.post-date').textContent.trim(), 'Do MMM YYYY').toDate();
|
||||
release.entryId = url.split('/').slice(-2)[0];
|
||||
release.title = element.querySelector('.title').textContent.trim();
|
||||
release.description = element.querySelector('.desc').textContent.trim();
|
||||
release.date = moment(element.querySelector('.post-date').textContent.trim(), 'Do MMM YYYY').toDate();
|
||||
|
||||
release.actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);
|
||||
release.actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);
|
||||
|
||||
const durationString = element.querySelector('.total-time').textContent.trim();
|
||||
// timestamp is sometimes 00:00, sometimes 0:00:00
|
||||
release.duration = durationString.split(':').length === 3
|
||||
? moment.duration(durationString).asSeconds()
|
||||
: moment.duration(`00:${durationString}`).asSeconds();
|
||||
const durationString = element.querySelector('.total-time').textContent.trim();
|
||||
// timestamp is sometimes 00:00, sometimes 0:00:00
|
||||
release.duration = durationString.split(':').length === 3
|
||||
? moment.duration(durationString).asSeconds()
|
||||
: moment.duration(`00:${durationString}`).asSeconds();
|
||||
|
||||
const { poster } = document.querySelector('.content-page-header video');
|
||||
const { src, type } = document.querySelector('.content-page-header source');
|
||||
const { poster } = document.querySelector('.content-page-header video');
|
||||
const { src, type } = document.querySelector('.content-page-header source');
|
||||
|
||||
release.poster = poster;
|
||||
release.trailer = { src, type };
|
||||
release.poster = poster;
|
||||
release.trailer = { src, type };
|
||||
|
||||
release.tags = deriveTagsFromDescription(release.description);
|
||||
release.tags = deriveTagsFromDescription(release.description);
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Scrape a scene page that uses the `.content-page-info` layout.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; the second-to-last '/'-separated segment becomes the entry ID.
 * @param {Object} site - Site entry, attached to the result unchanged.
 * @returns {Promise<Object>} Scene with url, entryId, title, description, actors,
 *   director, date, duration (seconds), tags, poster, trailer and site.
 */
async function scrapeSceneB(html, url, site) {
	const { document } = new JSDOM(html).window;
	const element = document.querySelector('.content-page-info');

	const entryId = url.split('/').slice(-2)[0];
	const title = element.querySelector('.title').textContent.trim();
	const description = element.querySelector('.desc').textContent.trim();
	const date = moment(element.querySelector('.date').textContent.trim(), 'Do MMM YYYY').toDate();

	const actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);

	const durationString = element.querySelector('.total-time').textContent.trim();
	// timestamp is sometimes 00:00, sometimes 0:00:00; give the short form an hours field before parsing
	const duration = durationString.split(':').length === 3
		? moment.duration(durationString).asSeconds()
		: moment.duration(`00:${durationString}`).asSeconds();

	const { poster } = document.querySelector('.content-page-header-inner video');
	const { src, type } = document.querySelector('.content-page-header-inner source');

	const tags = deriveTagsFromDescription(description);

	return {
		url,
		entryId,
		title,
		description,
		actors,
		director: 'Mike Adriano',
		date,
		duration,
		tags,
		poster,
		trailer: {
			src,
			type,
		},
		site,
	};
}
|
||||
|
||||
/**
 * Fetch one page of the site's tour video listing and scrape it.
 *
 * @param {Object} site - Site entry; the host of `site.url` selects the tour domain and the scraper variant.
 * @param {number} [page=1] - Listing page to request.
 * @returns {Promise<*>} Result of scrapeLatestA/scrapeLatestB on a 200 response,
 *   otherwise the response code itself.
 */
async function fetchLatest(site, page = 1) {
	const { host } = new URL(site.url);
	const url = `https://tour.${host}/videos?page=${page}`;

	const res = await get(url);

	if (res.code !== 200) {
		return res.code;
	}

	// these two hosts use layout A, every other host layout B
	if (host === 'trueanal.com' || host === 'swallowed.com') {
		return scrapeLatestA(res.html, site);
	}

	return scrapeLatestB(res.html, site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it with the layout matching the site's host.
 *
 * @param {string} url - Scene URL to request.
 * @param {Object} site - Site entry; the host of `site.url` selects the scraper variant.
 * @returns {Promise<*>} Result of scrapeSceneA/scrapeSceneB on a 200 response,
 *   otherwise the response code itself.
 */
async function fetchScene(url, site) {
	const { host } = new URL(site.url);
	const res = await get(url);

	if (res.code !== 200) {
		return res.code;
	}

	// these two hosts use layout A, every other host layout B
	if (host === 'trueanal.com' || host === 'swallowed.com') {
		return scrapeSceneA(res.body.toString(), url, site);
	}

	return scrapeSceneB(res.body.toString(), url, site);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Look up an actor profile through the shared mindgeek scraper, fixed to the
 * 'milehighmedia' network name.
 *
 * @param {string} actorName - Actor name to search for.
 * @returns {Promise<*>} Whatever the shared fetchProfile resolves to.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'milehighmedia');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -13,257 +13,257 @@ const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||
const { cookieToData } = require('../utils/cookies');
|
||||
|
||||
/**
 * Collect image URLs for a scene, preferring the `poster` set over the card sets.
 *
 * Each image is expected to carry size variants (`xl`, `lg`, `md`, `sm`, `xs`);
 * the largest available one is used, and images without any URL are dropped.
 * URLs from the card sets have the first '.thumb' occurrence removed.
 *
 * @param {Object} scene - Scene data with an `images` object.
 * @returns {string[]} Image URLs, or an empty array when neither set exists.
 */
function getThumbs(scene) {
	const largestUrl = image => image.xl?.url
		|| image.lg?.url
		|| image.md?.url
		|| image.sm?.url
		|| image.xs?.url;

	if (scene.images.poster) {
		return scene.images.poster
			.map(largestUrl)
			.filter(Boolean);
	}

	if (scene.images.card_main_rect) {
		return scene.images.card_main_rect
			.concat(scene.images.card_secondary_rect || [])
			.map(largestUrl)
			.filter(Boolean)
			.map(url => url.replace('.thumb', ''));
	}

	return [];
}
|
||||
|
||||
/**
 * Convert one release record from the API listing into a release object,
 * or reject it based on the site's `extract` parameter.
 *
 * @param {Object} data - Release record (id, title, description, collections, actors, tags, videos, images, dateReleased).
 * @param {Object} site - Site entry; `parameters.extract` filters by collection and
 *   `parameters.native` decides whether the site or its network URL prefixes the scene URL.
 * @returns {Object|null} Release, or null when the record is filtered out.
 */
function scrapeLatestX(data, site) {
	const extract = site.parameters?.extract;

	if (extract === true && data.collections.length > 0) {
		// release should not belong to any channel
		return null;
	}

	if (typeof extract === 'string' && !data.collections.some(collection => collection.shortName === extract)) {
		// release should belong to specific channel
		return null;
	}

	const release = {
		entryId: data.id,
		title: data.title,
		description: data.description,
	};

	const hostname = site.parameters?.native ? site.url : site.network.url;

	release.url = `${hostname}/scene/${release.entryId}/`;
	release.date = new Date(data.dateReleased);
	release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender }));

	release.tags = data.tags.map(tag => tag.name);

	release.duration = data.videos.mediabook?.length;
	// first thumb is the poster, the remainder are photos
	[release.poster, ...release.photos] = getThumbs(data);

	const teaserSources = data.videos.mediabook?.files;

	if (teaserSources) {
		release.teaser = Object.values(teaserSources).map(teaser => ({
			src: teaser.urls.view,
			quality: parseInt(teaser.format, 10),
		}));
	}

	return release;
}
|
||||
|
||||
/**
 * Convert a list of release records and drop the ones the site filters out.
 *
 * @param {Object[]} items - Release records from the API listing.
 * @param {Object} site - Site entry forwarded to scrapeLatestX.
 * @returns {Promise<Object[]>} Releases for which scrapeLatestX returned a value.
 */
async function scrapeLatest(items, site) {
	// scrapeLatestX is synchronous, so no Promise.all is needed; async keeps the promise-returning interface
	return items
		.map(data => scrapeLatestX(data, site))
		.filter(Boolean);
}
|
||||
|
||||
/**
 * Convert a single release record from the API into a release object.
 *
 * @param {Object} data - Release record (id, title, description, actors, tags, videos, images, collections, brand, dateReleased).
 * @param {?string} url - Scene URL; when falsy, one is built from the network name (or brand) and the entry ID.
 * @param {*} _site - Unused.
 * @param {string} [networkName] - Network name used for the generated URL; falls back to `data.brand`.
 * @returns {Object} Release object.
 */
function scrapeScene(data, url, _site, networkName) {
	const { id: entryId, title, description } = data;

	const release = {
		entryId,
		title,
		description,
	};

	release.date = new Date(data.dateReleased);
	release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender }));

	release.tags = data.tags.map(tag => tag.name);

	// first thumb is the poster, the remainder are photos
	[release.poster, ...release.photos] = getThumbs(data);

	const teaserSources = data.videos.mediabook?.files;

	if (teaserSources) {
		release.teaser = Object.values(teaserSources).map(teaser => ({
			src: teaser.urls.view,
			quality: parseInt(teaser.format, 10),
		}));
	}

	// channel comes from the first collection, or the brand when the release has no collection
	const siteName = data.collections[0]?.name || data.brand;
	release.channel = slugify(siteName, '');

	release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;

	return release;
}
|
||||
|
||||
/**
 * Resolve the scenes listing URL for a site.
 *
 * Order of precedence: a site URL that already carries `?site=<digits>`,
 * then the `native` or `extract` parameters (site's own /scenes page),
 * then the `siteId` parameter (network /scenes page filtered by site).
 *
 * @param {Object} site - Site entry with `url`, optional `parameters`, and `network.url` when `siteId` is used.
 * @returns {string} Listing URL.
 * @throws {Error} When none of the supported configurations applies.
 */
function getUrl(site) {
	const { search } = new URL(site.url);

	if (/\?site=\d+/.test(search)) {
		return site.url;
	}

	if (site.parameters?.native || site.parameters?.extract) {
		return `${site.url}/scenes`;
	}

	if (site.parameters?.siteId) {
		return `${site.network.url}/scenes?site=${site.parameters.siteId}`;
	}

	throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`);
}
|
||||
|
||||
/**
 * Open a cookie-backed HTTP session against a URL and read the instance token
 * from the cookies the jar holds for that URL.
 *
 * @param {string} url - URL to request in order to populate the cookie jar.
 * @returns {Promise<{session: Object, instanceToken: (string|undefined)}>} The session
 *   and the value of the `instance_token` cookie.
 */
async function getSession(url) {
	const cookieJar = new CookieJar();
	const session = bhttp.session({ cookieJar });

	// the response itself is not used; the request only fills the jar
	await session.get(url);

	const cookieString = await cookieJar.getCookieStringAsync(url);
	const { instance_token: instanceToken } = cookieToData(cookieString);

	return { session, instanceToken };
}
|
||||
|
||||
/**
 * Build an actor profile from the API actor record and the actor's HTML page.
 *
 * @param {Object} data - Actor record (bio, aliases, measurements, gender, birthPlace, height, weight, images).
 * @param {string} html - Markup of the actor page; only used to find the date of birth.
 * @param {?Object[]} [releases=[]] - Release records for the actor; null is treated like an empty list.
 * @param {string} [networkName] - Network name forwarded to scrapeScene.
 * @returns {Object} Profile object.
 */
function scrapeProfile(data, html, releases = [], networkName) {
	const { qa, qd } = ex(html);

	const profile = {
		description: data.bio,
		aliases: data.aliases,
	};

	// measurements may be absent; an empty string yields no bust/waist/hip
	const [bust, waist, hip] = (data.measurements || '').split('-');

	profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;

	if (profile.gender === 'female') {
		if (bust) profile.bust = bust.toUpperCase();
		if (waist) profile.waist = waist;
		if (hip) profile.hip = hip;
	}

	if (data.birthPlace) profile.birthPlace = data.birthPlace;
	if (data.height) profile.height = inchesToCm(data.height);
	if (data.weight) profile.weight = lbsToKg(data.weight);

	const mainCard = data.images.card_main_rect?.[0];

	if (mainCard) {
		// prefer the largest available size
		profile.avatar = mainCard.xl?.url
			|| mainCard.lg?.url
			|| mainCard.md?.url
			|| mainCard.sm?.url
			|| mainCard.xs?.url;
	}

	const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
	if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');

	// the default parameter only covers undefined, while callers in this module pass null explicitly
	profile.releases = (releases || []).map(release => scrapeScene(release, null, null, networkName));

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the latest scene releases for a site from the releases API.
 *
 * @param {Object} site - Site entry; see getUrl for the supported configurations.
 * @param {number} [page=1] - 1-based page number; each page holds 10 releases.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the API does not
 *   answer 200 with a `result`.
 */
async function fetchLatest(site, page = 1) {
	const url = getUrl(site);
	const { search } = new URL(url);
	const siteId = new URLSearchParams(search).get('site');

	const { session, instanceToken } = await getSession(url);

	const beforeDate = moment().add(1, 'day').format('YYYY-MM-DD');
	const limit = 10;
	const offset = limit * (page - 1);

	// native and extract sites query all releases; other sites filter by collection
	const collectionFilter = (site.parameters?.native || site.parameters?.extract)
		? ''
		: `collectionId=${siteId}&`;

	const apiUrl = `https://site-api.project1service.com/v2/releases?${collectionFilter}dateReleased=<${beforeDate}&limit=${limit}&offset=${offset}&orderBy=-dateReleased&type=scene`;

	const res = await session.get(apiUrl, {
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: url,
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		return scrapeLatest(res.body.result, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a single release from the releases API, based on a scene URL.
 *
 * @param {string} url - Scene URL; the first run of digits in its path is used as the entry ID.
 * @param {Object} site - Site entry forwarded to scrapeScene.
 * @returns {Promise<Object|null>} Scraped release, or null when the URL holds no ID
 *   or the API does not answer 200 with a `result`.
 */
async function fetchScene(url, site) {
	// match on the path only, so digits in the hostname are never mistaken for the ID
	// NOTE(review): assumes the ID always lives in the path (e.g. /scene/<id>/) — confirm
	const entryIdMatch = new URL(url).pathname.match(/\d+/);

	if (!entryIdMatch) {
		return null;
	}

	const entryId = entryIdMatch[0];
	const { session, instanceToken } = await getSession(url);

	const res = await session.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
		headers: {
			Instance: instanceToken,
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		return scrapeScene(res.body.result, url, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Search the actors API for an exact (case-insensitive) name match and build
 * a profile from the actor record, the actor's page and the actor's releases.
 *
 * @param {string} actorName - Actor name to search for.
 * @param {string} networkName - Network name; used as `https://www.<networkName>.com`.
 * @param {string} [actorPath='model'] - Path segment of actor pages on the network site.
 * @returns {Promise<Object|null>} Profile, or null when the actor is not found or
 *   the actor page cannot be fetched.
 */
async function fetchProfile(actorName, networkName, actorPath = 'model') {
	const url = `https://www.${networkName}.com`;
	const { session, instanceToken } = await getSession(url);
	const headers = { Instance: instanceToken };

	// encodeURIComponent, not encodeURI: the name is a query value and may hold '&', '#', '+' or '='
	const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURIComponent(actorName)}`, { headers });

	if (res.statusCode !== 200) {
		return null;
	}

	const actorData = res.body.result?.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());

	if (!actorData) {
		return null;
	}

	const actorUrl = `https://www.${networkName}.com/${actorPath}/${actorData.id}/`;
	const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;

	const [actorRes, actorReleasesRes] = await Promise.all([
		bhttp.get(actorUrl),
		session.get(actorReleasesUrl, { headers }),
	]);

	if (actorRes.statusCode !== 200) {
		return null;
	}

	// the profile is still usable without releases; pass an empty list rather than null
	const releases = actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result
		? actorReleasesRes.body.result
		: [];

	return scrapeProfile(actorData, actorRes.body.toString(), releases, networkName);
}
|
||||
|
||||
module.exports = {
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Look up an actor profile through the shared mindgeek scraper, fixed to the
 * 'mofos' network name.
 *
 * @param {string} actorName - Actor name to search for.
 * @returns {Promise<*>} Whatever the shared fetchProfile resolves to.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'mofos');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -9,149 +9,149 @@ const slugify = require('../utils/slugify');
|
||||
const { ex, get } = require('../utils/q');
|
||||
|
||||
/**
 * Derive a scene title and entry ID from a URL path whose third '/'-separated
 * part is a dash-separated slug ending in the ID (e.g. `/scene/some-title-123`).
 *
 * @param {string} pathname - URL path of the scene.
 * @returns {{title: string, entryId: string}} Title with each slug word capitalised
 *   and joined by spaces, and the last slug component as entry ID.
 */
function titleExtractor(pathname) {
	const components = pathname.split('/')[2].split('-');
	const entryId = components[components.length - 1];

	const title = components
		.slice(0, -1)
		.map(word => `${word.slice(0, 1).toUpperCase()}${word.slice(1)}`)
		.join(' ');

	return { title, entryId };
}
|
||||
|
||||
/**
 * Scrape the scene items from a listing page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site entry, attached to every release unchanged.
 * @returns {Object[]} One release per `.site-list .scene-item` element.
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.site-list .scene-item').toArray();

	return sceneElements.map((item) => {
		const element = $(item);

		const sceneLinkElement = element.find('a').first();
		// rebuild the URL from its parts to drop any query string or fragment
		const { protocol, hostname, pathname } = new URL(sceneLinkElement.attr('href'));
		const url = `${protocol}//${hostname}${pathname}`;
		const { title, entryId } = titleExtractor(pathname);

		const date = moment.utc(element.find('.entry-date').text(), 'MMM D, YYYY').toDate();
		const actors = element.find('.contain-actors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

		// drops the last four characters before converting minutes to seconds
		// NOTE(review): presumably a ' min' suffix — confirm against the page
		const duration = Number(element.find('.scene-runtime').text().slice(0, -4)) * 60;

		const posterString = sceneLinkElement.find('img[data-srcset]').attr('data-srcset') || sceneLinkElement.find('img[data-src]').attr('data-src');
		// the trailing path-like run is the poster source; null when the image carries neither attribute
		const posterMatch = posterString ? posterString.match(/[\w/.]+$/) : null;
		const poster = posterMatch ? `https:${posterMatch[0]}` : null;

		return {
			url,
			entryId,
			title,
			actors,
			date,
			duration,
			poster,
			rating: null,
			site,
		};
	});
}
|
||||
|
||||
/**
 * Scrape a single scene page.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; returned as given, and the text after the last dash
 *   of its query-less form becomes the entry ID.
 * @param {Object} site - Site entry, attached to the result unchanged.
 * @returns {Object} Scene with url, entryId, title, description, actors, date, duration
 *   (seconds), tags, photos, poster, trailer, rating, site and channel.
 */
function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElement = $('.scene-info');

	// rebuild the URL from its parts to drop any query string or fragment
	const { protocol, hostname, pathname } = new URL(url);
	const originalUrl = `${protocol}//${hostname}${pathname}`;

	const entryId = originalUrl.split('-').slice(-1)[0];
	const title = sceneElement.find('h1.scene-title.grey-text').text();
	// skips the first two child nodes of the synopsis and collapses whitespace
	const description = sceneElement.find('.synopsis').contents().slice(2).text().replace(/[\s\n]+/g, ' ').trim();

	const date = moment.utc(sceneElement.find('span.entry-date').text(), 'MMM D, YYYY').toDate();
	const actors = $('a.scene-title.grey-text.link').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	// strips ten leading and four trailing characters before converting minutes to seconds
	// NOTE(review): presumably a 'Duration: ' prefix and ' min' suffix — confirm against the page
	const duration = Number(sceneElement.find('.duration-ratings .duration').text().slice(10, -4)) * 60;

	// avoid emitting the literal 'https:undefined' when the player has no poster attribute
	const posterPath = $('video, dl8-video').attr('poster');
	const poster = posterPath ? `https:${posterPath}` : null;
	const photos = $('.contain-scene-images.desktop-only a').map((index, el) => `https:${$(el).attr('href')}`).toArray();

	const trailerEl = $('source');
	const trailerSrc = trailerEl.attr('src');
	const trailerType = trailerEl.attr('type');

	const siteName = sceneElement.find('a.site-title').text();
	const channel = siteName.replace(/[\s']+/g, '').toLowerCase();

	const tags = $('.categories a.cat-tag').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	return {
		url,
		entryId,
		title,
		description,
		actors,
		date,
		duration,
		tags,
		photos,
		poster,
		trailer: {
			src: trailerSrc,
			type: trailerType,
		},
		rating: null,
		site,
		channel,
	};
}
|
||||
|
||||
/**
 * Fetch one page of an actor's releases and return the scene URLs on it.
 *
 * @param {string} url - URL of the actor releases page.
 * @returns {Promise<string[]>} Scene URLs, or an empty array when the request is not ok.
 */
async function fetchActorReleases(url) {
	const res = await get(url);

	if (!res.ok) {
		return [];
	}

	// live scenes repeat on all pages, so their block is excluded
	return res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child');
}
|
||||
|
||||
/**
 * Scrape an actor profile page, following its pagination links to collect
 * the release URLs of the other pages as well.
 *
 * @param {string} html - Markup of the first profile page.
 * @returns {Promise<Object>} Profile with description, optional avatar and release URLs.
 */
async function scrapeProfile(html) {
	const { qu } = ex(html);
	const profile = {};

	profile.description = qu.q('.bio_about_text', true);

	const avatar = qu.q('img.performer-pic', 'src');
	if (avatar) profile.avatar = `https:${avatar}`;

	const releases = qu.urls('.scene-item > a:first-child');
	// numbered page links only; the next/prev links are excluded
	const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
	const olderReleases = await Promise.all(otherPages.map(page => fetchActorReleases(page)));

	profile.releases = releases.concat(olderReleases.flat());

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the site's listing and scrape its scenes.
 *
 * @param {Object} site - Site entry; `site.url` is the listing URL.
 * @param {number} [page=1] - Listing page to request.
 * @returns {Promise<Object[]>} Releases scraped from the page.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}?page=${page}`);

	return scrapeLatest(res.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene URL to request.
 * @param {Object} site - Site entry forwarded to scrapeScene.
 * @returns {Promise<Object>} Scraped scene.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
|
||||
|
||||
/**
 * Fetch and scrape the profile page for an actor, addressed by the slug of the name.
 *
 * @param {string} actorName - Actor name.
 * @returns {Promise<Object|null>} Scraped profile, or null when the page does not answer 200.
 */
async function fetchProfile(actorName) {
	const actorSlug = slugify(actorName);

	const res = await bhttp.get(`https://www.naughtyamerica.com/pornstar/${actorSlug}`);

	if (res.statusCode !== 200) {
		return null;
	}

	return scrapeProfile(res.body.toString());
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,75 +3,75 @@
|
||||
const { geta, ed } = require('../utils/q');
|
||||
|
||||
/**
 * Scrape releases from scene elements of the block-style listing layout.
 *
 * @param {Object[]} scenes - Items carrying the element's `html` and a `qu` query helper.
 * @returns {Object[]} One release per scene.
 */
function scrapeBlockLatest(scenes) {
	return scenes.map(({ html, qu }) => {
		const release = {};

		// the ID sits in either the class or the id attribute of the thumb container;
		// optional chaining lets a missing class attribute fall through to the id lookup
		// NOTE(review): assumes qu.q yields null/undefined when nothing matches — confirm
		const entryId = qu.q('div[class*="videothumb"]', 'class')?.match(/videothumb_(\d+)/)
			|| qu.q('div[id*="videothumb"]', 'id')?.match(/videothumb_(\d+)/);

		release.entryId = entryId[1];

		release.title = qu.q('h4 a', true);
		release.url = qu.url('h4 a');
		release.date = ed(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

		release.actors = qu.all('.tour_update_models a', true);

		release.poster = qu.q('div img').dataset.src;
		// highest available resolution of the first image
		release.photos = [qu.q('div img', 'src0_4x') || qu.q('div img', 'src0_3x') || qu.q('div img', 'src0_2x')];

		release.teaser = qu.video();

		return release;
	});
}
|
||||
|
||||
function scrapeClassicLatest(scenes) {
|
||||
return scenes.map(({ el, qu }) => {
|
||||
const release = {};
|
||||
return scenes.map(({ el, qu }) => {
|
||||
const release = {};
|
||||
|
||||
release.entryId = el.dataset.setid;
|
||||
release.url = qu.url('a');
|
||||
release.entryId = el.dataset.setid;
|
||||
release.url = qu.url('a');
|
||||
|
||||
release.title = qu.q('.update_title_small', true) || qu.q('a:nth-child(2)', true);
|
||||
release.title = qu.q('.update_title_small', true) || qu.q('a:nth-child(2)', true);
|
||||
|
||||
const description = qu.q('a', 'title');
|
||||
if (description) release.description = description;
|
||||
const description = qu.q('a', 'title');
|
||||
if (description) release.description = description;
|
||||
|
||||
const date = qu.date('.date_small, .update_date', 'MM/DD/YYYY');
|
||||
if (date) release.date = date;
|
||||
const date = qu.date('.date_small, .update_date', 'MM/DD/YYYY');
|
||||
if (date) release.date = date;
|
||||
|
||||
const durationLine = qu.q('.update_counts', true);
|
||||
if (durationLine) release.duration = Number(durationLine.match(/(\d+) min/i)[1]) * 60;
|
||||
const durationLine = qu.q('.update_counts', true);
|
||||
if (durationLine) release.duration = Number(durationLine.match(/(\d+) min/i)[1]) * 60;
|
||||
|
||||
const actors = qu.all('.update_models a', true);
|
||||
release.actors = actors.length > 0 ? actors : qu.q('.update_models', true).split(/,\s*/);
|
||||
const actors = qu.all('.update_models a', true);
|
||||
release.actors = actors.length > 0 ? actors : qu.q('.update_models', true).split(/,\s*/);
|
||||
|
||||
const photoCount = qu.q('.update_thumb', 'cnt');
|
||||
[release.poster, ...release.photos] = Array.from({ length: photoCount })
|
||||
.map((value, index) => qu.q('.update_thumb', `src${index}_3x`)
|
||||
const photoCount = qu.q('.update_thumb', 'cnt');
|
||||
[release.poster, ...release.photos] = Array.from({ length: photoCount })
|
||||
.map((value, index) => qu.q('.update_thumb', `src${index}_3x`)
|
||||
|| qu.q('.update_thumb', `src${index}_2x`)
|
||||
|| qu.q('.update_thumb', `src${index}_1x`));
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
if (!site.parameters) {
|
||||
return null;
|
||||
}
|
||||
if (!site.parameters) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const url = `${site.url}/tour_${site.parameters.siteId}/categories/movies_${page}_d.html`;
|
||||
const res = await geta(url, '.updatesBlock .movieBlock, .updatesBlock .videoBlock, .latest_updates_block .update_details, .category_listing_block .update_details');
|
||||
const url = `${site.url}/tour_${site.parameters.siteId}/categories/movies_${page}_d.html`;
|
||||
const res = await geta(url, '.updatesBlock .movieBlock, .updatesBlock .videoBlock, .latest_updates_block .update_details, .category_listing_block .update_details');
|
||||
|
||||
if (res.ok && site.parameters.block) {
|
||||
return scrapeBlockLatest(res.items, site);
|
||||
}
|
||||
if (res.ok && site.parameters.block) {
|
||||
return scrapeBlockLatest(res.items, site);
|
||||
}
|
||||
|
||||
return res.ok ? scrapeClassicLatest(res.items, site) : res.status;
|
||||
return res.ok ? scrapeClassicLatest(res.items, site) : res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchLatest,
|
||||
};
|
||||
|
||||
@@ -5,161 +5,161 @@ const slugify = require('../utils/slugify');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
|
||||
const slugUrlMap = {
|
||||
nubiles: 'https://www.nubiles.net',
|
||||
nubilesporn: 'https://www.nubiles-porn.com',
|
||||
nubiles: 'https://www.nubiles.net',
|
||||
nubilesporn: 'https://www.nubiles-porn.com',
|
||||
};
|
||||
|
||||
async function getPhotos(albumUrl) {
|
||||
const res = await geta(albumUrl, '.photo-thumb');
|
||||
const res = await geta(albumUrl, '.photo-thumb');
|
||||
|
||||
return res.ok
|
||||
? res.items.map(({ q }) => q('source').srcset)
|
||||
: [];
|
||||
return res.ok
|
||||
? res.items.map(({ q }) => q('source').srcset)
|
||||
: [];
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, site, origin) {
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
return scenes.map(({ qu }) => {
|
||||
const release = {};
|
||||
|
||||
release.title = qu.q('.title a', true);
|
||||
release.title = qu.q('.title a', true);
|
||||
|
||||
const url = qu.url('.title a').split('?')[0];
|
||||
const channelUrl = qu.url('.site-link');
|
||||
const url = qu.url('.title a').split('?')[0];
|
||||
const channelUrl = qu.url('.site-link');
|
||||
|
||||
if (/^http/.test(url)) {
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.split('/')[3];
|
||||
if (/^http/.test(url)) {
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.split('/')[3];
|
||||
|
||||
if (channelUrl) release.url = `${channelUrl}${pathname}`;
|
||||
else release.url = url;
|
||||
} else if (!/\/join/.test(url)) {
|
||||
release.entryId = url.split('/')[3];
|
||||
if (channelUrl) release.url = `${channelUrl}${pathname}`;
|
||||
else release.url = url;
|
||||
} else if (!/\/join/.test(url)) {
|
||||
release.entryId = url.split('/')[3];
|
||||
|
||||
if (channelUrl) release.url = `${channelUrl}${url}`;
|
||||
else if (site?.url) release.url = `${site.url}${url}`;
|
||||
else if (origin) release.url = `${origin}${url}`;
|
||||
} else {
|
||||
release.entryId = qu.q('a img', 'tube_tour_thumb_id');
|
||||
}
|
||||
if (channelUrl) release.url = `${channelUrl}${url}`;
|
||||
else if (site?.url) release.url = `${site.url}${url}`;
|
||||
else if (origin) release.url = `${origin}${url}`;
|
||||
} else {
|
||||
release.entryId = qu.q('a img', 'tube_tour_thumb_id');
|
||||
}
|
||||
|
||||
release.date = qu.date('.date', 'MMM D, YYYY');
|
||||
release.actors = qu.all('.models a.model', true);
|
||||
release.date = qu.date('.date', 'MMM D, YYYY');
|
||||
release.actors = qu.all('.models a.model', true);
|
||||
|
||||
const poster = qu.q('img').dataset.original;
|
||||
release.poster = [
|
||||
poster.replace('_640', '_1280'),
|
||||
poster,
|
||||
];
|
||||
const poster = qu.q('img').dataset.original;
|
||||
release.poster = [
|
||||
poster.replace('_640', '_1280'),
|
||||
poster,
|
||||
];
|
||||
|
||||
release.stars = Number(qu.q('.rating', true));
|
||||
release.likes = Number(qu.q('.likes', true));
|
||||
release.stars = Number(qu.q('.rating', true));
|
||||
release.likes = Number(qu.q('.likes', true));
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene({ qu }, url, site) {
|
||||
const release = {};
|
||||
const release = {};
|
||||
|
||||
const { origin, pathname } = new URL(url);
|
||||
release.url = `${origin}${pathname}`;
|
||||
const { origin, pathname } = new URL(url);
|
||||
release.url = `${origin}${pathname}`;
|
||||
|
||||
release.entryId = new URL(url).pathname.split('/')[3];
|
||||
release.title = qu.q('.content-pane-title h2', true);
|
||||
release.description = qu.q('.content-pane-column div', true);
|
||||
release.entryId = new URL(url).pathname.split('/')[3];
|
||||
release.title = qu.q('.content-pane-title h2', true);
|
||||
release.description = qu.q('.content-pane-column div', true);
|
||||
|
||||
release.date = qu.q('.date', 'MMM D, YYYY');
|
||||
release.date = qu.q('.date', 'MMM D, YYYY');
|
||||
|
||||
release.actors = qu.all('.content-pane-performers .model', true);
|
||||
release.tags = qu.all('.categories a', true);
|
||||
release.actors = qu.all('.content-pane-performers .model', true);
|
||||
release.tags = qu.all('.categories a', true);
|
||||
|
||||
release.poster = qu.poster() || qu.img('.fake-video-player img');
|
||||
release.trailer = qu.all('source').map(source => ({
|
||||
src: source.src,
|
||||
quality: Number(source.getAttribute('res')),
|
||||
}));
|
||||
release.poster = qu.poster() || qu.img('.fake-video-player img');
|
||||
release.trailer = qu.all('source').map(source => ({
|
||||
src: source.src,
|
||||
quality: Number(source.getAttribute('res')),
|
||||
}));
|
||||
|
||||
release.stars = Number(qu.q('.score', true));
|
||||
release.likes = Number(qu.q('#likecount', true));
|
||||
release.stars = Number(qu.q('.score', true));
|
||||
release.likes = Number(qu.q('#likecount', true));
|
||||
|
||||
const albumLink = qu.url('.content-pane-related-links a[href*="gallery"]');
|
||||
if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`);
|
||||
const albumLink = qu.url('.content-pane-related-links a[href*="gallery"]');
|
||||
if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`);
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ qu }, _actorName, origin) {
|
||||
const profile = {};
|
||||
const profile = {};
|
||||
|
||||
const keys = qu.all('.model-profile h5', true);
|
||||
const values = qu.all('.model-profile h5 + p', true);
|
||||
const keys = qu.all('.model-profile h5', true);
|
||||
const values = qu.all('.model-profile h5 + p', true);
|
||||
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
|
||||
|
||||
profile.age = Number(bio.age);
|
||||
profile.description = qu.q('.model-bio', true);
|
||||
profile.age = Number(bio.age);
|
||||
profile.description = qu.q('.model-bio', true);
|
||||
|
||||
profile.residencePlace = bio.location;
|
||||
profile.residencePlace = bio.location;
|
||||
|
||||
profile.height = heightToCm(bio.height);
|
||||
[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v);
|
||||
profile.height = heightToCm(bio.height);
|
||||
[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v);
|
||||
|
||||
profile.avatar = qu.img('.model-profile img');
|
||||
profile.avatar = qu.img('.model-profile img');
|
||||
|
||||
const releases = qu.all('.content-grid-item').filter(el => /video\//.test(qu.url(el, '.img-wrapper a'))); // filter out photos
|
||||
profile.releases = scrapeAll(ctxa(releases), null, origin);
|
||||
const releases = qu.all('.content-grid-item').filter(el => /video\//.test(qu.url(el, '.img-wrapper a'))); // filter out photos
|
||||
profile.releases = scrapeAll(ctxa(releases), null, origin);
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.url}/video/gallery/${(page - 1) * 12}`;
|
||||
const res = await geta(url, '.content-grid-item');
|
||||
const url = `${site.url}/video/gallery/${(page - 1) * 12}`;
|
||||
const res = await geta(url, '.content-grid-item');
|
||||
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
if (site.parameters?.upcoming) {
|
||||
const url = `${site.url}/video/upcoming`;
|
||||
const res = await geta(url, '.content-grid-item');
|
||||
if (site.parameters?.upcoming) {
|
||||
const url = `${site.url}/video/upcoming`;
|
||||
const res = await geta(url, '.content-grid-item');
|
||||
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
}
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, siteSlug) {
|
||||
const firstLetter = actorName.charAt(0).toLowerCase();
|
||||
const origin = slugUrlMap[siteSlug] || `https://www.${siteSlug}.com`;
|
||||
const firstLetter = actorName.charAt(0).toLowerCase();
|
||||
const origin = slugUrlMap[siteSlug] || `https://www.${siteSlug}.com`;
|
||||
|
||||
const url = `${origin}/model/alpha/${firstLetter}`;
|
||||
const resModels = await get(url);
|
||||
const url = `${origin}/model/alpha/${firstLetter}`;
|
||||
const resModels = await get(url);
|
||||
|
||||
if (!resModels.ok) return resModels.status;
|
||||
if (!resModels.ok) return resModels.status;
|
||||
|
||||
const modelPath = resModels.item.qu.all('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));
|
||||
const modelPath = resModels.item.qu.all('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));
|
||||
|
||||
if (modelPath) {
|
||||
const modelUrl = `${origin}${modelPath}`;
|
||||
const resModel = await get(modelUrl);
|
||||
if (modelPath) {
|
||||
const modelUrl = `${origin}${modelPath}`;
|
||||
const resModel = await get(modelUrl);
|
||||
|
||||
return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status;
|
||||
}
|
||||
return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status;
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -7,143 +7,143 @@ const knex = require('../knex');
|
||||
const { ex, ctxa } = require('../utils/q');
|
||||
|
||||
async function getSiteSlugs() {
|
||||
return knex('sites')
|
||||
.pluck('sites.slug')
|
||||
.join('networks', 'networks.id', 'sites.network_id')
|
||||
.where('networks.slug', 'perfectgonzo');
|
||||
return knex('sites')
|
||||
.pluck('sites.slug')
|
||||
.join('networks', 'networks.id', 'sites.network_id')
|
||||
.where('networks.slug', 'perfectgonzo');
|
||||
}
|
||||
|
||||
function getHash(identifier) {
|
||||
const hash = blake2.createHash('blake2b', { digestLength: 8 });
|
||||
const hash = blake2.createHash('blake2b', { digestLength: 8 });
|
||||
|
||||
hash.update(Buffer.from(identifier));
|
||||
hash.update(Buffer.from(identifier));
|
||||
|
||||
return hash.digest('hex');
|
||||
return hash.digest('hex');
|
||||
}
|
||||
|
||||
function extractMaleModelsFromTags(tagContainer) {
|
||||
if (!tagContainer) {
|
||||
return [];
|
||||
}
|
||||
if (!tagContainer) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const tagEls = Array.from(tagContainer.childNodes, node => ({ type: node.nodeType, text: node.textContent.trim() })).filter(node => node.text.length > 0);
|
||||
const modelLabelIndex = tagEls.findIndex(node => node.text === 'Male Models');
|
||||
const tagEls = Array.from(tagContainer.childNodes, node => ({ type: node.nodeType, text: node.textContent.trim() })).filter(node => node.text.length > 0);
|
||||
const modelLabelIndex = tagEls.findIndex(node => node.text === 'Male Models');
|
||||
|
||||
if (modelLabelIndex > -1) {
|
||||
const nextLabelIndex = tagEls.findIndex((node, index) => index > modelLabelIndex && node.type === 3);
|
||||
const maleModels = tagEls.slice(modelLabelIndex + 1, nextLabelIndex);
|
||||
if (modelLabelIndex > -1) {
|
||||
const nextLabelIndex = tagEls.findIndex((node, index) => index > modelLabelIndex && node.type === 3);
|
||||
const maleModels = tagEls.slice(modelLabelIndex + 1, nextLabelIndex);
|
||||
|
||||
return maleModels.map(model => model.text);
|
||||
}
|
||||
return maleModels.map(model => model.text);
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
|
||||
const siteSlugs = metaSiteSlugs || await getSiteSlugs();
|
||||
const channelMatch = photo.match(new RegExp(siteSlugs.join('|')));
|
||||
const siteSlugs = metaSiteSlugs || await getSiteSlugs();
|
||||
const channelMatch = photo.match(new RegExp(siteSlugs.join('|')));
|
||||
|
||||
if (channelMatch) {
|
||||
return channelMatch[0];
|
||||
}
|
||||
if (channelMatch) {
|
||||
return channelMatch[0];
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function scrapeLatest(html, site) {
|
||||
const siteSlugs = await getSiteSlugs();
|
||||
const { element } = ex(html);
|
||||
const siteSlugs = await getSiteSlugs();
|
||||
const { element } = ex(html);
|
||||
|
||||
return ctxa(element, '#content-main .itemm').map(({
|
||||
q, qa, qlength, qdate, qimages,
|
||||
}) => {
|
||||
const release = {
|
||||
site,
|
||||
meta: {
|
||||
siteSlugs,
|
||||
},
|
||||
};
|
||||
return ctxa(element, '#content-main .itemm').map(({
|
||||
q, qa, qlength, qdate, qimages,
|
||||
}) => {
|
||||
const release = {
|
||||
site,
|
||||
meta: {
|
||||
siteSlugs,
|
||||
},
|
||||
};
|
||||
|
||||
const sceneLink = q('a');
|
||||
const sceneLink = q('a');
|
||||
|
||||
release.title = sceneLink.title;
|
||||
release.url = `${site.url}${sceneLink.href}`;
|
||||
release.date = qdate('.nm-date', 'MM/DD/YYYY');
|
||||
release.title = sceneLink.title;
|
||||
release.url = `${site.url}${sceneLink.href}`;
|
||||
release.date = qdate('.nm-date', 'MM/DD/YYYY');
|
||||
|
||||
const slug = new URL(release.url).pathname.split('/')[2];
|
||||
release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`);
|
||||
const slug = new URL(release.url).pathname.split('/')[2];
|
||||
release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`);
|
||||
|
||||
release.actors = release.title.split('&').map(actor => actor.trim());
|
||||
release.actors = release.title.split('&').map(actor => actor.trim());
|
||||
|
||||
[release.poster, ...release.photos] = qimages('.bloc-link img');
|
||||
[release.poster, ...release.photos] = qimages('.bloc-link img');
|
||||
|
||||
release.tags = qa('.dropdown ul a', true).slice(1);
|
||||
release.duration = qlength('.dropdown p:first-child');
|
||||
release.tags = qa('.dropdown ul a', true).slice(1);
|
||||
release.duration = qlength('.dropdown p:first-child');
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, site, url, metaSiteSlugs) {
|
||||
const {
|
||||
q, qa, qlength, qdate, qposter, qtrailer,
|
||||
} = ex(html);
|
||||
const {
|
||||
q, qa, qlength, qdate, qposter, qtrailer,
|
||||
} = ex(html);
|
||||
|
||||
const release = { url, site };
|
||||
const release = { url, site };
|
||||
|
||||
release.title = q('#movie-header h2', true);
|
||||
release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
release.title = q('#movie-header h2', true);
|
||||
release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
|
||||
release.description = q('.container .mg-md', true);
|
||||
release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)');
|
||||
release.description = q('.container .mg-md', true);
|
||||
release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)');
|
||||
|
||||
release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container')));
|
||||
release.tags = qa('.tag-container a', true);
|
||||
release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container')));
|
||||
release.tags = qa('.tag-container a', true);
|
||||
|
||||
const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true);
|
||||
if (/4K/.test(uhd)) release.tags = release.tags.concat('4k');
|
||||
const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true);
|
||||
if (/4K/.test(uhd)) release.tags = release.tags.concat('4k');
|
||||
|
||||
release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src);
|
||||
release.poster = qposter();
|
||||
release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src);
|
||||
release.poster = qposter();
|
||||
|
||||
const trailer = qtrailer();
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
const trailer = qtrailer();
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
|
||||
if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs);
|
||||
if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs);
|
||||
|
||||
if (release.channel) {
|
||||
const { pathname } = new URL(url);
|
||||
release.url = `https://${release.channel}.com${pathname}`;
|
||||
if (release.channel) {
|
||||
const { pathname } = new URL(url);
|
||||
release.url = `https://${release.channel}.com${pathname}`;
|
||||
|
||||
const slug = pathname.split('/')[2];
|
||||
release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);
|
||||
}
|
||||
const slug = pathname.split('/')[2];
|
||||
release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);
|
||||
}
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.url}/movies/page-${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
const url = `${site.url}/movies/page-${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, release) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs);
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
async function getTrailer(entryId) {
|
||||
const trailerRes = await bhttp.post('https://www.pervcity.com/gettoken.php', {
|
||||
setId: entryId,
|
||||
});
|
||||
const trailerRes = await bhttp.post('https://www.pervcity.com/gettoken.php', {
|
||||
setId: entryId,
|
||||
});
|
||||
|
||||
if (trailerRes.statusCode === 200) {
|
||||
return {
|
||||
poster: trailerRes.body.TrailerImg,
|
||||
trailer: trailerRes.body.TrailerPath || trailerRes.body.Trailerfallback,
|
||||
};
|
||||
}
|
||||
if (trailerRes.statusCode === 200) {
|
||||
return {
|
||||
poster: trailerRes.body.TrailerImg,
|
||||
trailer: trailerRes.body.TrailerPath || trailerRes.body.Trailerfallback,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
function scrapeLatestScene(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const entryId = $('li').attr('id');
|
||||
const sceneLinkElement = $('#scene_title_border a');
|
||||
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
|
||||
const entryId = $('li').attr('id');
|
||||
const sceneLinkElement = $('#scene_title_border a');
|
||||
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
|
||||
|
||||
const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas
|
||||
const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();
|
||||
const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas
|
||||
const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();
|
||||
|
||||
const poster = $('a:nth-child(2) > img').attr('src');
|
||||
const photos = $('.sample-picker img').map((index, element) => $(element).attr('src').replace('tourpics', 'trailer')).toArray();
|
||||
const poster = $('a:nth-child(2) > img').attr('src');
|
||||
const photos = $('.sample-picker img').map((index, element) => $(element).attr('src').replace('tourpics', 'trailer')).toArray();
|
||||
|
||||
const stars = $('img[src*="/star.png"]')
|
||||
.toArray()
|
||||
.map(element => $(element).attr('src'))
|
||||
.length || 0;
|
||||
const stars = $('img[src*="/star.png"]')
|
||||
.toArray()
|
||||
.map(element => $(element).attr('src'))
|
||||
.length || 0;
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site,
|
||||
};
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const release = { url, site };
|
||||
const release = { url, site };
|
||||
|
||||
release.entryId = document.querySelector('input#set_ID').value;
|
||||
release.entryId = document.querySelector('input#set_ID').value;
|
||||
|
||||
release.title = document.querySelector('title').textContent;
|
||||
release.description = document.querySelector('.player_data').textContent.trim();
|
||||
release.title = document.querySelector('title').textContent;
|
||||
release.description = document.querySelector('.player_data').textContent.trim();
|
||||
|
||||
const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
|
||||
const [minutes, seconds] = durationString.match(/\d+/g);
|
||||
const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
|
||||
const [minutes, seconds] = durationString.match(/\d+/g);
|
||||
|
||||
release.duration = Number(minutes) * 60 + Number(seconds);
|
||||
release.tags = document.querySelector('meta[name="keywords"]').content.split(',');
|
||||
release.duration = Number(minutes) * 60 + Number(seconds);
|
||||
release.tags = document.querySelector('meta[name="keywords"]').content.split(',');
|
||||
|
||||
const { poster, trailer } = await getTrailer(release.entryId);
|
||||
const { poster, trailer } = await getTrailer(release.entryId);
|
||||
|
||||
release.poster = poster;
|
||||
release.trailer = { src: trailer };
|
||||
release.poster = poster;
|
||||
release.trailer = { src: trailer };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeFallbackLanding(html) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
return document.querySelector('input#set_ID').value;
|
||||
return document.querySelector('input#set_ID').value;
|
||||
}
|
||||
|
||||
async function scrapeFallbackScene(html, entryId, url, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { url, entryId, site };
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { url, entryId, site };
|
||||
|
||||
release.title = document.querySelector('.popup_data_set_head label').textContent;
|
||||
release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
|
||||
release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
|
||||
release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);
|
||||
release.title = document.querySelector('.popup_data_set_head label').textContent;
|
||||
release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
|
||||
release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
|
||||
release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);
|
||||
|
||||
const { poster, trailer } = await getTrailer(release.entryId);
|
||||
const { poster, trailer } = await getTrailer(release.entryId);
|
||||
|
||||
release.poster = poster;
|
||||
release.trailer = { src: trailer };
|
||||
release.poster = poster;
|
||||
release.trailer = { src: trailer };
|
||||
|
||||
release.channel = document.querySelector('.popup_left_top div img').alt;
|
||||
release.channel = document.querySelector('.popup_left_top div img').alt;
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = page === 1
|
||||
? await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`)
|
||||
: await bhttp.get(`${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
|
||||
const elements = JSON.parse(res.body.toString());
|
||||
const res = page === 1
|
||||
? await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`)
|
||||
: await bhttp.get(`${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
|
||||
const elements = JSON.parse(res.body.toString());
|
||||
|
||||
const latest = Object.values(elements.total_arr).map(html => scrapeLatestScene(html, site)); // total_arr is a key-value object for final_load_latestupdate_grid_view.php
|
||||
const latest = Object.values(elements.total_arr).map(html => scrapeLatestScene(html, site)); // total_arr is a key-value object for final_load_latestupdate_grid_view.php
|
||||
|
||||
return latest;
|
||||
return latest;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
if (site.isFallback) {
|
||||
const entryId = scrapeFallbackLanding(res.body.toString(), url);
|
||||
if (res.statusCode === 200) {
|
||||
if (site.isNetwork) {
|
||||
const entryId = scrapeFallbackLanding(res.body.toString(), url);
|
||||
|
||||
const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
|
||||
setId: entryId,
|
||||
});
|
||||
const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
|
||||
setId: entryId,
|
||||
});
|
||||
|
||||
return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
|
||||
}
|
||||
return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
|
||||
}
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -5,56 +5,56 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const ethnicityMap = {
|
||||
White: 'Caucasian',
|
||||
White: 'Caucasian',
|
||||
};
|
||||
|
||||
const hairMap = {
|
||||
Brunette: 'brown',
|
||||
Brunette: 'brown',
|
||||
};
|
||||
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const entries = Array.from(document.querySelectorAll('.infoPiece'), el => el.textContent.replace(/\n|\t/g, '').split(':'));
|
||||
const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [key.trim()]: value.trim() } : acc), {});
|
||||
const entries = Array.from(document.querySelectorAll('.infoPiece'), el => el.textContent.replace(/\n|\t/g, '').split(':'));
|
||||
const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [key.trim()]: value.trim() } : acc), {});
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
|
||||
const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');
|
||||
const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
|
||||
const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');
|
||||
|
||||
if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
|
||||
if (bio.ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;
|
||||
if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
|
||||
if (bio.ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;
|
||||
|
||||
if (descriptionString) profile.description = descriptionString.textContent;
|
||||
if (descriptionString) profile.description = descriptionString.textContent;
|
||||
|
||||
if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
|
||||
if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
|
||||
if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
|
||||
if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
|
||||
|
||||
profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
|
||||
profile.residencePlace = bio['City and Country'];
|
||||
profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
|
||||
profile.residencePlace = bio['City and Country'];
|
||||
|
||||
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';
|
||||
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';
|
||||
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
|
||||
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
|
||||
if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
|
||||
if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
|
||||
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
|
||||
if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
|
||||
if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';
|
||||
|
||||
if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;
|
||||
profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
|
||||
if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;
|
||||
profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
|
||||
/* Model pages are not reliably associated with actual porn stars
|
||||
/* Model pages are not reliably associated with actual porn stars
|
||||
const modelUrl = `https://pornhub.com/model/${actorSlug}`;
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
|
||||
@@ -74,12 +74,12 @@ async function fetchProfile(actorName) {
|
||||
}
|
||||
*/
|
||||
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
const pornstarRes = await bhttp.get(pornstarUrl);
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
const pornstarRes = await bhttp.get(pornstarUrl);
|
||||
|
||||
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
||||
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -9,193 +9,193 @@ const { get, geta } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
async function getPhotos(entryId, site) {
|
||||
const { hostname } = new URL(site.url);
|
||||
const { hostname } = new URL(site.url);
|
||||
|
||||
const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
|
||||
const html = res.body.toString();
|
||||
const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
|
||||
const html = res.body.toString();
|
||||
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const photos = $('a.fakethumb').map((photoIndex, photoElement) => $(photoElement).attr('data-src') || $(photoElement).attr('href')).toArray();
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const photos = $('a.fakethumb').map((photoIndex, photoElement) => $(photoElement).attr('data-src') || $(photoElement).attr('href')).toArray();
|
||||
|
||||
return photos;
|
||||
return photos;
|
||||
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.content-wrapper .scene').toArray();
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.content-wrapper .scene').toArray();
|
||||
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('h3 a');
|
||||
const thumbnailElement = $(element).find('a img');
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('h3 a');
|
||||
const thumbnailElement = $(element).find('a img');
|
||||
|
||||
const url = sceneLinkElement.attr('href');
|
||||
// const title = sceneLinkElement.text();
|
||||
const entryId = url.split('/').slice(-1)[0];
|
||||
const url = sceneLinkElement.attr('href');
|
||||
// const title = sceneLinkElement.text();
|
||||
const entryId = url.split('/').slice(-1)[0];
|
||||
|
||||
const titleText = thumbnailElement.attr('alt');
|
||||
const title = titleText.slice(titleText.indexOf(':') + 1).trim();
|
||||
const titleText = thumbnailElement.attr('alt');
|
||||
const title = titleText.slice(titleText.indexOf(':') + 1).trim();
|
||||
|
||||
const date = moment.utc($(element).find('.scene-date'), ['MM/DD/YYYY', 'YYYY-MM-DD']).toDate();
|
||||
const date = moment.utc($(element).find('.scene-date'), ['MM/DD/YYYY', 'YYYY-MM-DD']).toDate();
|
||||
|
||||
const actors = $(element).find('.scene-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
const likes = Number($(element).find('.scene-votes').text());
|
||||
const actors = $(element).find('.scene-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
const likes = Number($(element).find('.scene-votes').text());
|
||||
|
||||
const photoCount = Number(thumbnailElement.attr('thumbs_num'));
|
||||
const poster = thumbnailElement.attr('src');
|
||||
const photos = Array.from({ length: photoCount }, (val, index) => thumbnailElement.attr(`src${index + 1}`));
|
||||
const photoCount = Number(thumbnailElement.attr('thumbs_num'));
|
||||
const poster = thumbnailElement.attr('src');
|
||||
const photos = Array.from({ length: photoCount }, (val, index) => thumbnailElement.attr(`src${index + 1}`));
|
||||
|
||||
const scene = {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
likes,
|
||||
},
|
||||
site,
|
||||
};
|
||||
const scene = {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
likes,
|
||||
},
|
||||
site,
|
||||
};
|
||||
|
||||
return scene;
|
||||
});
|
||||
return scene;
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const release = { url };
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const release = { url };
|
||||
|
||||
[release.entryId] = url.split('/').slice(-1);
|
||||
release.title = $('.video-wrapper meta[itemprop="name"]').attr('content');
|
||||
release.description = $('.video-wrapper meta[itemprop="description"]').attr('content');
|
||||
[release.entryId] = url.split('/').slice(-1);
|
||||
release.title = $('.video-wrapper meta[itemprop="name"]').attr('content');
|
||||
release.description = $('.video-wrapper meta[itemprop="description"]').attr('content');
|
||||
|
||||
release.date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
|
||||
release.actors = $('.content-wrapper .scene-models-list a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
release.date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
|
||||
release.actors = $('.content-wrapper .scene-models-list a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
|
||||
const timestamp = $('.video-wrapper meta[itemprop="duration"]').attr('content');
|
||||
const timestamp = $('.video-wrapper meta[itemprop="duration"]').attr('content');
|
||||
|
||||
if (timestamp) {
|
||||
const [minutes, seconds] = timestamp.match(/\d+/g);
|
||||
release.duration = Number(minutes) * 60 + Number(seconds);
|
||||
}
|
||||
if (timestamp) {
|
||||
const [minutes, seconds] = timestamp.match(/\d+/g);
|
||||
release.duration = Number(minutes) * 60 + Number(seconds);
|
||||
}
|
||||
|
||||
release.tags = $('.content-desc .scene-tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.likes = Number($('.content-desc #social-actions #likes').text());
|
||||
release.tags = $('.content-desc .scene-tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.likes = Number($('.content-desc #social-actions #likes').text());
|
||||
|
||||
const posterScript = $('script:contains(poster)').html();
|
||||
const posterLink = posterScript?.slice(posterScript.indexOf('https://'), posterScript.indexOf('.jpg') + 4);
|
||||
release.poster = $('meta[property="og:image"]').attr('content') || posterLink || $('#trailer_player_finished img').attr('src');
|
||||
const posterScript = $('script:contains(poster)').html();
|
||||
const posterLink = posterScript?.slice(posterScript.indexOf('https://'), posterScript.indexOf('.jpg') + 4);
|
||||
release.poster = $('meta[property="og:image"]').attr('content') || posterLink || $('#trailer_player_finished img').attr('src');
|
||||
|
||||
const trailer = $('meta[property="og:video"]').attr('content') || $('#videojs-trailer source').attr('src');
|
||||
const trailer = $('meta[property="og:video"]').attr('content') || $('#videojs-trailer source').attr('src');
|
||||
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
|
||||
release.photos = await getPhotos(release.entryId, site);
|
||||
release.movie = $('a[data-track="FULL MOVIE"]').attr('href');
|
||||
release.photos = await getPhotos(release.entryId, site);
|
||||
release.movie = $('a[data-track="FULL MOVIE"]').attr('href');
|
||||
|
||||
const siteElement = $('.content-wrapper .logos-sites a');
|
||||
if (siteElement) release.channel = slugify(siteElement.text(), '');
|
||||
const siteElement = $('.content-wrapper .logos-sites a');
|
||||
if (siteElement) release.channel = slugify(siteElement.text(), '');
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ html, q, qa, qtx }) {
|
||||
const profile = {};
|
||||
const profile = {};
|
||||
|
||||
const bio = qa('.model-facts li:not(.model-facts-long)', true).reduce((acc, fact) => {
|
||||
const [key, value] = fact.split(':');
|
||||
const trimmedValue = value.trim();
|
||||
const bio = qa('.model-facts li:not(.model-facts-long)', true).reduce((acc, fact) => {
|
||||
const [key, value] = fact.split(':');
|
||||
const trimmedValue = value.trim();
|
||||
|
||||
if (trimmedValue.length === 0 || trimmedValue === '-') return acc;
|
||||
return { ...acc, [slugify(key, '_')]: trimmedValue };
|
||||
}, {});
|
||||
if (trimmedValue.length === 0 || trimmedValue === '-') return acc;
|
||||
return { ...acc, [slugify(key, '_')]: trimmedValue };
|
||||
}, {});
|
||||
|
||||
const description = q('.model-facts-long', true);
|
||||
if (description) profile.description = description;
|
||||
const description = q('.model-facts-long', true);
|
||||
if (description) profile.description = description;
|
||||
|
||||
const aliases = qtx('.aka')?.split(/,\s*/);
|
||||
if (aliases) profile.aliases = aliases;
|
||||
const aliases = qtx('.aka')?.split(/,\s*/);
|
||||
if (aliases) profile.aliases = aliases;
|
||||
|
||||
if (bio.birth_place) profile.birthPlace = bio.birth_place;
|
||||
if (bio.nationality) profile.nationality = bio.nationality;
|
||||
if (bio.birth_place) profile.birthPlace = bio.birth_place;
|
||||
if (bio.nationality) profile.nationality = bio.nationality;
|
||||
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d+/)[0]);
|
||||
if (bio.height) profile.height = Number(bio.height.match(/^\d+/)[0]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d+/)[0]);
|
||||
if (bio.height) profile.height = Number(bio.height.match(/^\d+/)[0]);
|
||||
|
||||
if (bio.hair_color) profile.hair = bio.hair_color;
|
||||
if (bio.eye_color) profile.eye = bio.eye_color;
|
||||
if (bio.hair_color) profile.hair = bio.hair_color;
|
||||
if (bio.eye_color) profile.eye = bio.eye_color;
|
||||
|
||||
if (bio.tattoos) {
|
||||
profile.hasTattoos = true;
|
||||
profile.tattoos = bio.tattoos;
|
||||
}
|
||||
if (bio.tattoos) {
|
||||
profile.hasTattoos = true;
|
||||
profile.tattoos = bio.tattoos;
|
||||
}
|
||||
|
||||
if (bio.tattoos) {
|
||||
profile.hasTattoos = true;
|
||||
profile.tattoos = bio.tattoos;
|
||||
}
|
||||
if (bio.tattoos) {
|
||||
profile.hasTattoos = true;
|
||||
profile.tattoos = bio.tattoos;
|
||||
}
|
||||
|
||||
if (bio.piercings) {
|
||||
profile.hasPiercings = true;
|
||||
profile.piercings = bio.piercings;
|
||||
}
|
||||
if (bio.piercings) {
|
||||
profile.hasPiercings = true;
|
||||
profile.piercings = bio.piercings;
|
||||
}
|
||||
|
||||
profile.avatar = q('.img-pornstar img').dataset.src;
|
||||
profile.releases = scrapeLatest(html);
|
||||
profile.avatar = q('.img-pornstar img').dataset.src;
|
||||
profile.releases = scrapeLatest(html);
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const { hostname } = new URL(site.url);
|
||||
const { hostname } = new URL(site.url);
|
||||
|
||||
if (hostname.match('private.com')) {
|
||||
const res = await bhttp.get(`${site.url}/${page}/`);
|
||||
if (hostname.match('private.com')) {
|
||||
const res = await bhttp.get(`${site.url}/${page}/`);
|
||||
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
const res = await bhttp.get(`${site.url}/scenes/${page}/`);
|
||||
const res = await bhttp.get(`${site.url}/scenes/${page}/`);
|
||||
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSearchSlug = slugify(actorName, '+');
|
||||
const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
|
||||
const modelRes = await geta(url, '.model h3 a');
|
||||
const actorSearchSlug = slugify(actorName, '+');
|
||||
const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
|
||||
const modelRes = await geta(url, '.model h3 a');
|
||||
|
||||
if (modelRes.ok) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const model = modelRes.items.find(({ text }) => slugify(text) === actorSlug);
|
||||
if (modelRes.ok) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const model = modelRes.items.find(({ text }) => slugify(text) === actorSlug);
|
||||
|
||||
if (model) {
|
||||
const res = await get(model.el.href);
|
||||
if (model) {
|
||||
const res = await get(model.el.href);
|
||||
|
||||
return res.ok ? scrapeProfile(res.item) : res.status;
|
||||
}
|
||||
}
|
||||
return res.ok ? scrapeProfile(res.item) : res.status;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -4,49 +4,49 @@ const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
const {
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
} = require('./mindgeek');
|
||||
|
||||
function scrapeLatestClassic(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const stateTag = $('script:contains("initialState")').html();
|
||||
const prefix = 'initialState = {';
|
||||
const prefixIndex = stateTag.indexOf('initialState = {');
|
||||
const suffix = '};';
|
||||
const stateString = stateTag.slice(prefixIndex + prefix.length - 1, stateTag.indexOf('};', prefixIndex) + suffix.length - 1);
|
||||
const data = JSON.parse(stateString);
|
||||
const stateTag = $('script:contains("initialState")').html();
|
||||
const prefix = 'initialState = {';
|
||||
const prefixIndex = stateTag.indexOf('initialState = {');
|
||||
const suffix = '};';
|
||||
const stateString = stateTag.slice(prefixIndex + prefix.length - 1, stateTag.indexOf('};', prefixIndex) + suffix.length - 1);
|
||||
const data = JSON.parse(stateString);
|
||||
|
||||
return Object.values(data.entities.releases).map(scene => scrapeLatestX(scene, site));
|
||||
return Object.values(data.entities.releases).map(scene => scrapeLatestX(scene, site));
|
||||
}
|
||||
|
||||
async function fetchClassic(site, page) {
|
||||
const res = await bhttp.get(`${site.url}/scenes?page=${page}`);
|
||||
const res = await bhttp.get(`${site.url}/scenes?page=${page}`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatestClassic(res.body.toString(), site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatestClassic(res.body.toString(), site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchLatestWrap(site, page = 1) {
|
||||
if (site.parameters?.classic) {
|
||||
return fetchClassic(site, page);
|
||||
}
|
||||
if (site.parameters?.classic) {
|
||||
return fetchClassic(site, page);
|
||||
}
|
||||
|
||||
return fetchLatest(site, page);
|
||||
return fetchLatest(site, page);
|
||||
}
|
||||
|
||||
async function networkFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'realitykings');
|
||||
return fetchProfile(actorName, 'realitykings');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -7,255 +7,255 @@ const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
function scrapePhotos(html) {
|
||||
const { qis } = ex(html, '#photos-page');
|
||||
const photos = qis('img');
|
||||
const { qis } = ex(html, '#photos-page');
|
||||
const photos = qis('img');
|
||||
|
||||
return photos.map(photo => [
|
||||
photo
|
||||
.replace('x_800', 'x_xl')
|
||||
.replace('_tn', ''),
|
||||
photo,
|
||||
]);
|
||||
return photos.map(photo => [
|
||||
photo
|
||||
.replace('x_800', 'x_xl')
|
||||
.replace('_tn', ''),
|
||||
photo,
|
||||
]);
|
||||
}
|
||||
|
||||
async function fetchPhotos(url) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapePhotos(res.body.toString(), url);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapePhotos(res.body.toString(), url);
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
function scrapeAll(html, site) {
|
||||
return exa(html, '.container .video, .container-fluid .video').map(({ q, qa, qd, ql }) => {
|
||||
const release = {};
|
||||
return exa(html, '.container .video, .container-fluid .video').map(({ q, qa, qd, ql }) => {
|
||||
const release = {};
|
||||
|
||||
release.title = q('.title, .i-title', true);
|
||||
release.title = q('.title, .i-title', true);
|
||||
|
||||
const linkEl = q('a');
|
||||
const url = new URL(linkEl.href);
|
||||
release.url = `${url.origin}${url.pathname}`;
|
||||
const linkEl = q('a');
|
||||
const url = new URL(linkEl.href);
|
||||
release.url = `${url.origin}${url.pathname}`;
|
||||
|
||||
// this is a photo album, not a scene (used for profiles)
|
||||
if (/photos\//.test(url)) return null;
|
||||
// this is a photo album, not a scene (used for profiles)
|
||||
if (/photos\//.test(url)) return null;
|
||||
|
||||
[release.entryId] = url.pathname.split('/').slice(-2);
|
||||
[release.entryId] = url.pathname.split('/').slice(-2);
|
||||
|
||||
release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
|
||||
release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
|
||||
|| qd('.dt-box', 'MMM.DD YYYY');
|
||||
release.actors = site?.parameters?.actors || qa('.model, .i-model', true);
|
||||
release.duration = ql('.i-amount, .amount');
|
||||
release.actors = site?.parameters?.actors || qa('.model, .i-model', true);
|
||||
release.duration = ql('.i-amount, .amount');
|
||||
|
||||
const posterEl = q('.item-img img');
|
||||
const posterEl = q('.item-img img');
|
||||
|
||||
if (posterEl) {
|
||||
release.poster = `https:${posterEl.src}`;
|
||||
}
|
||||
if (posterEl) {
|
||||
release.poster = `https:${posterEl.src}`;
|
||||
}
|
||||
|
||||
if (posterEl?.dataset.gifPreview) {
|
||||
release.teaser = {
|
||||
src: `https:${posterEl.dataset.gifPreview}`,
|
||||
};
|
||||
}
|
||||
if (posterEl?.dataset.gifPreview) {
|
||||
release.teaser = {
|
||||
src: `https:${posterEl.dataset.gifPreview}`,
|
||||
};
|
||||
}
|
||||
|
||||
return release;
|
||||
}).filter(Boolean);
|
||||
return release;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
const { qu } = ex(html, '#videos-page, #content');
|
||||
const release = {};
|
||||
const { qu } = ex(html, '#videos-page, #content');
|
||||
const release = {};
|
||||
|
||||
[release.entryId] = new URL(url).pathname.split('/').slice(-2);
|
||||
[release.entryId] = new URL(url).pathname.split('/').slice(-2);
|
||||
|
||||
release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
|
||||
release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
|
||||
|| qu.q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();
|
||||
release.description = qu.text('.p-desc, .desc');
|
||||
release.description = qu.text('.p-desc, .desc');
|
||||
|
||||
release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);
|
||||
release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);
|
||||
|
||||
if (release.actors.length === 0) {
|
||||
const actorEl = qu.all('.stat').find(stat => /Featuring/.test(stat.textContent));
|
||||
const actorString = qu.text(actorEl);
|
||||
if (release.actors.length === 0) {
|
||||
const actorEl = qu.all('.stat').find(stat => /Featuring/.test(stat.textContent));
|
||||
const actorString = qu.text(actorEl);
|
||||
|
||||
release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
|
||||
}
|
||||
release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
|
||||
}
|
||||
|
||||
if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors;
|
||||
if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors;
|
||||
|
||||
release.tags = qu.all('a[href*=tag]', true);
|
||||
release.tags = qu.all('a[href*=tag]', true);
|
||||
|
||||
const dateEl = qu.all('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
|
||||
release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
|
||||
const dateEl = qu.all('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
|
||||
release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
|
||||
|| qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
|
||||
|| qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
|
||||
|
||||
const durationEl = qu.all('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
||||
release.duration = qu.dur(durationEl);
|
||||
const durationEl = qu.all('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
||||
release.duration = qu.dur(durationEl);
|
||||
|
||||
release.poster = qu.poster('video') || qu.img('.flowplayer img') || qu.img('img'); // _800.jpg is larger than _xl.jpg in landscape
|
||||
const photosUrl = qu.url('.stat a[href*=photos]');
|
||||
release.poster = qu.poster('video') || qu.img('.flowplayer img') || qu.img('img'); // _800.jpg is larger than _xl.jpg in landscape
|
||||
const photosUrl = qu.url('.stat a[href*=photos]');
|
||||
|
||||
if (photosUrl) {
|
||||
release.photos = await fetchPhotos(photosUrl);
|
||||
} else {
|
||||
release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
|
||||
photo.replace('_tn', ''),
|
||||
photo,
|
||||
]);
|
||||
}
|
||||
if (photosUrl) {
|
||||
release.photos = await fetchPhotos(photosUrl);
|
||||
} else {
|
||||
release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
|
||||
photo.replace('_tn', ''),
|
||||
photo,
|
||||
]);
|
||||
}
|
||||
|
||||
const trailers = qu.all('a[href*=Trailers]');
|
||||
const trailers = qu.all('a[href*=Trailers]');
|
||||
|
||||
if (trailers) {
|
||||
release.trailer = trailers.map((trailer) => {
|
||||
const src = `https:${trailer.href}`;
|
||||
const format = trailer.textContent.trim().match(/^\w+/)[0].toLowerCase();
|
||||
const quality = parseInt(trailer.textContent.trim().match(/\d+([a-zA-Z]+)?$/)[0], 10);
|
||||
if (trailers) {
|
||||
release.trailer = trailers.map((trailer) => {
|
||||
const src = `https:${trailer.href}`;
|
||||
const format = trailer.textContent.trim().match(/^\w+/)[0].toLowerCase();
|
||||
const quality = parseInt(trailer.textContent.trim().match(/\d+([a-zA-Z]+)?$/)[0], 10);
|
||||
|
||||
return format === 'mp4' ? { src, quality } : null;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
return format === 'mp4' ? { src, quality } : null;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
const stars = qu.q('.rate-box').dataset.score;
|
||||
if (stars) release.rating = { stars };
|
||||
const stars = qu.q('.rate-box').dataset.score;
|
||||
if (stars) release.rating = { stars };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeModels(html, actorName) {
|
||||
const { qa } = ex(html);
|
||||
const model = qa('.model a').find(link => link.title === actorName);
|
||||
const { qa } = ex(html);
|
||||
const model = qa('.model a').find(link => link.title === actorName);
|
||||
|
||||
return model?.href || null;
|
||||
return model?.href || null;
|
||||
}
|
||||
|
||||
/**
 * Collect an actor's releases by following the pagination links.
 *
 * @param {string} url - page to fetch
 * @param {Array} [accReleases=[]] - releases gathered from previous pages
 * @returns {Promise<Array|null>} all releases scraped so far, or null when the
 *   request for `url` did not succeed (including on a later page)
 */
async function fetchActorReleases(url, accReleases = []) {
	const res = await get(url);

	if (!res.ok) {
		return null;
	}

	const releases = accReleases.concat(scrapeAll(res.item.document.body.outerHTML));
	const nextPage = res.item.qu.url('.next-pg');

	// last page has 'next' button linking to join page, which has no 'page' parameter
	if (nextPage && new URL(nextPage).searchParams.has('page')) {
		return fetchActorReleases(nextPage, releases);
	}

	return releases;
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * @param {string} html - markup of the model page
 * @param {string} actorUrl - URL of the model page, used to derive the scenes URL
 * @param {boolean} withReleases - when truthy, also fetch the actor's releases
 * @returns {Promise<Object>} profile; only properties found on the page are set
 */
async function scrapeProfile(html, actorUrl, withReleases) {
	const { q, qa, qi } = ex(html, '#model-page');
	const profile = { gender: 'female' };

	// map every '.stat' row to { slugified_label: value }; the label's last character is dropped
	const bio = qa('.stat').reduce((acc, el) => {
		const prop = q(el, '.label', true).slice(0, -1);
		const key = slugify(prop, '_');
		const value = q(el, '.value', true);

		return {
			...acc,
			[key]: value,
		};
	}, {});

	if (bio.location) profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion

	if (bio.birthday) {
		// either match can fail on an unexpected value; skip the birthday instead of throwing
		const birthMonth = bio.birthday.match(/^\w+/)?.[0].toLowerCase();
		const birthDay = bio.birthday.match(/\d+/)?.[0];

		if (birthMonth && birthDay) {
			profile.birthday = [birthMonth, birthDay]; // currently unused, not to be confused with birthdate
		}
	}

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
	if (bio.hair_color) profile.hair = bio.hair_color;

	if (bio.height) profile.height = heightToCm(bio.height);
	if (bio.weight) profile.weight = lbsToKg(bio.weight);

	if (bio.bra_size) profile.bust = bio.bra_size;
	if (bio.measurements) [, profile.waist, profile.hip] = bio.measurements.split('-');

	if (bio.occupation) profile.occupation = bio.occupation;

	const avatar = qi('img');
	if (avatar) profile.avatar = avatar;

	if (withReleases) {
		const { origin, pathname } = new URL(actorUrl);
		profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`);
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of a site's latest releases.
 *
 * @param {Object} site - site record; `site.url` is the base URL and the optional
 *   `site.parameters.path` overrides the default listing path
 * @param {number} [page=1] - listing page number
 * @returns {Promise<*>} result of `scrapeAll` on success, otherwise the HTTP status code
 */
async function fetchLatest(site, page = 1) {
	const latestPath = site.parameters?.path || '/big-boob-videos';
	const url = `${site.url}${latestPath}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeAll(res.body.toString(), site);
	}

	return res.statusCode;
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site record passed through to `scrapeScene`
 * @returns {Promise<*>} result of `scrapeScene`, or null on a non-200 response
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), url, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Find an actor in the model indexes and scrape their profile.
 *
 * Pages through the browse listing for the actor's initial on each source in
 * turn, moving to the next source once the current one stops answering 200.
 *
 * @param {string} actorName - name to look up; its first letter selects the listing
 * @param {string} scraperSlug - unused here, forwarded on recursion
 * @param {Object} site - unused here, forwarded on recursion
 * @param {Object} [include] - options; `include.scenes` requests the actor's releases
 * @param {number} [page=1] - listing page to fetch (internal, used for recursion)
 * @param {number} [source=0] - index into the source list (internal, used for recursion)
 * @returns {Promise<Object|null>} scraped profile, or null when the actor was not
 *   found or the profile page could not be fetched
 */
async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
	const letter = actorName.charAt(0).toUpperCase();

	const sources = [
		`https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${page}`,
		`https://www.50plusmilfs.com/xxx-milf-models/browse/${letter}/?page=${page}`,
	];

	const url = sources[source];

	// NOTE(review): presumably pages past the last one redirect; with redirects
	// disabled they surface as a non-200 status, which ends paging for this source
	const res = await bhttp.get(url, {
		followRedirects: false,
	});

	if (res.statusCode === 200) {
		const actorUrl = scrapeModels(res.body.toString(), actorName);

		if (actorUrl) {
			const actorRes = await bhttp.get(actorUrl);

			if (actorRes.statusCode === 200) {
				// include is optional; a missing object means no releases
				return scrapeProfile(actorRes.body.toString(), actorUrl, include?.scenes);
			}

			return null;
		}

		// actor not on this page, try the next one
		return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
	}

	if (sources[source + 1]) {
		return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -65,143 +65,143 @@ const freeones = require('./freeones');
|
||||
// const freeoneslegacy = require('./freeones_legacy');
|
||||
|
||||
module.exports = {
|
||||
releases: {
|
||||
'21naturals': naturals,
|
||||
'21sextreme': sextreme,
|
||||
'21sextury': sextury,
|
||||
adulttime,
|
||||
amateurallure,
|
||||
assylum,
|
||||
aziani,
|
||||
babes,
|
||||
bamvisions,
|
||||
bang,
|
||||
bangbros,
|
||||
blowpass,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
digitalplayground,
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
evilangel,
|
||||
fakehub,
|
||||
famedigital,
|
||||
fantasymassage,
|
||||
fullpornnetwork,
|
||||
girlsway,
|
||||
girlgirl: julesjordan,
|
||||
hussiepass: hush,
|
||||
hushpass: hush,
|
||||
insex,
|
||||
interracialpass: hush,
|
||||
jayrock,
|
||||
jesseloadsmonsterfacials,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
kink,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
mikeadriano,
|
||||
milehighmedia,
|
||||
mindgeek,
|
||||
mofos,
|
||||
naughtyamerica,
|
||||
newsensations,
|
||||
nubiles,
|
||||
perfectgonzo,
|
||||
pervcity,
|
||||
pimpxxx: cherrypimps,
|
||||
pornpros: whalemember,
|
||||
private: privateNetwork,
|
||||
puretaboo,
|
||||
realitykings,
|
||||
score,
|
||||
sexyhub: mindgeek,
|
||||
swallowsalon: julesjordan,
|
||||
teamskeet,
|
||||
twistys,
|
||||
vivid,
|
||||
vixen,
|
||||
vogov,
|
||||
whalemember,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
actors: {
|
||||
'21sextury': sextury,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
aziani,
|
||||
babes,
|
||||
baddaddypov: fullpornnetwork,
|
||||
bamvisions,
|
||||
bangbros,
|
||||
blacked: vixen,
|
||||
blackedraw: vixen,
|
||||
blowpass,
|
||||
boobpedia,
|
||||
brattysis: nubiles,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
deeper: vixen,
|
||||
deeplush: nubiles,
|
||||
digitalplayground,
|
||||
dtfsluts: fullpornnetwork,
|
||||
evilangel,
|
||||
eyeontheguy: hush,
|
||||
fakehub,
|
||||
famedigital,
|
||||
freeones,
|
||||
gangbangcreampie: aziani,
|
||||
girlfaction: fullpornnetwork,
|
||||
gloryholesecrets: aziani,
|
||||
hergape: fullpornnetwork,
|
||||
homemadeanalwhores: fullpornnetwork,
|
||||
hotcrazymess: nubiles,
|
||||
hushpass: hush,
|
||||
hussiepass: hush,
|
||||
iconmale,
|
||||
interracialpass: hush,
|
||||
interracialpovs: hush,
|
||||
jamesdeen: fullpornnetwork,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
mofos,
|
||||
mugfucked: fullpornnetwork,
|
||||
naughtyamerica,
|
||||
nfbusty: nubiles,
|
||||
nubilefilms: nubiles,
|
||||
nubiles,
|
||||
nubilesporn: nubiles,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervertgallery: fullpornnetwork,
|
||||
pimpxxx: cherrypimps,
|
||||
pornhub,
|
||||
povperverts: fullpornnetwork,
|
||||
povpornstars: hush,
|
||||
private: privateNetwork,
|
||||
realitykings,
|
||||
score,
|
||||
seehimfuck: hush,
|
||||
sexyhub: mindgeek,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
vixen,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
releases: {
|
||||
'21naturals': naturals,
|
||||
'21sextreme': sextreme,
|
||||
'21sextury': sextury,
|
||||
adulttime,
|
||||
amateurallure,
|
||||
assylum,
|
||||
aziani,
|
||||
babes,
|
||||
bamvisions,
|
||||
bang,
|
||||
bangbros,
|
||||
blowpass,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
digitalplayground,
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
evilangel,
|
||||
fakehub,
|
||||
famedigital,
|
||||
fantasymassage,
|
||||
fullpornnetwork,
|
||||
girlsway,
|
||||
girlgirl: julesjordan,
|
||||
hussiepass: hush,
|
||||
hushpass: hush,
|
||||
insex,
|
||||
interracialpass: hush,
|
||||
jayrock,
|
||||
jesseloadsmonsterfacials,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
kink,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
mikeadriano,
|
||||
milehighmedia,
|
||||
mindgeek,
|
||||
mofos,
|
||||
naughtyamerica,
|
||||
newsensations,
|
||||
nubiles,
|
||||
perfectgonzo,
|
||||
pervcity,
|
||||
pimpxxx: cherrypimps,
|
||||
pornpros: whalemember,
|
||||
private: privateNetwork,
|
||||
puretaboo,
|
||||
realitykings,
|
||||
score,
|
||||
sexyhub: mindgeek,
|
||||
swallowsalon: julesjordan,
|
||||
teamskeet,
|
||||
twistys,
|
||||
vivid,
|
||||
vixen,
|
||||
vogov,
|
||||
whalemember,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
actors: {
|
||||
'21sextury': sextury,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
aziani,
|
||||
babes,
|
||||
baddaddypov: fullpornnetwork,
|
||||
bamvisions,
|
||||
bangbros,
|
||||
blacked: vixen,
|
||||
blackedraw: vixen,
|
||||
blowpass,
|
||||
boobpedia,
|
||||
brattysis: nubiles,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
deeper: vixen,
|
||||
deeplush: nubiles,
|
||||
digitalplayground,
|
||||
dtfsluts: fullpornnetwork,
|
||||
evilangel,
|
||||
eyeontheguy: hush,
|
||||
fakehub,
|
||||
famedigital,
|
||||
freeones,
|
||||
gangbangcreampie: aziani,
|
||||
girlfaction: fullpornnetwork,
|
||||
gloryholesecrets: aziani,
|
||||
hergape: fullpornnetwork,
|
||||
homemadeanalwhores: fullpornnetwork,
|
||||
hotcrazymess: nubiles,
|
||||
hushpass: hush,
|
||||
hussiepass: hush,
|
||||
iconmale,
|
||||
interracialpass: hush,
|
||||
interracialpovs: hush,
|
||||
jamesdeen: fullpornnetwork,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
mofos,
|
||||
mugfucked: fullpornnetwork,
|
||||
naughtyamerica,
|
||||
nfbusty: nubiles,
|
||||
nubilefilms: nubiles,
|
||||
nubiles,
|
||||
nubilesporn: nubiles,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervertgallery: fullpornnetwork,
|
||||
pimpxxx: cherrypimps,
|
||||
pornhub,
|
||||
povperverts: fullpornnetwork,
|
||||
povpornstars: hush,
|
||||
private: privateNetwork,
|
||||
realitykings,
|
||||
score,
|
||||
seehimfuck: hush,
|
||||
sexyhub: mindgeek,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
vixen,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -5,176 +5,176 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Derive a display title from a scene path.
 *
 * Takes the second-to-last path segment, splits it on underscores and
 * capitalises the first letter of every word.
 *
 * @param {string} pathname - URL path, e.g. '/series/my_scene/123'
 * @returns {string} title such as 'My Scene'
 */
function extractTitle(pathname) {
	return pathname
		.split('/')
		.slice(-2)[0]
		.split('_')
		.map(seg => `${seg.charAt(0).toUpperCase()}${seg.slice(1)}`)
		.join(' ');
}
|
||||
|
||||
/**
 * Split a cast string into individual actor names.
 *
 * Names are separated by commas or the word 'and' (case-insensitive). Entries
 * containing an ellipsis ('...') are dropped, as are empty entries.
 *
 * @param {string} str - e.g. 'Jane Doe, Mary Sue and Ann Lee'
 * @returns {string[]} trimmed actor names
 */
function extractActors(str) {
	return str
		.split(/,|\band\b/ig)
		.filter(actor => !/\.{3}/.test(actor))
		.map(actor => actor.trim())
		.filter(actor => actor.length > 0);
}
|
||||
|
||||
/**
 * Scrape the releases from an updates listing.
 *
 * @param {string} html - markup of the updates listing
 * @param {Object} site - site record attached to every release
 * @returns {Object[]} one release per '#updatesList' item
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));

	return scenes.map((scene) => {
		const release = { site };

		const link = scene.querySelector('.info a');
		const poster = scene.querySelector('img');
		const { pathname } = new URL(link.href);

		[release.entryId] = poster.id.match(/\d+/);

		release.url = `https://www.teamskeet.com${pathname}`;
		release.title = extractTitle(pathname);

		release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();

		// derive photos 01-05 from the thumbnail URL by swapping its trailing number;
		// the dot is escaped so only the '<number>.jpg' file name is replaced
		const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+\.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));
		[release.poster] = photos;
		release.photos = photos.slice(1);

		const actors = scene.querySelector('div span[rel="test"]').textContent;
		release.actors = extractActors(actors);

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a scene page.
 *
 * @param {string} html - markup of the scene page
 * @param {Object} site - site record attached to the release
 * @param {string} url - URL the page was fetched from
 * @returns {Object} release; poster and trailer are only set when present on the page
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
	release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;

	// the page title is split on '|' into actors, scene title and channel
	const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());

	release.url = url;
	release.title = title;
	release.actors = extractActors(actors);
	release.channel = channel.toLowerCase();
	release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);

	const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
	release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();

	// the poster is optional, so a page without a video element must not throw
	const poster = document.querySelector('video')?.poster;
	if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];

	const siteId = document.querySelector('#story-and-tags img').src.match(/\w+\.jpg/)[0].replace('.jpg', '');
	const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];

	release.photos = Array.from({ length: 5 }, (_value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);

	// the trailer is optional as well
	const trailer = document.querySelector('div.right.gray a')?.href;
	if (trailer) release.trailer = { src: trailer };

	return release;
}
|
||||
|
||||
/**
 * Scrape a scene for sites using the 'A' layout, either from a listing item or
 * from a full scene page.
 *
 * @param {string|null} html - markup of the scene page; ignored when `sceneX` is given
 * @param {Object} site - site record attached to the release
 * @param {Object} [sceneX] - listing element of the scene; when omitted `html` is parsed
 * @param {string} [url] - scene URL, used when scraping a full page
 * @returns {Object} release
 */
function scrapeSceneA(html, site, sceneX, url) {
	const scene = sceneX || new JSDOM(html).window.document;
	const release = { site };

	release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();

	release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
	release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();

	const durationString = scene.querySelector('.time').textContent.trim();
	const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
	release.duration = moment.duration(duration).asSeconds();

	if (sceneX) {
		// listing item: URL and title come from the item's direct link
		const titleEl = scene.querySelector(':scope > a');

		release.url = titleEl.href;
		release.title = titleEl.title;

		const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
		release.poster = [poster.replace('bio_big', 'video'), poster];
		release.photos = photos;
	} else {
		// full scene page
		release.title = scene.querySelector('.title span').textContent;
		release.url = url;

		release.poster = scene.querySelector('video').poster;
		release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
	}

	// entry ID is the first path segment, or the second when the first is 'scenes'
	const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
	release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;

	return release;
}
|
||||
|
||||
/**
 * Scrape the releases from a listing page of a site using the 'A' layout.
 *
 * @param {string} html - markup of the listing page
 * @param {Object} site - site record attached to every release
 * @returns {Object[]} one release per '.scenewrapper' element
 */
function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('.scenewrapper'));

	return scenes.map(scene => scrapeSceneA(null, site, scene));
}
|
||||
|
||||
/**
 * Fetch one page of latest releases through the updates loader.
 *
 * @param {Object} site - site record; `site.parameters.id` selects the site filter
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]|null>} scraped releases, or null on a non-200 response
 */
async function fetchLatestTeamSkeet(site, page = 1) {
	const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch the latest releases of a site using the 'A' layout.
 *
 * Only `${site.url}/scenes` is requested; no page number is used.
 *
 * @param {Object} site - site record; `site.url` is the base URL
 * @returns {Promise<Object[]|null>} scraped releases, or null on a non-200 response
 */
async function fetchLatestA(site) {
	const url = `${site.url}/scenes`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatestA(res.body.toString(), site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a site's latest releases with the fetcher matching its parameters.
 *
 * @param {Object} site - site record; `parameters.id` selects the updates loader,
 *   `parameters.scraper === 'A'` selects the 'A' layout
 * @param {number} [page=1] - page number
 * @returns {Promise<Object[]|null>} releases, or null when no fetcher applies
 */
async function fetchLatest(site, page = 1) {
	// parameters are optional, as in fetchScene
	if (site.parameters?.id) {
		return fetchLatestTeamSkeet(site, page);
	}

	if (site.parameters?.scraper === 'A') {
		return fetchLatestA(site, page);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it with the scraper matching the site.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site record; `parameters.scraper === 'A'` selects the 'A' layout
 * @returns {Promise<Object|null>} scraped release, or null on a non-200 response
 */
async function fetchScene(url, site) {
	const session = bhttp.session(); // resolve redirects
	const res = await session.get(url);

	// an error page lacks the expected markup and would make the scraper throw
	if (res.statusCode !== 200) {
		return null;
	}

	if (site.parameters?.scraper === 'A') {
		return scrapeSceneA(res.body.toString(), site, null, url);
	}

	return scrapeScene(res.body.toString(), site, url);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
const { fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared network scraper, using the
 * 'transangels' slug.
 *
 * @param {string} actorName - name of the actor to look up
 * @returns {Promise<*>} whatever the network `fetchProfile` resolves to
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'transangels');
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared network scraper, using the
 * 'twistys' slug.
 *
 * @param {string} actorName - name of the actor to look up
 * @returns {Promise<*>} whatever the network `fetchProfile` resolves to
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'twistys');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -8,128 +8,128 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = requir
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Convert scene records from the native videos API into releases.
 *
 * @param {Object[]} scenes - records from the API response
 * @param {Object} site - site record; `site.url` prefixes each scene path
 * @returns {Object[]} releases
 */
function scrapeLatestNative(scenes, site) {
	return scenes.map((scene) => {
		const release = {};

		release.entryId = scene.id;
		release.url = `${site.url}${scene.url}`;

		release.title = scene.name;
		release.date = ed(scene.release_date, 'YYYY-MM-DD');
		// NOTE(review): runtime is multiplied by 60, so presumably the API reports minutes - confirm
		release.duration = parseInt(scene.runtime, 10) * 60;

		release.actors = scene.cast?.map(actor => ({
			name: actor.stagename,
			gender: actor.gender?.toLowerCase(), // tolerate cast entries without a gender
			avatar: actor.placard,
		})) || [];

		release.stars = Number(scene.rating);
		release.poster = scene.placard_800 || scene.placard;

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a scene page of the native site.
 *
 * @param {Object} item - page helpers
 * @param {string} item.html - raw markup, searched for media URLs
 * @param {Function} item.q - single-element query helper
 * @param {Function} item.qa - multi-element query helper
 * @param {string} url - scene URL; its third path segment is used as the entry ID
 * @param {Object} _site - unused
 * @returns {Object} release
 */
function scrapeSceneNative({ html, q, qa }, url, _site) {
	const release = { url };

	release.entryId = new URL(url).pathname.split('/')[2]; // eslint-disable-line prefer-destructuring

	release.title = q('.scene-h2-heading', true);
	release.description = q('.indie-model-p', true);

	const dateString = qa('h5').find(el => /Released/.test(el.textContent)).textContent;
	release.date = ed(dateString, 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	const duration = qa('h5').find(el => /Runtime/.test(el.textContent)).textContent;
	const [hours, minutes] = duration.match(/\d+/g);

	if (minutes) release.duration = (hours * 3600) + (minutes * 60);
	else release.duration = hours * 60; // scene shorter than 1hr, hour match are minutes

	release.actors = qa('h4 a[href*="/stars"], h4 a[href*="/celebs"]', true);
	release.tags = qa('h5 a[href*="/categories"]', true);

	// dots are escaped so only real content.vivid.com .jpg/.mp4 URLs match;
	// a page without any media yields no poster instead of a TypeError
	const [poster, trailer] = html.match(/https:\/\/content\.vivid\.com(.*)(\.jpg|\.mp4)/g) || [];
	if (poster) release.poster = poster;

	if (trailer) {
		release.trailer = {
			src: trailer,
		};
	}

	const channel = q('h5 a[href*="/sites"]', true);
	if (channel) release.channel = channel.replace(/\.\w+/, '');

	return release;
}
|
||||
|
||||
/**
 * Fetch one page of latest releases, from the Gamma API when the site is
 * configured for it and otherwise from the native videos API.
 *
 * @param {Object} site - site record; `parameters.useGamma` selects the Gamma API
 * @param {number} [page=1] - page number; the native API is queried 50 scenes at a time
 * @returns {Promise<Object[]|null>} releases, or null when the native API reports a failure
 */
async function fetchLatestNative(site, page = 1) {
	if (site.parameters?.useGamma) {
		return fetchApiLatest(site, page);
	}

	const apiUrl = `${site.url}/videos/api/?limit=50&offset=${(page - 1) * 50}&sort=datedesc`;
	const res = await bhttp.get(apiUrl, {
		decodeJSON: true,
	});

	// the API carries its own status code in the body next to the HTTP status
	if (res.statusCode === 200 && res.body.code === 200) {
		return scrapeLatestNative(res.body.responseData, site);
	}

	return null;
}
|
||||
|
||||
async function fetchUpcomingNative(site) {
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchApiUpcoming(site);
|
||||
}
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchApiUpcoming(site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchSceneNative(url, site, release) {
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchScene(url, site, release);
|
||||
}
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchScene(url, site, release);
|
||||
}
|
||||
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeSceneNative(res.item, url, site) : res.status;
|
||||
return res.ok ? scrapeSceneNative(res.item, url, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchSceneWrapper(url, site, release) {
|
||||
const scene = await fetchScene(url, site, release);
|
||||
const scene = await fetchScene(url, site, release);
|
||||
|
||||
if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
|
||||
// scene is probably still available on Vivid site, use search API to get URL and original date
|
||||
const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURI(scene.title)}`;
|
||||
const searchRes = await bhttp.get(searchUrl, {
|
||||
decodeJSON: true,
|
||||
});
|
||||
if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
|
||||
// scene is probably still available on Vivid site, use search API to get URL and original date
|
||||
const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURI(scene.title)}`;
|
||||
const searchRes = await bhttp.get(searchUrl, {
|
||||
decodeJSON: true,
|
||||
});
|
||||
|
||||
if (searchRes.statusCode === 200 && searchRes.body.code === 200) {
|
||||
const sceneMatch = searchRes.body.responseData.find(item => slugify(item.name) === slugify(scene.title));
|
||||
if (searchRes.statusCode === 200 && searchRes.body.code === 200) {
|
||||
const sceneMatch = searchRes.body.responseData.find(item => slugify(item.name) === slugify(scene.title));
|
||||
|
||||
if (sceneMatch) {
|
||||
return {
|
||||
...scene,
|
||||
url: `${site.url}${sceneMatch.url}`,
|
||||
date: ed(sceneMatch.release_date, 'YYYY-MM-DD'),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
if (sceneMatch) {
|
||||
return {
|
||||
...scene,
|
||||
url: `${site.url}${sceneMatch.url}`,
|
||||
date: ed(sceneMatch.release_date, 'YYYY-MM-DD'),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return scene;
|
||||
return scene;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
};
|
||||
|
||||
@@ -8,246 +8,246 @@ const { get, post } = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
const genderMap = {
|
||||
F: 'female',
|
||||
M: 'male',
|
||||
T: 'transsexual', // not yet observed
|
||||
F: 'female',
|
||||
M: 'male',
|
||||
T: 'transsexual', // not yet observed
|
||||
};
|
||||
|
||||
function getPosterFallbacks(poster) {
|
||||
return poster
|
||||
.filter(image => /landscape/i.test(image.name))
|
||||
.sort((imageA, imageB) => imageB.height - imageA.height)
|
||||
.map((image) => {
|
||||
const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];
|
||||
// high DPI images for full HD source are huge, only prefer for smaller fallback sources
|
||||
return image.height === 1080 ? sources : sources.reverse();
|
||||
})
|
||||
.flat();
|
||||
return poster
|
||||
.filter(image => /landscape/i.test(image.name))
|
||||
.sort((imageA, imageB) => imageB.height - imageA.height)
|
||||
.map((image) => {
|
||||
const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];
|
||||
// high DPI images for full HD source are huge, only prefer for smaller fallback sources
|
||||
return image.height === 1080 ? sources : sources.reverse();
|
||||
})
|
||||
.flat();
|
||||
}
|
||||
|
||||
function getTeaserFallbacks(teaser) {
|
||||
return teaser
|
||||
.filter(video => /landscape/i.test(video.name))
|
||||
.map(video => ({
|
||||
src: video.src,
|
||||
type: video.type,
|
||||
quality: Number(String(video.height).replace('353', '360')),
|
||||
}));
|
||||
return teaser
|
||||
.filter(video => /landscape/i.test(video.name))
|
||||
.map(video => ({
|
||||
src: video.src,
|
||||
type: video.type,
|
||||
quality: Number(String(video.height).replace('353', '360')),
|
||||
}));
|
||||
}
|
||||
|
||||
function getAvatarFallbacks(avatar) {
|
||||
return avatar
|
||||
.sort((imageA, imageB) => imageB.height - imageA.height)
|
||||
.map(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
|
||||
.flat();
|
||||
return avatar
|
||||
.sort((imageA, imageB) => imageB.height - imageA.height)
|
||||
.map(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
|
||||
.flat();
|
||||
}
|
||||
|
||||
async function getTrailer(scene, site, url) {
|
||||
const qualities = [360, 480, 720, 1080, 2160];
|
||||
const qualities = [360, 480, 720, 1080, 2160];
|
||||
|
||||
const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
|
||||
file: scene.previewVideoUrl1080P,
|
||||
sizes: qualities.join('+'),
|
||||
type: 'trailer',
|
||||
}, { referer: url });
|
||||
const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
|
||||
file: scene.previewVideoUrl1080P,
|
||||
sizes: qualities.join('+'),
|
||||
type: 'trailer',
|
||||
}, { referer: url });
|
||||
|
||||
if (!tokenRes.ok) {
|
||||
return null;
|
||||
}
|
||||
if (!tokenRes.ok) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
|
||||
const trailersRes = await post(trailerUrl, null, { referer: url });
|
||||
const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
|
||||
const trailersRes = await post(trailerUrl, null, { referer: url });
|
||||
|
||||
if (trailersRes.ok) {
|
||||
return qualities.map(quality => (trailersRes.body[quality] ? {
|
||||
src: trailersRes.body[quality].token,
|
||||
quality,
|
||||
} : null)).filter(Boolean);
|
||||
}
|
||||
if (trailersRes.ok) {
|
||||
return qualities.map(quality => (trailersRes.body[quality] ? {
|
||||
src: trailersRes.body[quality].token,
|
||||
quality,
|
||||
} : null)).filter(Boolean);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, site, origin) {
|
||||
return scenes.map((scene) => {
|
||||
const release = {};
|
||||
return scenes.map((scene) => {
|
||||
const release = {};
|
||||
|
||||
release.title = scene.title;
|
||||
release.title = scene.title;
|
||||
|
||||
release.entryId = String(scene.newId);
|
||||
release.url = `${site?.url || origin}${scene.targetUrl}`;
|
||||
release.entryId = String(scene.newId);
|
||||
release.url = `${site?.url || origin}${scene.targetUrl}`;
|
||||
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
|
||||
release.actors = scene.models;
|
||||
release.stars = Number(scene.textRating) / 2;
|
||||
release.actors = scene.models;
|
||||
release.stars = Number(scene.textRating) / 2;
|
||||
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeUpcoming(scene, site) {
|
||||
if (!scene || scene.isPreReleasePeriod) return null;
|
||||
if (!scene || scene.isPreReleasePeriod) return null;
|
||||
|
||||
const release = {};
|
||||
const release = {};
|
||||
|
||||
release.title = scene.targetUrl
|
||||
.slice(1)
|
||||
.split('-')
|
||||
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||
.join(' ');
|
||||
release.title = scene.targetUrl
|
||||
.slice(1)
|
||||
.split('-')
|
||||
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||
.join(' ');
|
||||
|
||||
release.url = `${site.url}${scene.targetUrl}`;
|
||||
release.url = `${site.url}${scene.targetUrl}`;
|
||||
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
|
||||
release.actors = scene.models;
|
||||
release.actors = scene.models;
|
||||
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
|
||||
release.entryId = (release.poster[0] || release.teaser[0])?.match(/\/(\d+)/)?.[1];
|
||||
release.entryId = (release.poster[0] || release.teaser[0])?.match(/\/(\d+)/)?.[1];
|
||||
|
||||
return [release];
|
||||
return [release];
|
||||
}
|
||||
|
||||
async function scrapeScene(data, url, site, baseRelease) {
|
||||
const scene = data.video;
|
||||
const scene = data.video;
|
||||
|
||||
const release = {
|
||||
url,
|
||||
title: scene.title,
|
||||
description: scene.description,
|
||||
actors: scene.models,
|
||||
director: scene.directorNames,
|
||||
duration: scene.runLength,
|
||||
stars: scene.totalRateVal,
|
||||
tags: scene.tags,
|
||||
};
|
||||
const release = {
|
||||
url,
|
||||
title: scene.title,
|
||||
description: scene.description,
|
||||
actors: scene.models,
|
||||
director: scene.directorNames,
|
||||
duration: scene.runLength,
|
||||
stars: scene.totalRateVal,
|
||||
tags: scene.tags,
|
||||
};
|
||||
|
||||
release.entryId = scene.newId;
|
||||
release.entryId = scene.newId;
|
||||
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.shootDate = moment.utc(scene.shootDate).toDate();
|
||||
|
||||
release.actors = baseRelease?.actors || scene.models;
|
||||
release.actors = baseRelease?.actors || scene.models;
|
||||
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.photos = data.pictureset.map(photo => photo.main[0].src);
|
||||
release.poster = getPosterFallbacks(scene.images.poster);
|
||||
release.photos = data.pictureset.map(photo => photo.main[0].src);
|
||||
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
release.teaser = getTeaserFallbacks(scene.previews.poster);
|
||||
|
||||
const trailer = await getTrailer(scene, site, url);
|
||||
if (trailer) release.trailer = trailer;
|
||||
const trailer = await getTrailer(scene, site, url);
|
||||
if (trailer) release.trailer = trailer;
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchActorReleases(pages, model, origin) {
|
||||
const releasesPerPage = await Promise.map(pages, async (page) => {
|
||||
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||
const res = await get(url);
|
||||
const releasesPerPage = await Promise.map(pages, async (page) => {
|
||||
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||
const res = await get(url);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||
}
|
||||
if (res.code === 200) {
|
||||
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||
}
|
||||
|
||||
return [];
|
||||
}, { concurrency: 3 });
|
||||
return [];
|
||||
}, { concurrency: 3 });
|
||||
|
||||
return releasesPerPage.flat();
|
||||
return releasesPerPage.flat();
|
||||
}
|
||||
|
||||
async function scrapeProfile(data, origin, withReleases) {
|
||||
const model = data.model;
|
||||
const profile = {};
|
||||
const model = data.model;
|
||||
const profile = {};
|
||||
|
||||
profile.birthdate = new Date(model.dateOfBirth);
|
||||
profile.gender = genderMap[model.sex];
|
||||
profile.birthdate = new Date(model.dateOfBirth);
|
||||
profile.gender = genderMap[model.sex];
|
||||
|
||||
profile.hair = model.hairColour;
|
||||
profile.nationality = model.nationality;
|
||||
profile.hair = model.hairColour;
|
||||
profile.nationality = model.nationality;
|
||||
|
||||
if (model.biography.trim().length > 0) profile.description = model.biography;
|
||||
if (model.biography.trim().length > 0) profile.description = model.biography;
|
||||
|
||||
if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
|
||||
if (model.waistMeasurment) profile.waist = model.waistMeasurment;
|
||||
if (model.hipMeasurment) profile.hip = model.hipMeasurment;
|
||||
if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
|
||||
if (model.waistMeasurment) profile.waist = model.waistMeasurment;
|
||||
if (model.hipMeasurment) profile.hip = model.hipMeasurment;
|
||||
|
||||
profile.avatar = getAvatarFallbacks(model.images.listing);
|
||||
profile.poster = getAvatarFallbacks(model.images.profile);
|
||||
profile.banner = getAvatarFallbacks(model.images.poster);
|
||||
profile.avatar = getAvatarFallbacks(model.images.listing);
|
||||
profile.poster = getAvatarFallbacks(model.images.profile);
|
||||
profile.banner = getAvatarFallbacks(model.images.poster);
|
||||
|
||||
const releases = scrapeAll(data.videos.videos, null, origin);
|
||||
const releases = scrapeAll(data.videos.videos, null, origin);
|
||||
|
||||
if (withReleases) {
|
||||
const pageCount = Math.ceil(data.videos.count / 6);
|
||||
const otherReleases = await fetchActorReleases((Array.from({ length: pageCount - 1 }, (value, index) => index + 2)), model, origin);
|
||||
if (withReleases) {
|
||||
const pageCount = Math.ceil(data.videos.count / 6);
|
||||
const otherReleases = await fetchActorReleases((Array.from({ length: pageCount - 1 }, (value, index) => index + 2)), model, origin);
|
||||
|
||||
profile.releases = [...releases, ...otherReleases];
|
||||
} else {
|
||||
profile.releases = releases;
|
||||
}
|
||||
profile.releases = [...releases, ...otherReleases];
|
||||
} else {
|
||||
profile.releases = releases;
|
||||
}
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.url}/api/videos?page=${page}`;
|
||||
const res = await get(url);
|
||||
const url = `${site.url}/api/videos?page=${page}`;
|
||||
const res = await get(url);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeAll(res.body.data.videos, site);
|
||||
}
|
||||
if (res.code === 200) {
|
||||
return scrapeAll(res.body.data.videos, site);
|
||||
}
|
||||
|
||||
return res.code;
|
||||
return res.code;
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
const apiUrl = `${site.url}/api`;
|
||||
const res = await get(apiUrl);
|
||||
const apiUrl = `${site.url}/api`;
|
||||
const res = await get(apiUrl);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||
}
|
||||
if (res.code === 200) {
|
||||
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||
}
|
||||
|
||||
return res.code;
|
||||
return res.code;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, baseRelease) {
|
||||
const { origin, pathname } = new URL(url);
|
||||
const apiUrl = `${origin}/api${pathname}`;
|
||||
const { origin, pathname } = new URL(url);
|
||||
const apiUrl = `${origin}/api${pathname}`;
|
||||
|
||||
const res = await get(apiUrl);
|
||||
const res = await get(apiUrl);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||
}
|
||||
if (res.code === 200) {
|
||||
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||
}
|
||||
|
||||
return res.code;
|
||||
return res.code;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug, site, include) {
|
||||
const origin = `https://www.${scraperSlug}.com`;
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `${origin}/api/${actorSlug}`;
|
||||
const res = await get(url);
|
||||
const origin = `https://www.${scraperSlug}.com`;
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `${origin}/api/${actorSlug}`;
|
||||
const res = await get(url);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeProfile(res.body.data, origin, include.scenes);
|
||||
}
|
||||
if (res.code === 200) {
|
||||
return scrapeProfile(res.body.data, origin, include.scenes);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -5,199 +5,199 @@ const { ex, ctxa } = require('../utils/q');
|
||||
// const slugify = require('../utils/slugify');
|
||||
|
||||
function getLicenseCode(html) {
|
||||
const licensePrefix = 'license_code: \'';
|
||||
const licenseStart = html.indexOf(licensePrefix);
|
||||
const licenseCode = html.slice(licenseStart + licensePrefix.length, html.indexOf('\'', licenseStart + licensePrefix.length));
|
||||
const licensePrefix = 'license_code: \'';
|
||||
const licenseStart = html.indexOf(licensePrefix);
|
||||
const licenseCode = html.slice(licenseStart + licensePrefix.length, html.indexOf('\'', licenseStart + licensePrefix.length));
|
||||
|
||||
const c = '16px';
|
||||
let f;
|
||||
let g;
|
||||
let h;
|
||||
let i;
|
||||
let j;
|
||||
let k;
|
||||
let l;
|
||||
let m;
|
||||
let n;
|
||||
const c = '16px';
|
||||
let f;
|
||||
let g;
|
||||
let h;
|
||||
let i;
|
||||
let j;
|
||||
let k;
|
||||
let l;
|
||||
let m;
|
||||
let n;
|
||||
|
||||
for (f = '', g = 1; g < licenseCode.length; g += 1) {
|
||||
f += parseInt(licenseCode[g], 10) ? parseInt(licenseCode[g], 10) : 1;
|
||||
}
|
||||
for (f = '', g = 1; g < licenseCode.length; g += 1) {
|
||||
f += parseInt(licenseCode[g], 10) ? parseInt(licenseCode[g], 10) : 1;
|
||||
}
|
||||
|
||||
for (j = parseInt(f.length / 2, 10),
|
||||
k = parseInt(f.substring(0, j + 1), 10),
|
||||
l = parseInt(f.substring(j), 10),
|
||||
g = l - k,
|
||||
g < 0 && (g = -g),
|
||||
f = g,
|
||||
g = k - l,
|
||||
g < 0 && (g = -g),
|
||||
f += g,
|
||||
f *= 2,
|
||||
f = String(f),
|
||||
i = (parseInt(c, 10) / 2) + 2,
|
||||
m = '',
|
||||
g = 0; g < j + 1; g += 1) {
|
||||
for (h = 1; h <= 4; h += 1) {
|
||||
n = parseInt(licenseCode[g + h], 10) + parseInt(f[g], 10);
|
||||
for (j = parseInt(f.length / 2, 10),
|
||||
k = parseInt(f.substring(0, j + 1), 10),
|
||||
l = parseInt(f.substring(j), 10),
|
||||
g = l - k,
|
||||
g < 0 && (g = -g),
|
||||
f = g,
|
||||
g = k - l,
|
||||
g < 0 && (g = -g),
|
||||
f += g,
|
||||
f *= 2,
|
||||
f = String(f),
|
||||
i = (parseInt(c, 10) / 2) + 2,
|
||||
m = '',
|
||||
g = 0; g < j + 1; g += 1) {
|
||||
for (h = 1; h <= 4; h += 1) {
|
||||
n = parseInt(licenseCode[g + h], 10) + parseInt(f[g], 10);
|
||||
|
||||
if (n >= i) n -= i;
|
||||
m += n;
|
||||
}
|
||||
}
|
||||
if (n >= i) n -= i;
|
||||
m += n;
|
||||
}
|
||||
}
|
||||
|
||||
return m;
|
||||
return m;
|
||||
}
|
||||
|
||||
function decodeTrailerUrl(html, encodedTrailerUrl) {
|
||||
const licenseCode = getLicenseCode(html);
|
||||
const i = licenseCode;
|
||||
const licenseCode = getLicenseCode(html);
|
||||
const i = licenseCode;
|
||||
|
||||
let j;
|
||||
let k;
|
||||
let l;
|
||||
let m;
|
||||
let n;
|
||||
let o;
|
||||
let j;
|
||||
let k;
|
||||
let l;
|
||||
let m;
|
||||
let n;
|
||||
let o;
|
||||
|
||||
const d = '16px';
|
||||
const g = encodedTrailerUrl.split('/').slice(2);
|
||||
const d = '16px';
|
||||
const g = encodedTrailerUrl.split('/').slice(2);
|
||||
|
||||
let h = g[5].substring(0, 2 * parseInt(d, 10));
|
||||
let h = g[5].substring(0, 2 * parseInt(d, 10));
|
||||
|
||||
for (j = h, k = h.length - 1; k >= 0; k -= 1) {
|
||||
for (l = k, m = k; m < i.length; m += 1) {
|
||||
l += parseInt(i[m], 10);
|
||||
}
|
||||
for (j = h, k = h.length - 1; k >= 0; k -= 1) {
|
||||
for (l = k, m = k; m < i.length; m += 1) {
|
||||
l += parseInt(i[m], 10);
|
||||
}
|
||||
|
||||
for (; l >= h.length;) {
|
||||
l -= h.length;
|
||||
}
|
||||
for (; l >= h.length;) {
|
||||
l -= h.length;
|
||||
}
|
||||
|
||||
for (n = '', o = 0; o < h.length; o += 1) {
|
||||
if (o === k) {
|
||||
n += h[l];
|
||||
} else {
|
||||
n += (o === l ? h[k] : h[o]);
|
||||
}
|
||||
}
|
||||
for (n = '', o = 0; o < h.length; o += 1) {
|
||||
if (o === k) {
|
||||
n += h[l];
|
||||
} else {
|
||||
n += (o === l ? h[k] : h[o]);
|
||||
}
|
||||
}
|
||||
|
||||
h = n;
|
||||
}
|
||||
h = n;
|
||||
}
|
||||
|
||||
g[5] = g[5].replace(j, h);
|
||||
const trailer = g.join('/');
|
||||
g[5] = g[5].replace(j, h);
|
||||
const trailer = g.join('/');
|
||||
|
||||
return trailer;
|
||||
return trailer;
|
||||
}
|
||||
|
||||
function scrapeLatest(html) {
|
||||
const { document } = ex(html);
|
||||
const { document } = ex(html);
|
||||
|
||||
return ctxa(document, '.video-post').map(({ q, qa, qd }) => {
|
||||
const release = {};
|
||||
return ctxa(document, '.video-post').map(({ q, qa, qd }) => {
|
||||
const release = {};
|
||||
|
||||
// release.entryId = slugify(release.title);
|
||||
release.entryId = q('.ico-fav-0').dataset.favVideoId;
|
||||
// release.entryId = slugify(release.title);
|
||||
release.entryId = q('.ico-fav-0').dataset.favVideoId;
|
||||
|
||||
const titleEl = q('.video-title-title');
|
||||
release.title = titleEl.title;
|
||||
release.url = titleEl.href;
|
||||
const titleEl = q('.video-title-title');
|
||||
release.title = titleEl.title;
|
||||
release.url = titleEl.href;
|
||||
|
||||
release.date = qd('.video-data em', 'MMM DD, YYYY');
|
||||
release.actors = qa('.video-model-list a', true);
|
||||
release.date = qd('.video-data em', 'MMM DD, YYYY');
|
||||
release.actors = qa('.video-model-list a', true);
|
||||
|
||||
const posterData = q('img.thumb').dataset;
|
||||
release.poster = posterData.src;
|
||||
release.trailer = posterData.preview;
|
||||
const posterData = q('img.thumb').dataset;
|
||||
release.poster = posterData.src;
|
||||
release.trailer = posterData.preview;
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene(html, url) {
|
||||
const { qu } = ex(html);
|
||||
const release = { url };
|
||||
const { qu } = ex(html);
|
||||
const release = { url };
|
||||
|
||||
// release.entryId = slugify(release.title);
|
||||
[release.entryId] = qu.q('link[rel="canonical"]').href.match(/\d+/);
|
||||
// release.entryId = slugify(release.title);
|
||||
[release.entryId] = qu.q('link[rel="canonical"]').href.match(/\d+/);
|
||||
|
||||
release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
|
||||
release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);
|
||||
release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
|
||||
release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);
|
||||
|
||||
release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
|
||||
release.duration = qu.dur('.info-video-details li:nth-child(2) span');
|
||||
release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
|
||||
release.duration = qu.dur('.info-video-details li:nth-child(2) span');
|
||||
|
||||
release.actors = qu.all('.info-video-models a', true);
|
||||
release.tags = qu.all('.info-video-category a', true);
|
||||
release.actors = qu.all('.info-video-models a', true);
|
||||
release.tags = qu.all('.info-video-category a', true);
|
||||
|
||||
release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
|
||||
release.poster = qu.meta('meta[property="og:image"');
|
||||
release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
|
||||
release.poster = qu.meta('meta[property="og:image"');
|
||||
|
||||
if (!release.poster) {
|
||||
const previewStart = html.indexOf('preview_url');
|
||||
release.poster = html.slice(html.indexOf('http', previewStart), html.indexOf('.jpg', previewStart) + 4);
|
||||
}
|
||||
if (!release.poster) {
|
||||
const previewStart = html.indexOf('preview_url');
|
||||
release.poster = html.slice(html.indexOf('http', previewStart), html.indexOf('.jpg', previewStart) + 4);
|
||||
}
|
||||
|
||||
const varsPrefix = 'flashvars = {';
|
||||
const varsStart = html.indexOf(varsPrefix);
|
||||
const varsString = html.slice(varsStart + varsPrefix.length, html.indexOf('};', varsStart));
|
||||
const varsPrefix = 'flashvars = {';
|
||||
const varsStart = html.indexOf(varsPrefix);
|
||||
const varsString = html.slice(varsStart + varsPrefix.length, html.indexOf('};', varsStart));
|
||||
|
||||
const vars = varsString.split(',').reduce((acc, item) => {
|
||||
const [prop, value] = item.split(': ');
|
||||
acc[prop.trim()] = value.trim().replace(/'/g, '');
|
||||
const vars = varsString.split(',').reduce((acc, item) => {
|
||||
const [prop, value] = item.split(': ');
|
||||
acc[prop.trim()] = value.trim().replace(/'/g, '');
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
release.trailer = [
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_url),
|
||||
quality: parseInt(vars.video_url_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url),
|
||||
quality: parseInt(vars.video_alt_url_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url2),
|
||||
quality: parseInt(vars.video_alt_url2_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url3),
|
||||
quality: parseInt(vars.video_alt_url3_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url4),
|
||||
quality: parseInt(vars.video_alt_url4_text, 10),
|
||||
},
|
||||
];
|
||||
release.trailer = [
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_url),
|
||||
quality: parseInt(vars.video_url_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url),
|
||||
quality: parseInt(vars.video_alt_url_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url2),
|
||||
quality: parseInt(vars.video_alt_url2_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url3),
|
||||
quality: parseInt(vars.video_alt_url3_text, 10),
|
||||
},
|
||||
{
|
||||
src: decodeTrailerUrl(html, vars.video_alt_url4),
|
||||
quality: parseInt(vars.video_alt_url4_text, 10),
|
||||
},
|
||||
];
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), url);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), url);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -5,86 +5,86 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { origin } = new URL(site.url);
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { origin } = new URL(site.url);
|
||||
|
||||
const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];
|
||||
const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];
|
||||
|
||||
return Array.from(videos.querySelectorAll('.card'), (scene) => {
|
||||
const release = { site };
|
||||
return Array.from(videos.querySelectorAll('.card'), (scene) => {
|
||||
const release = { site };
|
||||
|
||||
release.url = `${origin}${scene.querySelector(':scope > a').href}`;
|
||||
release.entryId = scene.dataset.videoId;
|
||||
release.title = scene.querySelector('.card-title').textContent;
|
||||
release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);
|
||||
release.url = `${origin}${scene.querySelector(':scope > a').href}`;
|
||||
release.entryId = scene.dataset.videoId;
|
||||
release.title = scene.querySelector('.card-title').textContent;
|
||||
release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);
|
||||
|
||||
release.poster = `https:${scene.querySelector('.single-image').src}`;
|
||||
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);
|
||||
release.poster = `https:${scene.querySelector('.single-image').src}`;
|
||||
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);
|
||||
|
||||
const trailerEl = scene.querySelector('source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.dataset.src };
|
||||
const trailerEl = scene.querySelector('source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.dataset.src };
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site the scene belongs to; stored on the release as-is.
 * @param {string} url - URL the markup was fetched from.
 * @returns {Object} Release with url, title, description, actors, photos and,
 *   when present on the page, date, duration (seconds), poster and trailer.
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	const scene = document.querySelector('#t2019-2col');

	release.url = url;
	release.title = scene.querySelector('.t2019-stitle').textContent.trim();
	release.description = scene.querySelector('#t2019-description').textContent.trim();
	release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), el => el.textContent);

	const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span'));

	// with two or more spans the first one holds the date and the second the duration;
	// with a single span only the duration is available
	if (durationEls.length > 1) {
		release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate();
	}

	const durationEl = durationEls.length > 1 ? durationEls[1] : durationEls[0];
	const minutes = durationEl && durationEl.textContent.match(/\d+/);

	// leave duration unset instead of throwing when the span is missing or has no digits
	if (minutes) release.duration = Number(minutes[0]) * 60;

	release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);

	const posterEl = scene.querySelector('#no-player-image');
	const videoEl = scene.querySelector('video');

	if (posterEl) release.poster = `https:${posterEl.src}`;
	else if (videoEl) release.poster = `https:${videoEl.poster}`;

	const trailerEl = scene.querySelector('#t2019-video source');
	if (trailerEl) release.trailer = { src: trailerEl.src };

	return release;
}
|
||||
|
||||
/**
 * Fetch one page of the site's latest releases.
 *
 * @param {Object} site - Site to scrape; `site.url` is the listing URL.
 * @param {number} [page=1] - Page number passed as the `page` query parameter.
 * @returns {Promise<Array>} Result of scrapeLatest, or an empty array on a non-200 response.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return [];
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site the scene belongs to.
 * @returns {Promise<Object|null>} Scraped release, or null on a non-200 response.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), site, url);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -5,31 +5,31 @@ const bhttp = require('bhttp');
|
||||
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
|
||||
|
||||
/**
 * Fetch a scene through the gamma scraper and derive its channel from the page's
 * `twitter:domain` meta tag.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site passed through to the gamma scrapeScene.
 * @returns {Promise<Object>} Release with `channel` and `director` set.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);
	const release = await scrapeScene(res.body.toString(), url, site);

	// only AllBlackX has no twitter domain, no other useful hints available
	const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com';
	// the fallback above guarantees a non-empty domain, so no further guard is needed
	const siteSlug = siteDomain.split('.')[0].toLowerCase();

	release.channel = siteSlug;
	release.director = 'Mason';

	return release;
}
|
||||
|
||||
/**
 * Build the XEmpire listing URL for an actor's releases.
 *
 * @param {string} actorPath - Actor path, appended verbatim after the category segment.
 * @param {number} [page=1] - Listing page number.
 * @returns {string} Absolute listing URL.
 */
function getActorReleasesUrl(actorPath, page = 1) {
	return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
}
|
||||
|
||||
/**
 * Network-level profile fetcher: delegates to the gamma fetchProfile with the
 * XEmpire actor-releases URL builder.
 *
 * @param {string} actorName
 * @param {string} scraperSlug
 * @param {Object} site - Unused here; null is passed to fetchProfile in its place.
 * @param {*} include - Forwarded unchanged to fetchProfile.
 * @returns {Promise<*>} Whatever fetchProfile resolves to.
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
272
src/sites.js
272
src/sites.js
@@ -8,189 +8,189 @@ const knex = require('./knex');
|
||||
const whereOr = require('./utils/where-or');
|
||||
|
||||
/**
 * Map a joined `sites` row (including its `network_*` columns) to the curated site shape.
 *
 * @param {Object} site - Raw row; reads id, name, url, description, slug, parameters and the network_* columns.
 * @param {boolean} [includeParameters=false] - Expose site and network parameters; otherwise both are null.
 * @param {boolean} [includeTags=true] - Query `sites_tags` and attach the site's tags.
 * @returns {Promise<Object>} Curated site with a nested `network` object.
 */
async function curateSite(site, includeParameters = false, includeTags = true) {
	const curatedSite = {
		id: site.id,
		name: site.name,
		url: site.url,
		description: site.description,
		slug: site.slug,
		// always a boolean, also when parameters exist but do not carry the flag
		independent: Boolean(site.parameters && site.parameters.independent),
		parameters: includeParameters ? site.parameters : null,
		network: {
			id: site.network_id,
			name: site.network_name,
			description: site.network_description,
			slug: site.network_slug,
			url: site.network_url,
			parameters: includeParameters ? site.network_parameters : null,
		},
	};

	if (includeTags) {
		curatedSite.tags = await knex('sites_tags')
			.select('tags.*', 'sites_tags.inherit')
			.where('site_id', site.id)
			.join('tags', 'tags.id', 'sites_tags.tag_id');
	}

	return curatedSite;
}
|
||||
|
||||
/**
 * Curate a list of site rows concurrently; tags are included (curateSite default).
 *
 * @param {Array<Object>} sites - Raw site rows.
 * @param {boolean} [includeParameters] - Forwarded to curateSite.
 * @returns {Promise<Array<Object>>} Curated sites in input order.
 */
async function curateSites(sites, includeParameters) {
	return Promise.all(sites.map(site => curateSite(site, includeParameters)));
}
|
||||
|
||||
/**
 * Split a config include/exclude list into network slugs and site slugs.
 *
 * A plain entry selects a whole network; an array entry `[network, sites]`
 * selects only the listed sites.
 *
 * @param {Array} [networks=[]] - Config entries.
 * @returns {{networks: Array, sites: Array}} Network entries and flattened site entries.
 */
function destructConfigNetworks(networks = []) {
	const result = {
		networks: [],
		sites: [],
	};

	networks.forEach((network) => {
		if (Array.isArray(network)) {
			// network specifies sites; tolerate a tuple without a site list
			result.sites.push(...(network[1] || []));
			return;
		}

		result.networks.push(network);
	});

	return result;
}
|
||||
|
||||
/**
 * Look up the site a URL belongs to by comparing several normalisations of the
 * URL against `sites.url`.
 *
 * @param {string} url - Absolute URL of a site or one of its pages.
 * @returns {Promise<Object|null>} Curated site (with parameters, without tags) or null when nothing matches.
 */
async function findSiteByUrl(url) {
	const { origin, hostname, pathname } = new URL(url);
	const dirUrl = `${origin}${pathname.split('/').slice(0, 2).join('/')}`; // allow for sites on URI directory

	const site = await knex('sites')
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.where('sites.url', url)
		.orWhere('sites.url', origin)
		.orWhere('sites.url', origin.replace(/www\.|tour\./, ''))
		.orWhere('sites.url', `https://www.${hostname}`)
		.orWhere('sites.url', `http://www.${hostname}`)
		.orWhere('sites.url', dirUrl)
		.first();

	if (site) {
		return curateSite(site, true, false);
	}

	return null;
}
|
||||
|
||||
/**
 * Group curated sites under their network.
 *
 * @param {Array<Object>} sites - Curated sites, each with a `network` object carrying a `slug`.
 * @returns {Array<Object>} One entry per network slug: the network's fields plus a `sites` array.
 */
function sitesByNetwork(sites) {
	const networks = sites.reduce((acc, site) => {
		if (acc[site.network.slug]) {
			// the bucket array is created below and owned by this function, safe to extend in place
			acc[site.network.slug].sites.push(site);

			return acc;
		}

		acc[site.network.slug] = {
			...site.network,
			sites: [site],
		};

		return acc;
	}, {});

	return Object.values(networks);
}
|
||||
|
||||
/**
 * Fetch the sites selected by the `sites` and `networks` command line arguments.
 *
 * @returns {Promise<Array<Object>>} Matching sites (with parameters), grouped by network.
 */
async function fetchSitesFromArgv() {
	const rawSites = await knex('sites')
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.whereIn('sites.slug', argv.sites || [])
		.orWhereIn('networks.slug', argv.networks || [])
		.leftJoin('networks', 'sites.network_id', 'networks.id');

	const curatedSites = await curateSites(rawSites, true);
	logger.info(`Found ${curatedSites.length} sites in database`);

	return sitesByNetwork(curatedSites);
}
|
||||
|
||||
/**
 * Fetch the sites selected by `config.include`, minus those in `config.exclude`.
 * Without `config.include` the include filter is left empty.
 *
 * @returns {Promise<Array<Object>>} Matching sites (with parameters), grouped by network.
 */
async function fetchSitesFromConfig() {
	const included = destructConfigNetworks(config.include);
	const excluded = destructConfigNetworks(config.exclude);

	const rawSites = await knex('sites')
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.where((builder) => {
			if (config.include) {
				builder
					.whereIn('sites.slug', included.sites)
					.orWhereIn('networks.slug', included.networks);
			}
		})
		.whereNot((builder) => {
			builder
				.whereIn('sites.slug', excluded.sites)
				.orWhereIn('networks.slug', excluded.networks);
		});

	const curatedSites = await curateSites(rawSites, true);
	logger.info(`Found ${curatedSites.length} sites in database`);

	return sitesByNetwork(curatedSites);
}
|
||||
|
||||
/**
 * Resolve the sites to process: command line selection takes precedence over config.
 *
 * @returns {Promise<Array<Object>>} Sites grouped by network.
 */
async function fetchIncludedSites() {
	if (argv.networks || argv.sites) {
		return fetchSitesFromArgv();
	}

	return fetchSitesFromConfig();
}
|
||||
|
||||
/**
 * Fetch up to 100 sites matching a query object.
 *
 * @param {Object} queryObject - Passed to the whereOr helper together with the `sites` table name.
 * @returns {Promise<Array<Object>>} Curated sites (without parameters).
 */
async function fetchSites(queryObject) {
	const sites = await knex('sites')
		.where(builder => whereOr(queryObject, 'sites', builder))
		.select(
			'sites.*',
			'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
		)
		.leftJoin('networks', 'sites.network_id', 'networks.id')
		.limit(100);

	return curateSites(sites);
}
|
||||
|
||||
/**
 * Fetch up to 100 sites that are referenced by releases.
 *
 * @returns {Promise<Array<Object>>} Curated sites.
 */
async function fetchSitesFromReleases() {
	// NOTE(review): the second select column is an empty string and only `site_id` is
	// selected, while curateSites reads id, name, slug, etc.; this looks unfinished
	// (possibly meant to be 'sites.*') -- confirm the intended columns before relying on it
	const sites = await knex('releases')
		.select('site_id', '')
		.leftJoin('sites', 'sites.id', 'releases.site_id')
		.groupBy('sites.id')
		.limit(100);

	return curateSites(sites);
}
|
||||
|
||||
module.exports = {
|
||||
curateSite,
|
||||
curateSites,
|
||||
fetchIncludedSites,
|
||||
fetchSites,
|
||||
fetchSitesFromConfig,
|
||||
fetchSitesFromArgv,
|
||||
fetchSitesFromReleases,
|
||||
findSiteByUrl,
|
||||
curateSite,
|
||||
curateSites,
|
||||
fetchIncludedSites,
|
||||
fetchSites,
|
||||
fetchSitesFromConfig,
|
||||
fetchSitesFromArgv,
|
||||
fetchSitesFromReleases,
|
||||
findSiteByUrl,
|
||||
};
|
||||
|
||||
@@ -11,155 +11,164 @@ const { curateSite } = require('./sites');
|
||||
const { associateReleaseMedia } = require('./media');
|
||||
|
||||
/**
 * Build the `releases` table row for a scraped release.
 *
 * @param {Object} release - Scraped release with its site (or network) and optional studio attached.
 * @param {*} batchId - ID of the current batch; always stored as `updated_batch_id`.
 * @param {Object} [existingRelease] - Stored counterpart, if any; suppresses `created_batch_id`.
 * @returns {Object} Row ready for insertion.
 */
function curateReleaseEntry(release, batchId, existingRelease) {
	// fall back to the actor names when the release has no title
	const slug = slugify(release.title || release.actors?.join('-') || null, '-', {
		encode: true,
		limit: config.titleSlugLength,
	});

	const curatedRelease = {
		title: release.title,
		entry_id: release.entryId || null,
		site_id: release.site?.id,
		network_id: release.site ? null : release.network?.id, // prefer site ID if available
		shoot_id: release.shootId || null,
		studio_id: release.studio?.id || null,
		url: release.url,
		date: release.date,
		slug,
		description: release.description,
		duration: release.duration,
		type: release.type,
		// director: release.director,
		// likes: release.rating && release.rating.likes,
		// dislikes: release.rating && release.rating.dislikes,
		// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
		deep: typeof release.deep === 'boolean' ? release.deep : false,
		deep_url: release.deepUrl,
		updated_batch_id: batchId,
	};

	// only brand new entries record the batch that created them
	if (!existingRelease && !release.id) {
		curatedRelease.created_batch_id = batchId;
	}

	return curatedRelease;
}
|
||||
|
||||
/**
 * Resolve the site of releases that only carry a channel slug, or whose site is a
 * network placeholder (`site.isNetwork`).
 *
 * - Releases with a regular site are returned untouched.
 * - Releases whose channel matches a site slug get that curated site attached.
 * - Unmatched releases with a network placeholder keep it as `network`, with `site: null`.
 * - Anything else is logged and dropped.
 *
 * @param {Array<Object>} releases - Scraped releases.
 * @returns {Promise<Array<Object>>} Releases with a site or network attached.
 */
async function attachChannelSites(releases) {
	const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isNetwork));

	const channelSites = await knex('sites')
		.leftJoin('networks', 'networks.id', 'sites.network_id')
		.select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.parameters as network_parameters', 'networks.description as network_description')
		.whereIn('sites.slug', releasesWithoutSite.map(release => release.channel));

	const channelSitesBySlug = channelSites.reduce((acc, site) => {
		acc[site.slug] = site;

		return acc;
	}, {});

	const releasesWithChannelSite = await Promise.all(releases
		.map(async (release) => {
			if (release.site && !release.site.isNetwork) {
				return release;
			}

			if (release.channel && channelSitesBySlug[release.channel]) {
				const curatedSite = await curateSite(channelSitesBySlug[release.channel]);

				return {
					...release,
					site: curatedSite,
				};
			}

			// no matching channel site: keep the network placeholder as the release's network
			if (release.site && release.site.isNetwork) {
				return {
					...release,
					site: null,
					network: release.site,
				};
			}

			logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);

			return null;
		}));

	return releasesWithChannelSite.filter(Boolean);
}
|
||||
|
||||
/**
 * Replace each release's studio slug with the matching `studios` row.
 * Releases whose studio cannot be matched keep the slug and trigger a warning.
 *
 * @param {Array<Object>} releases - Releases with an optional `studio` slug.
 * @returns {Promise<Array<Object>>} Releases in input order.
 */
async function attachStudios(releases) {
	const studioSlugs = releases.map(release => release.studio).filter(Boolean);

	const studios = await knex('studios').whereIn('slug', studioSlugs);
	const studioBySlug = studios.reduce((acc, studio) => {
		acc[studio.slug] = studio;

		return acc;
	}, {});

	const releasesWithStudio = releases.map((release) => {
		if (release.studio && studioBySlug[release.studio]) {
			return {
				...release,
				studio: studioBySlug[release.studio],
			};
		}

		if (release.studio) {
			logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);
		}

		return release;
	});

	return releasesWithStudio;
}
|
||||
|
||||
/**
 * Attach the database ID to each release by matching site ID + entry ID against stored rows.
 *
 * @param {Array<Object>} releases - Releases with `site.id` and `entryId`.
 * @param {Array<Object>} storedReleases - Rows with `id`, `site_id` and `entry_id`.
 * @returns {Array<Object>} Copies of the releases with `id` set; `id` is undefined when no stored row matches.
 */
function attachReleaseIds(releases, storedReleases) {
	const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
		if (!acc[release.site_id]) acc[release.site_id] = {};
		acc[release.site_id][release.entry_id] = release.id;

		return acc;
	}, {});

	// NOTE(review): releases with `site: null` (network-only releases returned by
	// attachChannelSites) still throw on `release.site.id` -- confirm how those should be keyed
	const releasesWithId = releases.map(release => ({
		...release,
		// a site without any stored rows has no bucket; yield undefined instead of throwing
		id: storedReleaseIdsBySiteIdAndEntryId[release.site.id]?.[release.entryId],
	}));

	return releasesWithId;
}
|
||||
|
||||
/**
 * Drop releases that share site ID + entry ID within the same list; the last occurrence wins.
 *
 * @param {Array<Object>} releases - Releases with `site.id` and `entryId`.
 * @returns {Array<Object>} Unique releases, grouped per site.
 */
function filterInternalDuplicateReleases(releases) {
	// NOTE(review): releases with `site: null` (network-only releases returned by
	// attachChannelSites) throw on `release.site.id` here -- confirm how those should be keyed
	const releasesBySiteIdAndEntryId = releases.reduce((acc, release) => {
		if (!acc[release.site.id]) {
			acc[release.site.id] = {};
		}

		acc[release.site.id][release.entryId] = release;

		return acc;
	}, {});

	return Object.values(releasesBySiteIdAndEntryId)
		.map(siteReleases => Object.values(siteReleases))
		.flat();
}
|
||||
|
||||
/**
 * Split releases into those already stored and those that are new, using
 * site ID + entry ID as the identity.
 *
 * @param {Array<Object>} releases - Releases with `site.id` and `entryId`.
 * @returns {Promise<{uniqueReleases: Array, duplicateReleases: Array, duplicateReleaseEntries: Array}>}
 *   New releases, releases that already exist, and the stored rows of the latter.
 */
async function filterDuplicateReleases(releases) {
	const internalUniqueReleases = filterInternalDuplicateReleases(releases);

	// NOTE(review): releases with `site: null` (network-only releases returned by
	// attachChannelSites) throw on `release.site.id` below -- confirm how those should be keyed
	const duplicateReleaseEntries = await knex('releases')
		.whereIn(['entry_id', 'site_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id]));

	const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
		if (!acc[release.site_id]) acc[release.site_id] = {};
		acc[release.site_id][release.entry_id] = true;

		return acc;
	}, {});

	const isStored = release => Boolean(duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);

	const duplicateReleases = internalUniqueReleases.filter(release => isStored(release));
	const uniqueReleases = internalUniqueReleases.filter(release => !isStored(release));

	return {
		uniqueReleases,
		duplicateReleases,
		duplicateReleaseEntries,
	};
}
|
||||
|
||||
async function updateReleasesSearch(releaseIds) {
|
||||
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
|
||||
logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all' } releases`);
|
||||
|
||||
const documents = await knex.raw(`
|
||||
const documents = await knex.raw(`
|
||||
SELECT
|
||||
releases.id AS release_id,
|
||||
TO_TSVECTOR(
|
||||
@@ -190,45 +199,49 @@ async function updateReleasesSearch(releaseIds) {
|
||||
GROUP BY releases.id, sites.name, sites.slug, sites.alias, sites.url, networks.name, networks.slug, networks.url;
|
||||
`, releaseIds && [releaseIds]);
|
||||
|
||||
if (documents.rows?.length > 0) {
|
||||
const query = knex('releases_search').insert(documents.rows).toString();
|
||||
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
||||
}
|
||||
if (documents.rows?.length > 0) {
|
||||
const query = knex('releases_search').insert(documents.rows).toString();
|
||||
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Store scraped releases and their associations.
 *
 * Creates a batch, resolves sites and studios, inserts releases that are not yet
 * stored, then associates actors, tags and media and refreshes the search documents.
 *
 * @param {Array<Object>} releases - Scraped releases.
 * @returns {Promise<Array<Object>>} New and already-stored releases with their database `id`;
 *   an empty array, without creating a batch, when there is nothing to store.
 */
async function storeReleases(releases) {
	if (releases.length === 0) {
		return [];
	}

	const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

	const releasesWithSites = await attachChannelSites(releases);
	const releasesWithStudios = await attachStudios(releasesWithSites);

	// uniqueness is site ID + entry ID, filter uniques after adding sites
	const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);

	const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));

	const storedReleases = await knex('releases').insert(curatedNewReleaseEntries).returning('*');
	// TODO: update duplicate releases

	// the insert result is only used when it is an array of rows
	const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
	const releasesWithId = attachReleaseIds([].concat(uniqueReleases, duplicateReleases), [].concat(storedReleaseEntries, duplicateReleaseEntries));

	await Promise.all([
		associateActors(releasesWithId, batchId),
		associateReleaseTags(releasesWithId),
	]);

	// media is more error-prone, associate separately
	await associateReleaseMedia(releasesWithId);

	logger.info(`Stored ${storedReleaseEntries.length} releases`);

	await updateReleasesSearch(releasesWithId.map(release => release.id));

	return releasesWithId;
}
|
||||
|
||||
module.exports = {
|
||||
storeReleases,
|
||||
updateReleasesSearch,
|
||||
storeReleases,
|
||||
updateReleasesSearch,
|
||||
};
|
||||
|
||||
@@ -5,106 +5,106 @@ const knex = require('./knex');
|
||||
const whereOr = require('./utils/where-or');
|
||||
|
||||
/**
 * Map a joined `tags` row (with `group_*` columns) to the curated tag shape,
 * loading its aliases and media.
 *
 * @param {Object} tag - Raw row; reads id, name, slug, description and the group_* columns.
 * @returns {Promise<Object>} Curated tag with poster, photos, group and alias names.
 */
async function curateTag(tag) {
	const [aliases, media] = await Promise.all([
		knex('tags').where({ alias_for: tag.id }),
		knex('media')
			.where('domain', 'tags')
			.andWhere('target_id', tag.id)
			.orderBy('index'),
	]);

	return {
		id: tag.id,
		name: tag.name,
		slug: tag.slug,
		description: tag.description,
		poster: media.find(photo => photo.role === 'poster'),
		photos: media.filter(photo => photo.role === 'photo'),
		group: {
			id: tag.group_id,
			name: tag.group_name,
			description: tag.group_description,
			slug: tag.group_slug,
		},
		aliases: aliases.map(({ name }) => name),
	};
}
|
||||
|
||||
/**
 * Curate a list of tag rows in parallel.
 *
 * @param {object[]} tags - Raw tag rows.
 * @returns {Promise<object[]>} Curated tags, in the same order as the input.
 */
function curateTags(tags) {
	// curateTag is already async, no extra async wrapper is needed
	return Promise.all(tags.map(tag => curateTag(tag)));
}
|
||||
|
||||
/**
 * Resolve raw tag names to canonical tag IDs.
 *
 * Each name is looked up as given, lower-cased and upper-cased. The self-join
 * maps a matched row either to itself or to the tag it is an alias for, and
 * the final filter keeps only rows that resolve to a non-alias tag.
 *
 * @param {string[]} rawTags - Tag names as returned by a scraper; falsy entries are ignored.
 * @returns {Promise<number[]>} Distinct IDs of the matched tags.
 */
async function matchTags(rawTags) {
	const filteredTags = rawTags.filter(Boolean);

	if (filteredTags.length === 0) {
		// nothing to look up, skip the query
		return [];
	}

	const tags = filteredTags
		.concat(filteredTags.map(tag => tag.toLowerCase()))
		.concat(filteredTags.map(tag => tag.toUpperCase()));

	const tagEntries = await knex('tags')
		.pluck('aliases.id')
		.whereIn('tags.name', tags)
		.leftJoin('tags as aliases', function join() {
			this
				.on('tags.alias_for', 'aliases.id')
				.orOn('tags.id', 'aliases.id');
		})
		.where(function where() {
			this
				.whereNull('tags.alias_for')
				.orWhereNull('aliases.alias_for');
		})
		.groupBy('aliases.id');

	return tagEntries;
}
|
||||
|
||||
/**
 * Associate a stored release with its own tags and the tags it inherits from its site.
 *
 * @param {object} release - Scraped release; `release.tags` holds either raw tag
 *   names or tag IDs already matched by an (outdated) scraper.
 * @param {number} releaseId - ID of the stored release.
 * @returns {Promise<void>}
 */
async function associateTags(release, releaseId) {
	const siteTags = release.site?.tags?.filter(tag => tag.inherit === true).map(tag => tag.id) || [];

	const rawReleaseTags = release.tags?.filter(Boolean) || [];
	const releaseTags = rawReleaseTags.some(tag => typeof tag === 'string')
		? await matchTags(rawReleaseTags) // scraper returned raw tags
		: rawReleaseTags; // tags already matched by (outdated) scraper

	const tags = Array.from(new Set(releaseTags.concat(siteTags)));

	if (tags.length === 0) {
		// release.site is treated as optional above, so don't let the log line throw
		logger.info(`No tags available for (${release.site?.name}, ${releaseId}) "${release.title}"`);
		return;
	}

	const associationEntries = await knex('releases_tags')
		.where('release_id', releaseId)
		.whereIn('tag_id', tags);

	const existingAssociations = new Set(associationEntries.map(association => association.tag_id));
	const newAssociations = tags.filter(tagId => !existingAssociations.has(tagId));

	if (newAssociations.length === 0) {
		// every tag is already associated, nothing to insert
		return;
	}

	await knex('releases_tags').insert(newAssociations.map(tagId => ({
		tag_id: tagId,
		release_id: releaseId,
	})));
}
|
||||
|
||||
/**
 * Fetch and curate tags matching a tag query or a tag group query.
 *
 * @param {object} queryObject - Conditions applied to the `tags` table via whereOr.
 * @param {object} groupsQueryObject - Conditions applied to the `tags_groups` table via whereOr.
 * @param {number} [limit=100] - Maximum number of tags to return.
 * @returns {Promise<object[]>} Curated tags ordered by name.
 */
async function fetchTags(queryObject, groupsQueryObject, limit = 100) {
	// NOTE(review): as chained, the alias_for filter is AND-ed onto the group
	// condition only (A OR (B AND alias_for IS NULL)) — confirm this is intended.
	const tags = await knex('tags')
		.where(builder => whereOr(queryObject, 'tags', builder))
		.orWhere(builder => whereOr(groupsQueryObject, 'tags_groups', builder))
		.andWhere({ 'tags.alias_for': null })
		.select(
			'tags.*',
			'tags_groups.id as group_id',
			'tags_groups.name as group_name',
			'tags_groups.slug as group_slug',
			// curateTag reads tag.group_description, so the alias must match that name
			'tags_groups.description as group_description',
		)
		.leftJoin('tags_groups', 'tags.group_id', 'tags_groups.id')
		.orderBy('name')
		.limit(limit);

	return curateTags(tags);
}
|
||||
|
||||
module.exports = {
|
||||
associateTags,
|
||||
fetchTags,
|
||||
matchTags,
|
||||
associateTags,
|
||||
fetchTags,
|
||||
matchTags,
|
||||
};
|
||||
|
||||
132
src/tags.js
132
src/tags.js
@@ -4,104 +4,104 @@ const knex = require('./knex');
|
||||
const slugify = require('./utils/slugify');
|
||||
|
||||
/**
 * Look up the tag IDs for all raw tag names found on a batch of releases.
 *
 * Names are matched as given, lower-cased and upper-cased. Alias rows resolve
 * to the tag they alias (`alias_for`), other rows to their own ID.
 *
 * @param {object[]} releases - Releases with an optional `tags` array.
 * @returns {Promise<Object<string, number>>} Tag IDs keyed by the slugified tag name.
 */
async function matchReleaseTags(releases) {
	// Obsolete scrapers may return pre-matched numeric tag IDs (handled in
	// buildReleaseTagAssociations); only strings can be matched by name.
	const rawTags = releases
		.flatMap(release => release.tags || [])
		.filter(tag => typeof tag === 'string' && tag.length > 0);

	if (rawTags.length === 0) {
		// nothing to match, skip the query
		return {};
	}

	const casedTags = [...new Set([
		...rawTags,
		...rawTags.map(tag => tag.toLowerCase()),
		...rawTags.map(tag => tag.toUpperCase()),
	])];

	const tagEntries = await knex('tags')
		.select('tags.id', 'tags.name', 'tags.alias_for')
		.whereIn('tags.name', casedTags);

	// later rows overwrite earlier ones that slugify to the same key
	const tagIdsBySlug = Object.fromEntries(
		tagEntries.map(tag => [slugify(tag.name), tag.alias_for || tag.id]),
	);

	return tagIdsBySlug;
}
|
||||
|
||||
/**
 * Fetch the tags attached to the sites of the given releases.
 *
 * @param {object[]} releases - Releases carrying a `site` with an `id`.
 * @returns {Promise<Object<string, number[]>>} Tag IDs grouped by site ID.
 */
async function getSiteTags(releases) {
	const siteIds = releases.map(release => release.site.id);
	const siteTags = await knex('sites_tags').whereIn('site_id', siteIds);

	const siteTagIdsBySiteId = siteTags.reduce((acc, siteTag) => {
		if (!acc[siteTag.site_id]) {
			acc[siteTag.site_id] = [];
		}

		acc[siteTag.site_id].push(siteTag.tag_id);

		return acc;
	}, {});

	return siteTagIdsBySiteId;
}
|
||||
|
||||
/**
 * Build the `releases_tags` rows for a batch of releases.
 *
 * Each release gets its own tags plus the tags of its site, de-duplicated,
 * with unmatched or empty tags dropped.
 *
 * @param {object[]} releases - Stored releases with `id`, `site.id` and optional `tags`.
 * @param {Object<string, number>} tagIdsBySlug - Tag IDs keyed by slugified tag name.
 * @param {Object<string, number[]>} siteTagIdsBySiteId - Tag IDs grouped by site ID.
 * @returns {{release_id: number, tag_id: number}[]} Flat list of associations.
 */
function buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId) {
	const tagAssociations = releases
		.map((release) => {
			const siteTagIds = siteTagIdsBySiteId[release.site.id] || [];
			const releaseTags = (release.tags || []).filter(Boolean);

			// numeric tags were pre-matched by an obsolete scraper, raw names are
			// resolved through their slug; decide per tag so mixed arrays work too
			const releaseTagIds = releaseTags.map(tag => (typeof tag === 'number'
				? tag
				: tagIdsBySlug[slugify(tag)]));

			const tags = [...new Set(
				// filter duplicates and empties
				releaseTagIds
					.concat(siteTagIds)
					.filter(Boolean),
			)]
				.map(tagId => ({
					release_id: release.id,
					tag_id: tagId,
				}));

			return tags;
		})
		.flat();

	return tagAssociations;
}
|
||||
|
||||
/**
 * Drop the release-tag associations that are already stored.
 *
 * @param {{release_id: number, tag_id: number}[]} tagAssociations - Candidate associations.
 * @returns {Promise<{release_id: number, tag_id: number}[]>} Associations not yet in `releases_tags`.
 */
async function filterUniqueAssociations(tagAssociations) {
	if (tagAssociations.length === 0) {
		// nothing to compare against the database
		return [];
	}

	const duplicateAssociations = await knex('releases_tags')
		.whereIn(['release_id', 'tag_id'], tagAssociations.map(association => [association.release_id, association.tag_id]));

	const duplicateAssociationsByReleaseIdAndTagId = duplicateAssociations.reduce((acc, association) => {
		if (!acc[association.release_id]) {
			acc[association.release_id] = {};
		}

		acc[association.release_id][association.tag_id] = true;

		return acc;
	}, {});

	const uniqueAssociations = tagAssociations
		.filter(association => !duplicateAssociationsByReleaseIdAndTagId[association.release_id]?.[association.tag_id]);

	return uniqueAssociations;
}
|
||||
|
||||
/**
 * Match and store the tag associations for a batch of stored releases.
 *
 * @param {object[]} releases - Stored releases with `id`, `site` and optional `tags`.
 * @returns {Promise<void>}
 */
async function associateReleaseTags(releases) {
	// the two lookups are independent of each other
	const [tagIdsBySlug, siteTagIdsBySiteId] = await Promise.all([
		matchReleaseTags(releases),
		getSiteTags(releases),
	]);

	const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId);
	const uniqueAssociations = await filterUniqueAssociations(tagAssociations);

	if (uniqueAssociations.length === 0) {
		// nothing new to store
		return;
	}

	await knex('releases_tags').insert(uniqueAssociations);
}
|
||||
|
||||
module.exports = {
|
||||
associateReleaseTags,
|
||||
associateReleaseTags,
|
||||
};
|
||||
|
||||
304
src/updates.js
304
src/updates.js
@@ -11,228 +11,228 @@ const scrapers = require('./scrapers/scrapers');
|
||||
const { fetchSitesFromArgv, fetchSitesFromConfig } = require('./sites');
|
||||
|
||||
// Date cut-off derived once from the `after` argument: either an explicit date
// (YYYY-MM-DD or DD-MM-YYYY) or a distance back from now (e.g. "1 month").
const afterDate = (() => {
	if (/\d{2,4}-\d{2}-\d{2,4}/.test(argv.after)) {
		// using date
		return moment
			.utc(argv.after, ['YYYY-MM-DD', 'DD-MM-YYYY'])
			.toDate();
	}

	// using time distance (e.g. "1 month")
	return moment
		.utc()
		.subtract(...argv.after.split(' '))
		.toDate();
})();
|
||||
|
||||
/**
 * Filter out releases that are already stored or were already accumulated
 * earlier in the current scrape.
 *
 * @param {object[]} latestReleases - Releases scraped from the current page, each with `site.id` and `entryId`.
 * @param {object[]} accReleases - Releases accumulated from previous pages.
 * @returns {Promise<object[]>} Releases from `latestReleases` not seen before.
 */
async function filterUniqueReleases(latestReleases, accReleases) {
	const latestReleaseIdentifiers = latestReleases
		.map(release => [release.site.id, release.entryId]);

	const duplicateReleases = await knex('releases')
		.whereIn(['site_id', 'entry_id'], latestReleaseIdentifiers);

	// add entry IDs of accumulated releases to prevent an infinite scrape loop
	// when one page contains the same release as the previous
	const duplicateReleasesSiteIdAndEntryIds = duplicateReleases
		.concat(accReleases)
		.reduce((acc, release) => {
			// database rows use snake_case columns, scraped releases use site.id/entryId
			const siteId = release.site_id || release.site.id;
			const entryId = release.entry_id || release.entryId;

			if (!acc[siteId]) acc[siteId] = {};
			acc[siteId][entryId] = true;

			return acc;
		}, {});

	const uniqueReleases = latestReleases
		.filter(release => !duplicateReleasesSiteIdAndEntryIds[release.site.id]?.[release.entryId]);

	return uniqueReleases;
}
|
||||
|
||||
/**
 * Decide whether the next page of a site should be scraped.
 *
 * @param {object[]} uniqueReleases - New releases found on the current page.
 * @param {object[]} pageAccReleases - All releases accumulated so far, including the current page.
 * @returns {boolean} True when another page should be fetched.
 */
function needNextPage(uniqueReleases, pageAccReleases) {
	if (uniqueReleases.length === 0) {
		return false;
	}

	if (argv.last && pageAccReleases.length < argv.last) {
		// request for last N releases not yet satisfied
		return true;
	}

	if (uniqueReleases.every(release => !!release.date)) {
		// sort a copy, Array.prototype.sort would reorder the caller's array
		const oldestReleaseOnPage = [...uniqueReleases]
			.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
			.slice(-1)[0];

		if (moment(oldestReleaseOnPage.date).isAfter(afterDate)) {
			// oldest release on page is newer than the specified date cut-off
			return true;
		}
	}

	// dates missing, and limit for scenes without dates not yet reached
	return pageAccReleases.length <= argv.nullDateLimit;
}
|
||||
|
||||
/**
 * Scrape the latest or upcoming releases of a site, following pages until
 * needNextPage reports that no further page is required.
 *
 * @param {object} scraper - Scraper exposing `fetchLatest` and/or `fetchUpcoming`.
 * @param {object} site - Site to scrape; attached to releases that carry no site of their own.
 * @param {object} preData - Data passed through to the scraper.
 * @param {boolean} [upcoming=false] - Scrape upcoming instead of latest releases.
 * @returns {Promise<object[]>} Releases limited by `argv.last`, the date cut-off, or `argv.nullDateLimit`.
 */
async function scrapeReleases(scraper, site, preData, upcoming = false) {
	const scrapePage = async (page = 1, accReleases = []) => {
		const latestReleases = upcoming
			? await scraper.fetchUpcoming(site, page, preData, include)
			: await scraper.fetchLatest(site, page, preData, include);

		if (!Array.isArray(latestReleases)) {
			// scraper is unable to fetch the releases and returned a HTTP code or null
			logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${site.name}' (${site.network.name})`);
			return accReleases;
		}

		if (latestReleases.length === 0) {
			// scraper successfully requested releases, but found none
			return accReleases;
		}

		const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site: release.site || site })); // attach site release is assigned to when stored

		const uniqueReleases = argv.redownload
			? latestReleasesWithSite
			: await filterUniqueReleases(latestReleasesWithSite, accReleases);

		const pageAccReleases = accReleases.concat(uniqueReleases);

		logger.verbose(`Scraped '${site.name}' (${site.network.name}) ${upcoming ? 'upcoming' : 'latest'} page ${page}, found ${uniqueReleases.length} unique updates`);

		if (needNextPage(uniqueReleases, pageAccReleases)) {
			return scrapePage(page + 1, pageAccReleases);
		}

		return pageAccReleases;
	};

	const rawReleases = await scrapePage(argv.page || 1, []);
	const releases = upcoming
		? rawReleases.map(rawRelease => ({ ...rawRelease, upcoming: true }))
		: rawReleases;

	if (argv.last) {
		return releases.slice(0, argv.last);
	}

	if (releases.every(release => release.date)) {
		return releases.filter(release => moment(release.date).isAfter(afterDate));
	}

	return releases.slice(0, argv.nullDateLimit);
}
|
||||
|
||||
/**
 * Scrape the latest releases of a site, if the scraper supports it.
 *
 * @param {object} scraper - Scraper, optionally exposing `fetchLatest`.
 * @param {object} site - Site to scrape.
 * @param {object} preData - Data passed through to the scraper.
 * @returns {Promise<object[]>} Scraped releases; empty when unsupported or when scraping failed.
 */
async function scrapeLatestReleases(scraper, site, preData) {
	if (!scraper.fetchLatest) {
		return [];
	}

	try {
		return await scrapeReleases(scraper, site, preData, false);
	} catch (error) {
		// one failing site should not abort the run: log and fall through to an empty result
		logger.warn(`Failed to scrape latest updates for '${site.slug}' (${site.network.slug}): ${error.message}`);
	}

	return [];
}
|
||||
|
||||
/**
 * Scrape the upcoming releases of a site, if the scraper supports it.
 *
 * @param {object} scraper - Scraper, optionally exposing `fetchUpcoming`.
 * @param {object} site - Site to scrape.
 * @param {object} preData - Data passed through to the scraper.
 * @returns {Promise<object[]>} Scraped releases; empty when unsupported or when scraping failed.
 */
async function scrapeUpcomingReleases(scraper, site, preData) {
	if (!scraper.fetchUpcoming) {
		return [];
	}

	try {
		return await scrapeReleases(scraper, site, preData, true);
	} catch (error) {
		// one failing site should not abort the run: log and fall through to an empty result
		logger.warn(`Failed to scrape upcoming updates for '${site.slug}' (${site.network.slug}): ${error.message}`);
	}

	return [];
}
|
||||
|
||||
/**
 * Scrape the latest and upcoming releases of a site in parallel, as enabled
 * by `argv.latest` and `argv.upcoming`.
 *
 * @param {object} scraper - Scraper for the site.
 * @param {object} site - Site to scrape.
 * @param {object} preData - Data passed through to the scraper.
 * @returns {Promise<object[]>} Latest releases followed by upcoming releases.
 */
async function scrapeSiteReleases(scraper, site, preData) {
	const [latestReleases, upcomingReleases] = await Promise.all([
		argv.latest
			? scrapeLatestReleases(scraper, site, preData)
			: [],
		argv.upcoming
			? scrapeUpcomingReleases(scraper, site, preData)
			: [],
	]);

	logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${site.name}' (${site.network.name})`);

	return [...latestReleases, ...upcomingReleases];
}
|
||||
|
||||
/**
 * Scrape all releases of a single site.
 *
 * The scraper is looked up by site slug first, then by network slug, then by
 * the slug of the network's parent.
 *
 * @param {object} site - Site to scrape, including its `network`.
 * @param {object[]} [accSiteReleases] - Releases accumulated from sites scraped earlier; passed to the scraper as pre-data.
 * @returns {Promise<object[]>} Releases with `site` attached; empty when no scraper exists or scraping failed.
 */
async function scrapeSite(site, accSiteReleases) {
	const scraper = scrapers.releases[site.slug]
		|| scrapers.releases[site.network.slug]
		|| scrapers.releases[site.network.parent?.slug];

	if (!scraper) {
		logger.warn(`No scraper found for '${site.name}' (${site.network.name})`);
		return [];
	}

	try {
		const beforeFetchLatest = await scraper.beforeFetchLatest?.(site);

		const siteReleases = await scrapeSiteReleases(scraper, site, {
			accSiteReleases,
			beforeFetchLatest,
		});

		return siteReleases.map(release => ({ ...release, site }));
	} catch (error) {
		logger.error(`Failed to scrape releases from ${site.name} using ${scraper.slug}: ${error.message}`);

		return [];
	}
}
|
||||
|
||||
/**
 * Scrape the sites of a network one after another, handing each site the
 * releases accumulated from the sites before it.
 *
 * @param {object} network - Network with a `sites` array.
 * @returns {Promise<object[]>} Releases of all sites, in site order.
 */
async function scrapeNetworkSequential(network) {
	return Promise.reduce(
		network.sites,
		async (chain, site) => {
			const accSiteReleases = await chain;
			// scrapeSite takes (site, accSiteReleases); the network is reachable through site.network
			const siteReleases = await scrapeSite(site, accSiteReleases);

			return accSiteReleases.concat(siteReleases);
		},
		Promise.resolve([]),
	);
}
|
||||
|
||||
/**
 * Scrape the sites of a network concurrently, three at a time.
 *
 * @param {object} network - Network with a `sites` array.
 * @returns {Promise<object[][]>} One array of releases per site.
 */
async function scrapeNetworkParallel(network) {
	return Promise.map(
		network.sites,
		// scrapeSite's second parameter is the accumulated releases, which a
		// parallel scrape does not have; passing the network there was a mismatch
		async site => scrapeSite(site),
		{ concurrency: 3 },
	);
}
|
||||
|
||||
/**
 * Scrape updates for all included networks, five networks at a time.
 *
 * Networks come from the command line when `argv.sites` or `argv.networks`
 * is set, otherwise from the configuration. A network is scraped sequentially
 * when its `parameters.sequential` is set, in parallel otherwise.
 *
 * @returns {Promise<object[]>} Flat list of scraped releases.
 */
async function fetchUpdates() {
	const includedNetworks = argv.sites || argv.networks
		? await fetchSitesFromArgv()
		: await fetchSitesFromConfig();

	const scrapedNetworks = await Promise.map(
		includedNetworks,
		async network => (network.parameters?.sequential
			? scrapeNetworkSequential(network)
			: scrapeNetworkParallel(network)),
		{ concurrency: 5 },
	);

	// parallel scrapes return one array per site, so flatten two levels
	const releases = scrapedNetworks.flat(2);

	return releases;
}
|
||||
|
||||
module.exports = fetchUpdates;
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
'use strict';
|
||||
|
||||
/**
 * Derive which kinds of content should be included from the parsed arguments.
 *
 * Media flags only apply when `argv.media` is set, and teasers and trailers
 * additionally require `argv.videos`. Singular and plural keys carry the same value.
 *
 * @param {object} argv - Parsed command line arguments.
 * @returns {object} Flags keyed by content type.
 */
function include(argv) {
	return {
		covers: argv.media && argv.covers,
		media: argv.media,
		photos: argv.media && argv.photos,
		poster: argv.media && argv.posters,
		posters: argv.media && argv.posters,
		releases: argv.withReleases,
		scenes: argv.withReleases,
		teaser: argv.media && argv.videos && argv.teasers,
		teasers: argv.media && argv.videos && argv.teasers,
		trailer: argv.media && argv.videos && argv.trailers,
		trailers: argv.media && argv.videos && argv.trailers,
		videos: argv.videos,
	};
}
|
||||
|
||||
module.exports = include;
|
||||
|
||||
@@ -13,106 +13,106 @@ const file = 'https://speed.hetzner.de/100MB.bin';
|
||||
// const file = 'https://speed.hetzner.de/10GB.bin';
|
||||
|
||||
/**
 * Current resident set size of the process.
 *
 * @returns {number} Memory usage in megabytes (10^6 bytes).
 */
function getMemoryUsage() {
	return process.memoryUsage().rss / (10 ** 6);
}
|
||||
|
||||
// Shared state of the test run: peak memory seen so far, and per download
// label either the progress percentage or the final hash.
const stats = {
	peakMemoryUsage: getMemoryUsage(),
	done: false,
	downloads: {},
};
|
||||
|
||||
/**
 * Draw the peak memory usage and the progress of every download to stdout,
 * then move the cursor back up so the next call overwrites the output.
 *
 * Re-schedules itself every second until every download entry is a string
 * (a download's value becomes a string once setProgress stores its hash).
 */
function render() {
	const downloads = Object.entries(stats.downloads);

	process.stdout.clearScreenDown();

	process.stdout.write(`peak memory: ${stats.peakMemoryUsage.toFixed(2)} MB\n`);

	downloads.forEach(([download, progress]) => {
		// numeric progress is a percentage, a string is the final hash
		process.stdout.write(`${download}: ${progress}${typeof progress === 'string' ? '' : '%'}\n`);
	});

	process.stdout.moveCursor(0, -(downloads.length + 1));
	process.stdout.cursorTo(0);

	if (downloads.length === 0 || !downloads.every(([_label, download]) => typeof download === 'string')) {
		setTimeout(() => render(), 1000);
		return;
	}

	// all downloads finished: leave the cursor below the final output
	process.stdout.moveCursor(0, downloads.length + 1);
}
|
||||
|
||||
/**
 * Record the progress of a download and update the peak memory usage.
 *
 * @param {string|number} label - Key of the download in `stats.downloads`.
 * @param {?number} completedBytes - Bytes received so far.
 * @param {?number} totalBytes - Total bytes expected.
 * @param {string} [hash] - Final hash; when given it replaces the percentage.
 */
function setProgress(label, completedBytes, totalBytes, hash) {
	const memory = getMemoryUsage();

	stats.peakMemoryUsage = Math.max(memory, stats.peakMemoryUsage);
	stats.downloads[label] = hash || Math.round((completedBytes / totalBytes) * 100);
}
|
||||
|
||||
/**
 * Download the test file into memory, hash the complete body and write it to disk.
 *
 * @param {string|number} label - Label used for progress reporting and in the target file name.
 * @returns {Promise<void>}
 */
async function buffered(label) {
	const hash = new blake2.Hash('blake2b');

	const imageRes = await bhttp.get(file, {
		onDownloadProgress(completedBytes, totalBytes) {
			setProgress(label, completedBytes, totalBytes);
		},
	});

	hash.update(imageRes.body);
	// report the hash in place of the percentage
	setProgress(label, null, null, hash.digest('hex'));

	await fsPromises.writeFile(`/mnt/stor/Pictures/traxxx/temp/buffered-${label}.bin`, imageRes.body);
}
|
||||
|
||||
async function streamed(label) {
|
||||
const hash = new blake2.Hash('blake2b');
|
||||
hash.setEncoding('hex');
|
||||
const hash = new blake2.Hash('blake2b');
|
||||
hash.setEncoding('hex');
|
||||
|
||||
const hashStream = new PassThrough();
|
||||
const targetStream = fs.createWriteStream(`/mnt/stor/Pictures/traxxx/temp/streamed-${label}.bin`);
|
||||
const hashStream = new PassThrough();
|
||||
const targetStream = fs.createWriteStream(`/mnt/stor/Pictures/traxxx/temp/streamed-${label}.bin`);
|
||||
|
||||
const imageRes = await bhttp.get(file, {
|
||||
stream: true,
|
||||
});
|
||||
const imageRes = await bhttp.get(file, {
|
||||
stream: true,
|
||||
});
|
||||
|
||||
const stream = imageRes
|
||||
.pipe(hashStream)
|
||||
.pipe(targetStream);
|
||||
const stream = imageRes
|
||||
.pipe(hashStream)
|
||||
.pipe(targetStream);
|
||||
|
||||
imageRes.on('progress', (completedBytes, totalBytes) => {
|
||||
setProgress(label, completedBytes, totalBytes);
|
||||
});
|
||||
imageRes.on('progress', (completedBytes, totalBytes) => {
|
||||
setProgress(label, completedBytes, totalBytes);
|
||||
});
|
||||
|
||||
hashStream.on('data', (chunk) => {
|
||||
hash.write(chunk);
|
||||
});
|
||||
hashStream.on('data', (chunk) => {
|
||||
hash.write(chunk);
|
||||
});
|
||||
|
||||
stream.on('finish', () => {
|
||||
hash.end();
|
||||
setProgress(label, null, null, hash.read());
|
||||
});
|
||||
stream.on('finish', () => {
|
||||
hash.end();
|
||||
setProgress(label, null, null, hash.read());
|
||||
});
|
||||
}
|
||||
|
||||
async function init() {
|
||||
const n = argv.n || 1;
|
||||
const n = argv.n || 1;
|
||||
|
||||
if (argv._.includes('stream')) {
|
||||
console.log('using streams');
|
||||
render();
|
||||
if (argv._.includes('stream')) {
|
||||
console.log('using streams');
|
||||
render();
|
||||
|
||||
await Promise.map(Array.from({ length: n }), async (value, index) => streamed(index + 1));
|
||||
await Promise.map(Array.from({ length: n }), async (value, index) => streamed(index + 1));
|
||||
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (argv._.includes('buffer')) {
|
||||
console.log('using buffers');
|
||||
render();
|
||||
if (argv._.includes('buffer')) {
|
||||
console.log('using buffers');
|
||||
render();
|
||||
|
||||
await Promise.map(Array.from({ length: n }), async (value, index) => buffered(index + 1));
|
||||
}
|
||||
await Promise.map(Array.from({ length: n }), async (value, index) => buffered(index + 1));
|
||||
}
|
||||
}
|
||||
|
||||
init();
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
'use strict';
|
||||
|
||||
function capitalize(string, trim = true) {
|
||||
if (!string) {
|
||||
return '';
|
||||
}
|
||||
if (!string) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const capitalized = string
|
||||
.split(/\s+/)
|
||||
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||
.join(' ');
|
||||
const capitalized = string
|
||||
.split(/\s+/)
|
||||
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||
.join(' ');
|
||||
|
||||
return trim ? capitalized.trim() : capitalized;
|
||||
return trim ? capitalized.trim() : capitalized;
|
||||
}
|
||||
|
||||
module.exports = capitalize;
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
'use strict';
|
||||
|
||||
function chunk(array, chunkSize) {
|
||||
return Array.from({ length: Math.ceil(array.length / chunkSize) })
|
||||
.map((value, index) => array.slice(index * chunkSize, (index * chunkSize) + chunkSize));
|
||||
return Array.from({ length: Math.ceil(array.length / chunkSize) })
|
||||
.map((value, index) => array.slice(index * chunkSize, (index * chunkSize) + chunkSize));
|
||||
}
|
||||
|
||||
module.exports = chunk;
|
||||
|
||||
@@ -1,48 +1,48 @@
|
||||
'use strict';
|
||||
|
||||
function inchesToCm(inches) {
|
||||
return Math.round(Number(inches) * 2.54);
|
||||
return Math.round(Number(inches) * 2.54);
|
||||
}
|
||||
|
||||
function feetInchesToCm(feet, inches) {
|
||||
if (typeof feet === 'string' && !inches) {
|
||||
const [feetPart, inchesPart] = feet.match(/\d+/g);
|
||||
return feetInchesToCm(feetPart, inchesPart);
|
||||
}
|
||||
if (typeof feet === 'string' && !inches) {
|
||||
const [feetPart, inchesPart] = feet.match(/\d+/g);
|
||||
return feetInchesToCm(feetPart, inchesPart);
|
||||
}
|
||||
|
||||
return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54));
|
||||
return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54));
|
||||
}
|
||||
|
||||
function cmToFeetInches(centimeters) {
|
||||
const feet = Math.floor(centimeters / 30.48);
|
||||
const inches = Math.round((centimeters / 2.54) % (feet * 12));
|
||||
const feet = Math.floor(centimeters / 30.48);
|
||||
const inches = Math.round((centimeters / 2.54) % (feet * 12));
|
||||
|
||||
return { feet, inches };
|
||||
return { feet, inches };
|
||||
}
|
||||
|
||||
function heightToCm(height) {
|
||||
const [feet, inches] = height.match(/\d+/g);
|
||||
const [feet, inches] = height.match(/\d+/g);
|
||||
|
||||
return feetInchesToCm(feet, inches);
|
||||
return feetInchesToCm(feet, inches);
|
||||
}
|
||||
|
||||
function lbsToKg(lbs) {
|
||||
const pounds = lbs.toString().match(/\d+/)[0];
|
||||
const pounds = lbs.toString().match(/\d+/)[0];
|
||||
|
||||
return Math.round(Number(pounds) * 0.453592);
|
||||
return Math.round(Number(pounds) * 0.453592);
|
||||
}
|
||||
|
||||
function kgToLbs(kgs) {
|
||||
const kilos = kgs.toString().match(/\d+/)[0];
|
||||
const kilos = kgs.toString().match(/\d+/)[0];
|
||||
|
||||
return Math.round(Number(kilos) / 0.453592);
|
||||
return Math.round(Number(kilos) / 0.453592);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
cmToFeetInches,
|
||||
feetInchesToCm,
|
||||
heightToCm,
|
||||
inchesToCm,
|
||||
lbsToKg,
|
||||
kgToLbs,
|
||||
cmToFeetInches,
|
||||
feetInchesToCm,
|
||||
heightToCm,
|
||||
inchesToCm,
|
||||
lbsToKg,
|
||||
kgToLbs,
|
||||
};
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
'use strict';
|
||||
|
||||
function cookieToData(cookieString) {
|
||||
return cookieString.split('; ').reduce((acc, cookie) => {
|
||||
const [key, value] = cookie.split('=');
|
||||
return cookieString.split('; ').reduce((acc, cookie) => {
|
||||
const [key, value] = cookie.split('=');
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[key]: value,
|
||||
};
|
||||
}, {});
|
||||
return {
|
||||
...acc,
|
||||
[key]: value,
|
||||
};
|
||||
}, {});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
cookieToData,
|
||||
cookieToData,
|
||||
};
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
function escapeHtml(text) {
|
||||
return text
|
||||
.replace(/&/g, '&')
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, ''');
|
||||
return text
|
||||
.replace(/&/g, '&')
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, ''');
|
||||
}
|
||||
|
||||
module.exports = escapeHtml;
|
||||
|
||||
@@ -11,107 +11,107 @@ const pipeline = util.promisify(stream.pipeline);
|
||||
const logger = require('../logger')(__filename);
|
||||
|
||||
const defaultHeaders = {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||
};
|
||||
|
||||
const defaultOptions = {
|
||||
responseTimeout: 30000,
|
||||
responseTimeout: 30000,
|
||||
};
|
||||
|
||||
const proxyAgent = tunnel.httpsOverHttp({
|
||||
proxy: {
|
||||
host: config.proxy.host,
|
||||
port: config.proxy.port,
|
||||
},
|
||||
proxy: {
|
||||
host: config.proxy.host,
|
||||
port: config.proxy.port,
|
||||
},
|
||||
});
|
||||
|
||||
function useProxy(url) {
|
||||
if (!config.proxy.enable) {
|
||||
return false;
|
||||
}
|
||||
if (!config.proxy.enable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const { hostname } = new URL(url);
|
||||
return config.proxy.hostnames.includes(hostname);
|
||||
const { hostname } = new URL(url);
|
||||
return config.proxy.hostnames.includes(hostname);
|
||||
}
|
||||
|
||||
const queue = taskQueue();
|
||||
|
||||
queue.on('concurrencyReached:http', () => {
|
||||
logger.silly('Queueing requests');
|
||||
logger.silly('Queueing requests');
|
||||
});
|
||||
|
||||
queue.define('http', async ({
|
||||
url,
|
||||
method = 'GET',
|
||||
body,
|
||||
headers = {},
|
||||
options = {},
|
||||
url,
|
||||
method = 'GET',
|
||||
body,
|
||||
headers = {},
|
||||
options = {},
|
||||
}) => {
|
||||
if (body) {
|
||||
logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)}`);
|
||||
} else {
|
||||
logger.silly(`${method.toUpperCase()} ${url}`);
|
||||
}
|
||||
if (body) {
|
||||
logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)}`);
|
||||
} else {
|
||||
logger.silly(`${method.toUpperCase()} ${url}`);
|
||||
}
|
||||
|
||||
const reqOptions = {
|
||||
headers: {
|
||||
...(options.defaultHeaders !== false && defaultHeaders),
|
||||
...headers,
|
||||
},
|
||||
...defaultOptions,
|
||||
...options,
|
||||
...(options.timeout && { responseTimeout: options.timeout }),
|
||||
};
|
||||
const reqOptions = {
|
||||
headers: {
|
||||
...(options.defaultHeaders !== false && defaultHeaders),
|
||||
...headers,
|
||||
},
|
||||
...defaultOptions,
|
||||
...options,
|
||||
...(options.timeout && { responseTimeout: options.timeout }),
|
||||
};
|
||||
|
||||
if (useProxy(url)) {
|
||||
reqOptions.agent = proxyAgent;
|
||||
}
|
||||
if (useProxy(url)) {
|
||||
reqOptions.agent = proxyAgent;
|
||||
}
|
||||
|
||||
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
||||
? await bhttp[method.toLowerCase()](url, body, reqOptions)
|
||||
: await bhttp[method.toLowerCase()](url, reqOptions);
|
||||
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
||||
? await bhttp[method.toLowerCase()](url, body, reqOptions)
|
||||
: await bhttp[method.toLowerCase()](url, reqOptions);
|
||||
|
||||
if (options.stream && options.destination) {
|
||||
await pipeline(res, ...(options.transforms || []), options.destination);
|
||||
}
|
||||
if (options.stream && options.destination) {
|
||||
await pipeline(res, ...(options.transforms || []), options.destination);
|
||||
}
|
||||
|
||||
const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
|
||||
const json = Buffer.isBuffer(res.body) ? null : res.body;
|
||||
const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
|
||||
const json = Buffer.isBuffer(res.body) ? null : res.body;
|
||||
|
||||
return {
|
||||
...res,
|
||||
originalRes: res,
|
||||
html,
|
||||
json,
|
||||
pipe: res.pipe,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
code: res.statusCode,
|
||||
status: res.statusCode,
|
||||
};
|
||||
return {
|
||||
...res,
|
||||
originalRes: res,
|
||||
html,
|
||||
json,
|
||||
pipe: res.pipe,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
code: res.statusCode,
|
||||
status: res.statusCode,
|
||||
};
|
||||
}, {
|
||||
concurrency: 20,
|
||||
concurrency: 20,
|
||||
});
|
||||
|
||||
async function get(url, headers, options) {
|
||||
return queue.push('http', {
|
||||
method: 'GET',
|
||||
url,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
return queue.push('http', {
|
||||
method: 'GET',
|
||||
url,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
}
|
||||
|
||||
async function post(url, body, headers, options) {
|
||||
return queue.push('http', {
|
||||
method: 'POST',
|
||||
url,
|
||||
body,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
return queue.push('http', {
|
||||
method: 'POST',
|
||||
url,
|
||||
body,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
get,
|
||||
post,
|
||||
get,
|
||||
post,
|
||||
};
|
||||
|
||||
@@ -7,12 +7,12 @@ const { argv } = require('yargs');
|
||||
const url = argv.url || 'http://localhost:5000/media/actors/tommy-pistol/1580341442712.jpeg';
|
||||
|
||||
async function scan() {
|
||||
console.log(url);
|
||||
console.log(url);
|
||||
|
||||
const res = await bhttp.get(url);
|
||||
const stats = await sharp(res.body).stats();
|
||||
const res = await bhttp.get(url);
|
||||
const stats = await sharp(res.body).stats();
|
||||
|
||||
console.log(stats);
|
||||
console.log(stats);
|
||||
}
|
||||
|
||||
scan();
|
||||
|
||||
@@ -4,33 +4,33 @@ const Promise = require('bluebird');
|
||||
const knex = require('../knex');
|
||||
|
||||
async function listSites() {
|
||||
const [networks, allSites] = await Promise.all([
|
||||
knex('networks').orderBy('name'),
|
||||
knex('sites').orderBy('name'),
|
||||
]);
|
||||
const [networks, allSites] = await Promise.all([
|
||||
knex('networks').orderBy('name'),
|
||||
knex('sites').orderBy('name'),
|
||||
]);
|
||||
|
||||
await Promise.each(networks, async (network) => {
|
||||
console.log(`* **${network.name}**`);
|
||||
await Promise.each(networks, async (network) => {
|
||||
console.log(`* **${network.name}**`);
|
||||
|
||||
const sites = await knex('sites')
|
||||
.where({ network_id: network.id })
|
||||
.orderBy('name');
|
||||
const sites = await knex('sites')
|
||||
.where({ network_id: network.id })
|
||||
.orderBy('name');
|
||||
|
||||
if (sites.length === 1 && sites[0].name === network.name) {
|
||||
return;
|
||||
}
|
||||
if (sites.length === 1 && sites[0].name === network.name) {
|
||||
return;
|
||||
}
|
||||
|
||||
sites.forEach((site) => {
|
||||
const rkSpecial = network.id === 'realitykings'
|
||||
sites.forEach((site) => {
|
||||
const rkSpecial = network.id === 'realitykings'
|
||||
&& (new URL(site.url).hostname === 'www.realitykings.com'
|
||||
|| (site.parameters?.altLayout))
|
||||
? '\\*' : ''; // Reality Kings alt layout sites do not support scene fetch by URL
|
||||
? '\\*' : ''; // Reality Kings alt layout sites do not support scene fetch by URL
|
||||
|
||||
console.log(` * ${site.name}${rkSpecial}`);
|
||||
});
|
||||
});
|
||||
console.log(` * ${site.name}${rkSpecial}`);
|
||||
});
|
||||
});
|
||||
|
||||
console.log(`${networks.length} networks with ${allSites.length} sites total`);
|
||||
console.log(`${networks.length} networks with ${allSites.length} sites total`);
|
||||
}
|
||||
|
||||
listSites();
|
||||
|
||||
@@ -12,99 +12,99 @@ const { PassThrough } = require('stream');
|
||||
const http = require('./http');
|
||||
|
||||
function getMemoryUsage() {
|
||||
return process.memoryUsage().rss / (10 ** 6);
|
||||
return process.memoryUsage().rss / (10 ** 6);
|
||||
}
|
||||
|
||||
let peakMemoryUsage = getMemoryUsage();
|
||||
|
||||
async function fetchSource(link) {
|
||||
const id = nanoid();
|
||||
const id = nanoid();
|
||||
|
||||
const hasher = new blake2.Hash('blake2b');
|
||||
hasher.setEncoding('hex');
|
||||
const hasher = new blake2.Hash('blake2b');
|
||||
hasher.setEncoding('hex');
|
||||
|
||||
const tempFilePath = `/home/niels/Pictures/thumbs/temp/${id}.jpeg`;
|
||||
const tempFileStream = fs.createWriteStream(tempFilePath);
|
||||
const hashStream = new PassThrough();
|
||||
const tempFilePath = `/home/niels/Pictures/thumbs/temp/${id}.jpeg`;
|
||||
const tempFileStream = fs.createWriteStream(tempFilePath);
|
||||
const hashStream = new PassThrough();
|
||||
|
||||
hashStream.on('data', chunk => hasher.write(chunk));
|
||||
hashStream.on('data', chunk => hasher.write(chunk));
|
||||
|
||||
try {
|
||||
const res = await http.get(link, null, {
|
||||
stream: true,
|
||||
transforms: [hashStream],
|
||||
destination: tempFileStream,
|
||||
timeout: 5000,
|
||||
});
|
||||
try {
|
||||
const res = await http.get(link, null, {
|
||||
stream: true,
|
||||
transforms: [hashStream],
|
||||
destination: tempFileStream,
|
||||
timeout: 5000,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(res.status);
|
||||
}
|
||||
if (!res.ok) {
|
||||
throw new Error(res.status);
|
||||
}
|
||||
|
||||
hasher.end();
|
||||
const hash = hasher.read();
|
||||
hasher.end();
|
||||
const hash = hasher.read();
|
||||
|
||||
const memoryUsage = getMemoryUsage();
|
||||
peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);
|
||||
const memoryUsage = getMemoryUsage();
|
||||
peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);
|
||||
|
||||
console.log(`Stored ${tempFilePath}, memory usage: ${memoryUsage.toFixed(2)} MB`);
|
||||
console.log(`Stored ${tempFilePath}, memory usage: ${memoryUsage.toFixed(2)} MB`);
|
||||
|
||||
return {
|
||||
id,
|
||||
path: tempFilePath,
|
||||
hash,
|
||||
};
|
||||
} catch (error) {
|
||||
await fsPromises.unlink(tempFilePath);
|
||||
return {
|
||||
id,
|
||||
path: tempFilePath,
|
||||
hash,
|
||||
};
|
||||
} catch (error) {
|
||||
await fsPromises.unlink(tempFilePath);
|
||||
|
||||
throw error;
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async function init() {
|
||||
const linksFile = await fsPromises.readFile('/home/niels/Pictures/photos', 'utf8');
|
||||
const links = linksFile.split('\n').filter(Boolean);
|
||||
const linksFile = await fsPromises.readFile('/home/niels/Pictures/photos', 'utf8');
|
||||
const links = linksFile.split('\n').filter(Boolean);
|
||||
|
||||
await fsPromises.mkdir('/home/niels/Pictures/thumbs/temp', { recursive: true });
|
||||
await fsPromises.mkdir('/home/niels/Pictures/thumbs/temp', { recursive: true });
|
||||
|
||||
console.time('thumbs');
|
||||
console.time('thumbs');
|
||||
|
||||
const files = await Promise.map(links, async (link) => {
|
||||
try {
|
||||
return await fetchSource(link);
|
||||
} catch (error) {
|
||||
console.log(`Failed to fetch ${link}: ${error.message}`);
|
||||
return null;
|
||||
}
|
||||
});
|
||||
const files = await Promise.map(links, async (link) => {
|
||||
try {
|
||||
return await fetchSource(link);
|
||||
} catch (error) {
|
||||
console.log(`Failed to fetch ${link}: ${error.message}`);
|
||||
return null;
|
||||
}
|
||||
});
|
||||
|
||||
await Promise.map(files.filter(Boolean), async (file) => {
|
||||
const image = sharp(file.path).jpeg();
|
||||
await Promise.map(files.filter(Boolean), async (file) => {
|
||||
const image = sharp(file.path).jpeg();
|
||||
|
||||
const [{ width, height }, { size }] = await Promise.all([
|
||||
image.metadata(),
|
||||
fsPromises.stat(file.path),
|
||||
]);
|
||||
const [{ width, height }, { size }] = await Promise.all([
|
||||
image.metadata(),
|
||||
fsPromises.stat(file.path),
|
||||
]);
|
||||
|
||||
await Promise.all([
|
||||
image
|
||||
.toFile(`/home/niels/Pictures/thumbs/${file.hash}.jpeg`),
|
||||
image
|
||||
.resize({
|
||||
height: config.media.thumbnailSize,
|
||||
withoutEnlargement: true,
|
||||
})
|
||||
.toFile(`/home/niels/Pictures/thumbs/${file.hash}_thumb.jpeg`),
|
||||
]);
|
||||
await Promise.all([
|
||||
image
|
||||
.toFile(`/home/niels/Pictures/thumbs/${file.hash}.jpeg`),
|
||||
image
|
||||
.resize({
|
||||
height: config.media.thumbnailSize,
|
||||
withoutEnlargement: true,
|
||||
})
|
||||
.toFile(`/home/niels/Pictures/thumbs/${file.hash}_thumb.jpeg`),
|
||||
]);
|
||||
|
||||
const memoryUsage = getMemoryUsage();
|
||||
peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);
|
||||
const memoryUsage = getMemoryUsage();
|
||||
peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);
|
||||
|
||||
console.log(`Resized ${file.id} (${width}, ${height}, ${size}), memory usage: ${memoryUsage.toFixed(2)} MB`);
|
||||
}, { concurrency: 10 });
|
||||
console.log(`Resized ${file.id} (${width}, ${height}, ${size}), memory usage: ${memoryUsage.toFixed(2)} MB`);
|
||||
}, { concurrency: 10 });
|
||||
|
||||
console.log(`Peak memory usage: ${peakMemoryUsage.toFixed(2)} MB`);
|
||||
console.timeEnd('thumbs');
|
||||
console.log(`Peak memory usage: ${peakMemoryUsage.toFixed(2)} MB`);
|
||||
console.timeEnd('thumbs');
|
||||
}
|
||||
|
||||
init();
|
||||
|
||||
@@ -6,16 +6,16 @@ const bhttp = require('bhttp');
|
||||
const knex = require('../knex');
|
||||
|
||||
async function run() {
|
||||
const network = await knex('networks').where('slug', 'mofos').first();
|
||||
const sites = await knex('sites').where('network_id', network.id);
|
||||
const network = await knex('networks').where('slug', 'mofos').first();
|
||||
const sites = await knex('sites').where('network_id', network.id);
|
||||
|
||||
await Promise.map(sites, async (site) => {
|
||||
const res = await bhttp.get(site.url);
|
||||
await Promise.map(sites, async (site) => {
|
||||
const res = await bhttp.get(site.url);
|
||||
|
||||
console.log(site.url, res.statusCode);
|
||||
}, {
|
||||
concurrency: 5,
|
||||
});
|
||||
console.log(site.url, res.statusCode);
|
||||
}, {
|
||||
concurrency: 5,
|
||||
});
|
||||
}
|
||||
|
||||
run();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
function pickRandom(array) {
|
||||
return array[Math.floor(Math.random() * array.length)];
|
||||
return array[Math.floor(Math.random() * array.length)];
|
||||
}
|
||||
|
||||
module.exports = pickRandom;
|
||||
|
||||
@@ -9,32 +9,32 @@ const argv = require('../argv');
|
||||
const knex = require('../knex');
|
||||
|
||||
async function init() {
|
||||
const posters = await knex('actors')
|
||||
.select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'media.index', 'sites.name as site_name', 'networks.name as network_name')
|
||||
.whereIn('actors.name', (argv.actors || []).concat(argv._))
|
||||
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
|
||||
.join('releases', 'releases_actors.release_id', 'releases.id')
|
||||
.join('sites', 'sites.id', 'releases.site_id')
|
||||
.join('networks', 'networks.id', 'sites.network_id')
|
||||
.join('releases_posters', 'releases_posters.release_id', 'releases.id')
|
||||
.join('media', 'releases_posters.media_id', 'media.id');
|
||||
// .join('releases_photos', 'releases_photos.release_id', 'releases.id')
|
||||
// .join('media', 'releases_photos.media_id', 'media.id');
|
||||
const posters = await knex('actors')
|
||||
.select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'media.index', 'sites.name as site_name', 'networks.name as network_name')
|
||||
.whereIn('actors.name', (argv.actors || []).concat(argv._))
|
||||
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
|
||||
.join('releases', 'releases_actors.release_id', 'releases.id')
|
||||
.join('sites', 'sites.id', 'releases.site_id')
|
||||
.join('networks', 'networks.id', 'sites.network_id')
|
||||
.join('releases_posters', 'releases_posters.release_id', 'releases.id')
|
||||
.join('media', 'releases_posters.media_id', 'media.id');
|
||||
// .join('releases_photos', 'releases_photos.release_id', 'releases.id')
|
||||
// .join('media', 'releases_photos.media_id', 'media.id');
|
||||
|
||||
await Promise.all(posters.map(async (poster) => {
|
||||
const source = path.join(config.media.path, poster.path);
|
||||
await Promise.all(posters.map(async (poster) => {
|
||||
const source = path.join(config.media.path, poster.path);
|
||||
|
||||
const directory = path.join(config.media.path, 'extracted', poster.actor_name);
|
||||
const target = path.join(directory, `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')} (${moment.utc(poster.date).format('YYYY-MM-DD')})-${poster.index}.jpeg`);
|
||||
await fs.mkdir(path.join(directory), { recursive: true });
|
||||
const directory = path.join(config.media.path, 'extracted', poster.actor_name);
|
||||
const target = path.join(directory, `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')} (${moment.utc(poster.date).format('YYYY-MM-DD')})-${poster.index}.jpeg`);
|
||||
await fs.mkdir(path.join(directory), { recursive: true });
|
||||
|
||||
const file = await fs.readFile(source);
|
||||
await fs.writeFile(target, file);
|
||||
const file = await fs.readFile(source);
|
||||
await fs.writeFile(target, file);
|
||||
|
||||
return file;
|
||||
}));
|
||||
return file;
|
||||
}));
|
||||
|
||||
knex.destroy();
|
||||
knex.destroy();
|
||||
}
|
||||
|
||||
init();
|
||||
|
||||
424
src/utils/qu.js
424
src/utils/qu.js
@@ -5,341 +5,341 @@ const moment = require('moment');
|
||||
const http = require('./http');
|
||||
|
||||
function trim(str) {
|
||||
if (!str) return null;
|
||||
return str.trim().replace(/\s+/g, ' ');
|
||||
if (!str) return null;
|
||||
return str.trim().replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
function extractDate(dateString, format, match) {
|
||||
if (match) {
|
||||
const dateStamp = trim(dateString).match(match);
|
||||
if (match) {
|
||||
const dateStamp = trim(dateString).match(match);
|
||||
|
||||
if (dateStamp) {
|
||||
const dateValue = moment.utc(dateStamp[0], format);
|
||||
if (dateStamp) {
|
||||
const dateValue = moment.utc(dateStamp[0], format);
|
||||
|
||||
return dateValue.isValid() ? dateValue.toDate() : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
return dateValue.isValid() ? dateValue.toDate() : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
const dateValue = moment.utc(trim(dateString), format);
|
||||
const dateValue = moment.utc(trim(dateString), format);
|
||||
|
||||
return dateValue.isValid() ? dateValue.toDate() : null;
|
||||
return dateValue.isValid() ? dateValue.toDate() : null;
|
||||
}
|
||||
|
||||
function formatDate(dateValue, format, inputFormat) {
|
||||
if (inputFormat) {
|
||||
return moment(dateValue, inputFormat).format(format);
|
||||
}
|
||||
if (inputFormat) {
|
||||
return moment(dateValue, inputFormat).format(format);
|
||||
}
|
||||
|
||||
return moment(dateValue).format(format);
|
||||
return moment(dateValue).format(format);
|
||||
}
|
||||
|
||||
function prefixUrl(urlValue, origin, protocol = 'https') {
|
||||
if (protocol && /^\/\//.test(urlValue)) {
|
||||
return `${protocol}:${urlValue}`;
|
||||
}
|
||||
if (protocol && /^\/\//.test(urlValue)) {
|
||||
return `${protocol}:${urlValue}`;
|
||||
}
|
||||
|
||||
if (origin && /^\//.test(urlValue)) {
|
||||
return `${origin}${urlValue}`;
|
||||
}
|
||||
if (origin && /^\//.test(urlValue)) {
|
||||
return `${origin}${urlValue}`;
|
||||
}
|
||||
|
||||
return urlValue;
|
||||
return urlValue;
|
||||
}
|
||||
|
||||
function q(context, selector, attrArg, applyTrim = true) {
|
||||
const attr = attrArg === true ? 'textContent' : attrArg;
|
||||
const attr = attrArg === true ? 'textContent' : attrArg;
|
||||
|
||||
if (attr) {
|
||||
const value = selector
|
||||
? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value
|
||||
: context[attr] || context.attributes[attr]?.value;
|
||||
if (attr) {
|
||||
const value = selector
|
||||
? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value
|
||||
: context[attr] || context.attributes[attr]?.value;
|
||||
|
||||
return applyTrim && value ? trim(value) : value;
|
||||
}
|
||||
return applyTrim && value ? trim(value) : value;
|
||||
}
|
||||
|
||||
return selector ? context.querySelector(selector) : context;
|
||||
return selector ? context.querySelector(selector) : context;
|
||||
}
|
||||
|
||||
function all(context, selector, attrArg, applyTrim = true) {
|
||||
const attr = attrArg === true ? 'textContent' : attrArg;
|
||||
const attr = attrArg === true ? 'textContent' : attrArg;
|
||||
|
||||
if (attr) {
|
||||
return Array.from(context.querySelectorAll(selector), el => q(el, null, attr, applyTrim));
|
||||
}
|
||||
if (attr) {
|
||||
return Array.from(context.querySelectorAll(selector), el => q(el, null, attr, applyTrim));
|
||||
}
|
||||
|
||||
return Array.from(context.querySelectorAll(selector));
|
||||
return Array.from(context.querySelectorAll(selector));
|
||||
}
|
||||
|
||||
function exists(context, selector) {
|
||||
return !!q(context, selector);
|
||||
return !!q(context, selector);
|
||||
}
|
||||
|
||||
function html(context, selector) {
|
||||
const el = q(context, selector, null, true);
|
||||
const el = q(context, selector, null, true);
|
||||
|
||||
return el && el.innerHTML;
|
||||
return el && el.innerHTML;
|
||||
}
|
||||
|
||||
function texts(context, selector, applyTrim = true, filter = true) {
|
||||
const el = q(context, selector, null, applyTrim);
|
||||
if (!el) return null;
|
||||
const el = q(context, selector, null, applyTrim);
|
||||
if (!el) return null;
|
||||
|
||||
const nodes = Array.from(el.childNodes)
|
||||
.filter(node => node.nodeName === '#text')
|
||||
.map(node => (applyTrim ? trim(node.textContent) : node.textContent));
|
||||
const nodes = Array.from(el.childNodes)
|
||||
.filter(node => node.nodeName === '#text')
|
||||
.map(node => (applyTrim ? trim(node.textContent) : node.textContent));
|
||||
|
||||
return filter ? nodes.filter(Boolean) : nodes;
|
||||
return filter ? nodes.filter(Boolean) : nodes;
|
||||
}
|
||||
|
||||
function text(context, selector, applyTrim = true) {
|
||||
const nodes = texts(context, selector, applyTrim, true);
|
||||
if (!nodes) return null;
|
||||
const nodes = texts(context, selector, applyTrim, true);
|
||||
if (!nodes) return null;
|
||||
|
||||
const textValue = nodes.join(' ');
|
||||
const textValue = nodes.join(' ');
|
||||
|
||||
return applyTrim ? trim(textValue) : textValue;
|
||||
return applyTrim ? trim(textValue) : textValue;
|
||||
}
|
||||
|
||||
function meta(context, selector, attrArg = 'content', applyTrim = true) {
|
||||
if (/meta\[.*\]/.test(selector)) {
|
||||
return q(context, selector, attrArg, applyTrim);
|
||||
}
|
||||
if (/meta\[.*\]/.test(selector)) {
|
||||
return q(context, selector, attrArg, applyTrim);
|
||||
}
|
||||
|
||||
return q(context, `meta[${selector}]`, attrArg, applyTrim);
|
||||
return q(context, `meta[${selector}]`, attrArg, applyTrim);
|
||||
}
|
||||
|
||||
function date(context, selector, format, match, attr = 'textContent') {
|
||||
const dateString = q(context, selector, attr, true);
|
||||
const dateString = q(context, selector, attr, true);
|
||||
|
||||
if (!dateString) return null;
|
||||
if (!dateString) return null;
|
||||
|
||||
return extractDate(dateString, format, match);
|
||||
return extractDate(dateString, format, match);
|
||||
}
|
||||
|
||||
function image(context, selector = 'img', attr = 'src', origin, protocol = 'https') {
|
||||
const imageEl = q(context, selector, attr);
|
||||
const imageEl = q(context, selector, attr);
|
||||
|
||||
// no attribute means q output will be HTML element
|
||||
return attr ? prefixUrl(imageEl, origin, protocol) : imageEl;
|
||||
// no attribute means q output will be HTML element
|
||||
return attr ? prefixUrl(imageEl, origin, protocol) : imageEl;
|
||||
}
|
||||
|
||||
function images(context, selector = 'img', attr = 'src', origin, protocol = 'https') {
|
||||
const imageEls = all(context, selector, attr);
|
||||
const imageEls = all(context, selector, attr);
|
||||
|
||||
return attr ? imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol)) : imageEls;
|
||||
return attr ? imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol)) : imageEls;
|
||||
}
|
||||
|
||||
function url(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
|
||||
const urlEl = q(context, selector, attr);
|
||||
const urlEl = q(context, selector, attr);
|
||||
|
||||
return attr ? prefixUrl(urlEl, origin, protocol) : urlEl;
|
||||
return attr ? prefixUrl(urlEl, origin, protocol) : urlEl;
|
||||
}
|
||||
|
||||
function urls(context, selector = 'a', attr = 'href', origin, protocol = 'https') {
|
||||
const urlEls = all(context, selector, attr);
|
||||
const urlEls = all(context, selector, attr);
|
||||
|
||||
return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls;
|
||||
return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls;
|
||||
}
|
||||
|
||||
/**
 * Query a video poster source from `context`.
 *
 * @param {object} context - element or document handed to `q`
 * @param {string} [selector='video'] - selector handed to `q`
 * @param {string} [attr='poster'] - attribute to read; a falsy value returns the raw `q` result
 * @param {string} [origin] - forwarded to `prefixUrl`
 * @param {string} [protocol='https'] - forwarded to `prefixUrl`
 * @returns {*} result of `prefixUrl` when `attr` is truthy, otherwise the raw `q` result
 */
function poster(context, selector = 'video', attr = 'poster', origin, protocol = 'https') {
	const posterEl = q(context, selector, attr);

	return attr ? prefixUrl(posterEl, origin, protocol) : posterEl;
}
|
||||
|
||||
/**
 * Query a single video (trailer) source from `context`.
 *
 * @param {object} context - element or document handed to `q`
 * @param {string} [selector='source'] - selector handed to `q`
 * @param {string} [attr='src'] - attribute to read; a falsy value returns the raw `q` result
 * @param {string} [origin] - forwarded to `prefixUrl`
 * @param {string} [protocol='https'] - forwarded to `prefixUrl`
 * @returns {*} result of `prefixUrl` when `attr` is truthy, otherwise the raw `q` result
 */
function video(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
	const trailerEl = q(context, selector, attr);

	return attr ? prefixUrl(trailerEl, origin, protocol) : trailerEl;
}
|
||||
|
||||
/**
 * Query all video (trailer) sources matching `selector` within `context`.
 *
 * @param {object} context - element or document handed to `all`
 * @param {string} [selector='source'] - selector handed to `all`
 * @param {string} [attr='src'] - attribute to read; a falsy value returns the raw `all` result
 * @param {string} [origin] - forwarded to `prefixUrl`
 * @param {string} [protocol='https'] - forwarded to `prefixUrl`
 * @returns {Array} each value passed through `prefixUrl` when `attr` is truthy, otherwise the raw `all` result
 */
function videos(context, selector = 'source', attr = 'src', origin, protocol = 'https') {
	const trailerEls = all(context, selector, attr);

	return attr ? trailerEls.map(trailerEl => prefixUrl(trailerEl, origin, protocol)) : trailerEls;
}
|
||||
|
||||
/**
 * Extract a duration in seconds from the value `q` returns for `selector`.
 *
 * @param {object} context - element or document handed to `q`
 * @param {string} selector - selector handed to `q`
 * @param {RegExp} [match] - pattern locating the timestamp; defaults to `[h:]m:s`
 * @param {string} [attr='textContent'] - attribute or property handed to `q`
 * @returns {?number} seconds as reported by `moment.duration`, or null when nothing is found or matched
 */
function duration(context, selector, match, attr = 'textContent') {
	const durationString = q(context, selector, attr);

	if (!durationString) return null;

	const durationMatch = durationString.match(match || /(\d+:)?\d+:\d+/);

	if (durationMatch) {
		// left-pad with an hour segment so `m:s` becomes `00:m:s`; keep only the last three segments
		const segments = ['00'].concat(durationMatch[0].split(':')).slice(-3);

		return moment.duration(segments.join(':')).asSeconds();
	}

	return null;
}
|
||||
|
||||
// Short `q*` aliases for the query helpers. init() binds these to a context
// element and module.exports spreads them at the top level.
const legacyFuncs = {
	q,
	qa: all,
	qall: all,
	qd: date,
	qdate: date,
	qh: html,
	qhtml: html,
	qi: image,
	qimage: image,
	qimages: images,
	qis: images,
	ql: duration,
	qlength: duration,
	qm: meta,
	qmeta: meta,
	qp: poster,
	qposter: poster,
	qs: all,
	qt: video,
	qtext: text,
	qtexts: texts,
	qtrailer: video,
	qtrailers: videos,
	qts: videos,
	qtx: text,
	qtxs: texts,
	qtxt: text,
	qtxts: texts,
	// `qu` is deliberately not aliased to url: init() and module.exports use the `qu` key for the quFuncs namespace
	// qu: url,
	qurl: url,
	qurls: urls,
	qus: urls,
};
|
||||
|
||||
// Query helpers under descriptive names; exposed as `qu` by module.exports
// and, bound to a context element, by init().
const quFuncs = {
	all,
	html,
	date,
	dur: duration,
	duration,
	exists,
	image,
	images,
	img: image,
	imgs: images,
	length: duration,
	meta,
	poster,
	q,
	text,
	texts,
	trailer: video,
	url,
	urls,
	video,
	videos,
};
|
||||
|
||||
/**
 * Wrap an element (or document) in a query context whose helper methods
 * default to querying that element.
 *
 * @param {object} element - element or document to bind the helpers to
 * @param {object} [window] - window owning the element; when given, helpers accept
 *   an `HTMLElement` as first argument to override the bound context
 * @returns {?object} context object with `element`/`el`, `html`, `text`, optional
 *   `window`/`document`, the legacy `q*` helpers and the `qu` namespace; null when
 *   `element` is falsy
 */
function init(element, window) {
	if (!element) return null;

	// dynamically attach methods with context
	const bindContext = funcs => Object.entries(funcs)
		.reduce((acc, [key, func]) => ({
			...acc,
			[key]: (...args) => (window && args[0] instanceof window.HTMLElement // allow for different context
				? func(...args)
				: func(element, ...args)),
		}), {});

	const legacyContextFuncs = bindContext(legacyFuncs);
	const quContextFuncs = bindContext(quFuncs);

	return {
		element,
		el: element,
		// falls back to the body's markup when the element has no outerHTML of its own
		html: element.outerHTML || element.body.outerHTML,
		text: trim(element.textContent),
		...(window && {
			window,
			document: window.document,
		}),
		...legacyContextFuncs,
		qu: quContextFuncs,
	};
}
|
||||
|
||||
/**
 * Create a query context for every element in `context`.
 *
 * @param {Array|object} context - array of elements, or a node exposing `querySelectorAll`
 * @param {string} selector - selector to match; ignored when `context` is already an array
 * @param {object} [window] - forwarded to `init`
 * @returns {Array} one `init` result per element
 */
function initAll(context, selector, window) {
	if (Array.isArray(context)) {
		return context.map(element => init(element, window));
	}

	return Array.from(context.querySelectorAll(selector))
		.map(element => init(element, window));
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and return a query context.
 *
 * @param {string} htmlValue - markup handed to the JSDOM constructor
 * @param {string} [selector] - when given, the context is the first matching element
 *   instead of the whole document
 * @returns {?object} `init` result for the selected element or the document
 */
function extract(htmlValue, selector) {
	const { window } = new JSDOM(htmlValue);

	if (selector) {
		return init(window.document.querySelector(selector), window);
	}

	return init(window.document, window);
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and return a query context for every
 * element matching `selector`.
 *
 * @param {string} htmlValue - markup handed to the JSDOM constructor
 * @param {string} selector - selector forwarded to `initAll`
 * @returns {Array} `initAll` result for the parsed document
 */
function extractAll(htmlValue, selector) {
	const { window } = new JSDOM(htmlValue);

	return initAll(window.document, selector, window);
}
|
||||
|
||||
/**
 * Fetch a page and wrap its body in a query context.
 *
 * @param {string} urlValue - URL handed to `http.get`
 * @param {string} [selector] - selector forwarded to `extract` / `extractAll`
 * @param {object} [headers] - forwarded to `http.get`
 * @param {object} [options] - accepted for signature compatibility; not used here
 * @param {boolean} [queryAll=false] - extract every match instead of the first
 * @returns {Promise<object>} `{ item, items, res, ok, status }`; on any status other
 *   than 200, `item` is null, `items` is empty and `ok` is false
 */
async function get(urlValue, selector, headers, options, queryAll = false) {
	const res = await http.get(urlValue, headers);

	if (res.statusCode === 200) {
		const item = queryAll
			? extractAll(res.body.toString(), selector)
			: extract(res.body.toString(), selector);

		return {
			item,
			// must test the queryAll flag: the `all` helper function is always truthy,
			// which left `items` as a bare, non-array context for single queries
			items: queryAll ? item : [item],
			res,
			ok: true,
			status: res.statusCode,
		};
	}

	return {
		item: null,
		items: [],
		res,
		ok: false,
		status: res.statusCode,
	};
}
|
||||
|
||||
/**
 * Fetch a page and extract every element matching `selector`.
 * Delegates to `get` with `queryAll` set to true.
 *
 * @param {string} urlValue - forwarded to `get`
 * @param {string} selector - forwarded to `get`
 * @param {object} [headers] - forwarded to `get`
 * @param {object} [options] - forwarded to `get`
 * @returns {Promise<object>} the `get` result
 */
async function getAll(urlValue, selector, headers, options) {
	return get(urlValue, selector, headers, options, true);
}
|
||||
|
||||
module.exports = {
|
||||
extractDate,
|
||||
extract,
|
||||
extractAll,
|
||||
init,
|
||||
initAll,
|
||||
formatDate,
|
||||
get,
|
||||
getAll,
|
||||
context: init,
|
||||
contextAll: initAll,
|
||||
ed: extractDate,
|
||||
ex: extract,
|
||||
exa: extractAll,
|
||||
fd: formatDate,
|
||||
parseDate: extractDate,
|
||||
ctx: init,
|
||||
ctxa: initAll,
|
||||
geta: getAll,
|
||||
qu: quFuncs,
|
||||
...legacyFuncs,
|
||||
extractDate,
|
||||
extract,
|
||||
extractAll,
|
||||
init,
|
||||
initAll,
|
||||
formatDate,
|
||||
get,
|
||||
getAll,
|
||||
context: init,
|
||||
contextAll: initAll,
|
||||
ed: extractDate,
|
||||
ex: extract,
|
||||
exa: extractAll,
|
||||
fd: formatDate,
|
||||
parseDate: extractDate,
|
||||
ctx: init,
|
||||
ctxa: initAll,
|
||||
geta: getAll,
|
||||
qu: quFuncs,
|
||||
...legacyFuncs,
|
||||
};
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const path = require('path');
|
||||
const Promise = require('bluebird');
|
||||
const fs = require('fs-extra');
|
||||
const fetchScene = require('../scrape-releases');
|
||||
|
||||
const argv = require('../argv');
|
||||
|
||||
/**
 * Look up a scene for every file in the current working directory and,
 * when `argv.confirm` is set, rename the file to `<scene.filename>.mp4`.
 * The resulting filenames are logged either way, so running without
 * `confirm` acts as a dry run.
 */
async function renameFiles() {
	const filenames = await fs.readdir(process.cwd());

	const curated = await Promise.map(filenames, async (filename) => {
		// takes the second space-separated token as the shoot ID — presumably files are named `<prefix> <shootId> ...`; verify
		const shootId = filename.split(' ')[1];
		const scene = await fetchScene(`https://kink.com/shoot/${shootId}`);

		if (argv.confirm) {
			await fs.rename(path.join(process.cwd(), filename), path.join(process.cwd(), `${scene.filename}.mp4`));
		}

		return scene.filename;
	}, {
		concurrency: 5,
	});

	console.log(curated);
}
|
||||
|
||||
renameFiles();
|
||||
@@ -3,26 +3,26 @@
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
/**
 * Resolve a free-form place description through the Nominatim search API.
 *
 * @param {string} query - place description; a falsy value short-circuits to null
 * @returns {Promise<?object>} object carrying whichever of `city`, `state`, `country`
 *   (upper-cased country code) and `continent` the first result's address provides,
 *   or null when there is no result with an address
 */
async function resolvePlace(query) {
	if (!query) {
		return null;
	}

	// pass the query as the `q` parameter and encode it as a component: encodeURI leaves
	// `?`, `#`, `&` and `/` untouched, so such characters in the query corrupted the URL
	const res = await bhttp.get(`https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(query)}&format=json&accept-language=en&addressdetails=1`);
	const [item] = res.body;

	if (item && item.address) {
		const rawPlace = item.address;
		const place = {};

		if (rawPlace.city) place.city = rawPlace.city;
		if (rawPlace.state) place.state = rawPlace.state;
		if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase();
		if (rawPlace.continent) place.continent = rawPlace.continent;

		return place;
	}

	return null;
}
|
||||
|
||||
module.exports = resolvePlace;
|
||||
|
||||
@@ -6,32 +6,32 @@ const fs = require('fs-extra');
|
||||
const knex = require('../knex');
|
||||
|
||||
/**
 * Download the logo of every site in the `score` network into `./score/<slug>.png`,
 * logging the outcome per site, then close the database connection.
 */
async function init() {
	const sites = await knex('sites')
		.select('networks.name', 'sites.slug')
		.join('networks', 'networks.id', 'sites.network_id')
		.where('networks.slug', 'score');

	await Promise.map(sites, async (site) => {
		const url = `https://cdn77.scoreuniverse.com/${site.slug}/images/logo.png`;

		console.log(url);

		const res = await bhttp.get(url, {
			responseTimeout: 5000,
		});

		if (res.statusCode === 200) {
			console.log(`Saving logo for ${site.slug}`);

			await fs.writeFile(`./score/${site.slug}.png`, res.body);

			// stop here: without this return, a saved logo was also reported as not found
			return;
		}

		console.log(`No logo found for ${site.slug}`);
	}, {
		concurrency: 10,
	});

	knex.destroy();
}
|
||||
|
||||
init();
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user