Added Kink profile scraper. Fixed --force causing media collisions.
This commit is contained in:
parent
a7707b7b28
commit
6fd2bc2687
|
@ -123,6 +123,7 @@ module.exports = {
|
||||||
'julesjordan',
|
'julesjordan',
|
||||||
'bang',
|
'bang',
|
||||||
'pervcity',
|
'pervcity',
|
||||||
|
'kink',
|
||||||
'peternorth',
|
'peternorth',
|
||||||
'naughtyamerica',
|
'naughtyamerica',
|
||||||
'cherrypimps',
|
'cherrypimps',
|
||||||
|
|
|
@ -64,9 +64,10 @@ const ethnicities = {
|
||||||
black: 'black',
|
black: 'black',
|
||||||
caucasian: 'white',
|
caucasian: 'white',
|
||||||
european: 'white',
|
european: 'white',
|
||||||
hispanic: 'latina',
|
hispanic: 'latin',
|
||||||
indian: 'indian',
|
indian: 'indian',
|
||||||
japanese: 'japanese',
|
japanese: 'japanese',
|
||||||
|
latin: 'latin',
|
||||||
latina: 'latina',
|
latina: 'latina',
|
||||||
latino: 'latino',
|
latino: 'latino',
|
||||||
white: 'white',
|
white: 'white',
|
||||||
|
|
11
src/media.js
11
src/media.js
|
@ -591,13 +591,20 @@ async function storeMedias(baseMedias) {
|
||||||
);
|
);
|
||||||
|
|
||||||
const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(fetchedMedias);
|
const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(fetchedMedias);
|
||||||
const newMedias = argv.force ? uniqueHashMedias.concat(existingHashMedias) : uniqueHashMedias;
|
|
||||||
|
|
||||||
const savedMedias = await Promise.map(
|
const savedMedias = await Promise.map(
|
||||||
newMedias,
|
uniqueHashMedias,
|
||||||
async baseMedia => storeFile(baseMedia),
|
async baseMedia => storeFile(baseMedia),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (argv.force) {
|
||||||
|
// overwrite files in case image processing was changed
|
||||||
|
await Promise.map(
|
||||||
|
existingHashMedias,
|
||||||
|
async baseMedia => storeFile(baseMedia),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const newMediaWithEntries = savedMedias.map((media, index) => curateMediaEntry(media, index));
|
const newMediaWithEntries = savedMedias.map((media, index) => curateMediaEntry(media, index));
|
||||||
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
|
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
|
||||||
|
|
||||||
|
|
|
@ -1,54 +1,54 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const { get, getAll } = require('../utils/qu');
|
const qu = require('../utils/qu');
|
||||||
|
|
||||||
function scrapeLatest(scenes) {
|
function scrapeAll(scenes) {
|
||||||
return scenes.map(({ qu }) => {
|
return scenes.map(({ query }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const href = qu.url('.shoot-thumb-title a');
|
const href = query.url('.shoot-thumb-title a');
|
||||||
release.url = `https://kink.com${href}`;
|
release.url = `https://kink.com${href}`;
|
||||||
|
|
||||||
release.shootId = href.split('/').slice(-1)[0];
|
release.shootId = href.split('/').slice(-1)[0];
|
||||||
release.entryId = release.shootId;
|
release.entryId = release.shootId;
|
||||||
|
|
||||||
release.title = qu.q('.shoot-thumb-title a', true);
|
release.title = query.q('.shoot-thumb-title a', true);
|
||||||
release.date = qu.date('.date', 'MMM DD, YYYY');
|
release.date = query.date('.date', 'MMM DD, YYYY');
|
||||||
|
|
||||||
release.actors = qu.all('.shoot-thumb-models a', true);
|
release.actors = query.all('.shoot-thumb-models a', true);
|
||||||
release.stars = qu.q('.average-rating', 'data-rating') / 10;
|
release.stars = query.q('.average-rating', 'data-rating') / 10;
|
||||||
|
|
||||||
release.poster = qu.img('.adimage');
|
release.poster = query.img('.adimage');
|
||||||
release.photos = qu.imgs('.rollover .roll-image', 'data-imagesrc').map(photo => [
|
release.photos = query.imgs('.rollover .roll-image', 'data-imagesrc').map(photo => [
|
||||||
photo.replace('410/', '830/'),
|
photo.replace('410/', '830/'),
|
||||||
photo,
|
photo,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
release.duration = qu.dur('.video span');
|
release.duration = query.dur('.video span');
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene({ qu }, url) {
|
async function scrapeScene({ query }, url) {
|
||||||
const release = { url };
|
const release = { url };
|
||||||
|
|
||||||
release.shootId = new URL(url).pathname.split('/')[2];
|
release.shootId = new URL(url).pathname.split('/')[2];
|
||||||
release.entryId = release.shootId;
|
release.entryId = release.shootId;
|
||||||
|
|
||||||
release.title = qu.q('.shoot-title span.favorite-button', 'data-title');
|
release.title = query.q('.shoot-title span.favorite-button', 'data-title');
|
||||||
release.description = qu.q('.description-text', true);
|
release.description = query.q('.description-text', true);
|
||||||
|
|
||||||
release.date = qu.date('.shoot-date', 'MMMM DD, YYYY');
|
release.date = query.date('.shoot-date', 'MMMM DD, YYYY');
|
||||||
release.actors = qu.all('.names a', true).map(actor => actor.replace(/,\s*/, ''));
|
release.actors = query.all('.names a', true).map(actor => actor.replace(/,\s*/, ''));
|
||||||
release.director = qu.q('.director-name', true);
|
release.director = query.q('.director-name', true);
|
||||||
|
|
||||||
release.photos = qu.imgs('.gallery .thumb img', 'data-image-file');
|
release.photos = query.imgs('.gallery .thumb img', 'data-image-file');
|
||||||
release.poster = qu.poster();
|
release.poster = query.poster();
|
||||||
|
|
||||||
release.tags = qu.all('.tag-list a[href*="/tag"]', true).map(tag => tag.replace(/,\s*/, ''));
|
release.tags = query.all('.tag-list a[href*="/tag"]', true).map(tag => tag.replace(/,\s*/, ''));
|
||||||
|
|
||||||
const trailer = qu.q('.player span[data-type="trailer-src"]', 'data-url');
|
const trailer = query.q('.player span[data-type="trailer-src"]', 'data-url');
|
||||||
|
|
||||||
release.trailer = [
|
release.trailer = [
|
||||||
{
|
{
|
||||||
|
@ -69,23 +69,77 @@ async function scrapeScene({ qu }, url) {
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
release.channel = qu.url('.shoot-logo a').split('/').slice(-1)[0];
|
release.channel = query.url('.shoot-logo a').split('/').slice(-1)[0];
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchActorReleases(actorUrl, page = 1, accReleases = []) {
|
||||||
const res = await getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot');
|
const res = await qu.get(`${actorUrl}?page=${page}`);
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeLatest(res.items, site);
|
const releases = scrapeAll(qu.initAll(res.item.el, '.shoot-list .shoot'));
|
||||||
|
const hasNextPage = res.item.query.exists('.paginated-nav li:last-child:not(.disabled)');
|
||||||
|
|
||||||
|
if (hasNextPage) {
|
||||||
|
return fetchActorReleases(actorUrl, page + 1, accReleases.concat(releases));
|
||||||
|
}
|
||||||
|
|
||||||
|
return accReleases.concat(releases);
|
||||||
|
}
|
||||||
|
|
||||||
|
return accReleases;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Build an actor profile from a Kink performer page.
 *
 * Most attributes are derived from the performer's bio tags. Tags are
 * trimmed and lowercased before matching, so detection does not depend on
 * how the page capitalises them.
 *
 * @param {{ query: object }} context - page context; `query` is used for q, all, img and urls lookups
 * @param {string} actorUrl - performer page URL, handed to fetchActorReleases when releases are requested
 * @param {{ releases?: boolean }} [include] - optional extras; releases are only fetched when `include.releases` is truthy
 * @returns {Promise<object>} the assembled profile
 */
async function scrapeProfile({ query }, actorUrl, include) {
	const profile = {};

	profile.description = query.q('.bio #expand-text', true);

	// normalise once so every check below can compare against lowercase literals
	const tags = (query.all('.bio-tags a', true) || []).map(tag => String(tag).trim().toLowerCase());
	const hasTag = (...names) => names.some(name => tags.includes(name));

	// later matches intentionally override earlier ones, same precedence as before
	if (hasTag('brunette', 'brunet')) profile.hairColor = 'brown';
	if (hasTag('blonde', 'blond')) profile.hairColor = 'blonde';
	if (hasTag('black hair')) profile.hairColor = 'black';
	if (hasTag('redhead')) profile.hairColor = 'red';

	if (hasTag('natural boobs')) profile.naturalBoobs = true;
	if (hasTag('fake boobs')) profile.naturalBoobs = false;

	if (hasTag('white')) profile.ethnicity = 'white';
	if (hasTag('latin')) profile.ethnicity = 'latin';
	if (hasTag('black')) profile.ethnicity = 'black';

	if (hasTag('pierced nipples')) profile.hasPiercings = true;
	if (hasTag('tattoo')) profile.hasTattoos = true;

	if (hasTag('foreskin')) profile.hasForeskin = true;

	if (hasTag('big dick', 'foreskin') && hasTag('fake boobs', 'big tits')) {
		profile.gender = 'transsexual';
	}

	profile.avatar = query.img('.bio-slider-img, .bio-img:not([src*="Missing"])');
	profile.social = query.urls('a.social-link');

	// include may be omitted by callers that only want the basic profile
	if (include && include.releases) {
		profile.releases = await fetchActorReleases(actorUrl);
	}

	return profile;
}
|
||||||
|
|
||||||
|
async function fetchLatest(site, page = 1) {
|
||||||
|
const res = await qu.getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot');
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
return scrapeAll(res.items, site);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeScene(res.item, url, site);
|
return scrapeScene(res.item, url, site);
|
||||||
|
@ -94,7 +148,32 @@ async function fetchScene(url, site) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Look up a performer on Kink by name and scrape their profile page.
 *
 * @param {string} actorName - performer name; must match the `alt` text of a search result's image exactly
 * @param {object} entity - not used by this scraper
 * @param {object} include - forwarded to scrapeProfile
 * @returns {Promise<*>} the scraped profile, the `status` of whichever request failed,
 *   or null when no search result matches the name
 */
async function fetchProfile(actorName, entity, include) {
	// encode the name so spaces, '&', '#' etc. cannot corrupt the query string
	const searchUrl = `https://kink.com/search?type=performers&q=${encodeURIComponent(actorName)}`;
	const searchRes = await qu.getAll(searchUrl, '.model');

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// escape characters that would terminate or corrupt the quoted attribute value
	const escapedName = actorName.replace(/["\\]/g, '\\$&');

	// each search hit has its own query context; test the hit itself, not the module-level helpers
	const actorItem = searchRes.items.find(item => item.query.exists(`.model-link img[alt="${escapedName}"]`));

	if (!actorItem) {
		return null;
	}

	const actorPath = actorItem.query.url('.model-link');
	const actorUrl = `https://kink.com${actorPath}`;
	const actorRes = await qu.get(actorUrl);

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, actorUrl, include);
	}

	return actorRes.status;
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
|
fetchProfile,
|
||||||
};
|
};
|
||||||
|
|
|
@ -180,6 +180,7 @@ module.exports = {
|
||||||
julesjordan,
|
julesjordan,
|
||||||
kellymadison,
|
kellymadison,
|
||||||
killergram,
|
killergram,
|
||||||
|
kink,
|
||||||
legalporno,
|
legalporno,
|
||||||
men,
|
men,
|
||||||
metrohd,
|
metrohd,
|
||||||
|
|
|
@ -358,6 +358,7 @@ module.exports = {
|
||||||
ctxa: initAll,
|
ctxa: initAll,
|
||||||
geta: getAll,
|
geta: getAll,
|
||||||
qu: quFuncs,
|
qu: quFuncs,
|
||||||
|
query: quFuncs,
|
||||||
prefixUrl,
|
prefixUrl,
|
||||||
...legacyFuncs,
|
...legacyFuncs,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue