Added upcoming, profile and detailed scene actor scraping to InTheCrack. Fixed clip upsert.
|
@ -86,12 +86,14 @@ export default {
|
||||||
}
|
}
|
||||||
|
|
||||||
.clip-duration {
|
.clip-duration {
|
||||||
|
background: var(--darken);
|
||||||
color: var(--text-light);
|
color: var(--text-light);
|
||||||
display: block;
|
display: block;
|
||||||
position: absolute;
|
position: absolute;
|
||||||
bottom: 0;
|
top: 0;
|
||||||
left: 0;
|
right: 0;
|
||||||
padding: .5rem .5rem .75rem 1rem;
|
padding: .25rem .5rem;
|
||||||
|
font-size: .9rem;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
text-shadow: 0 0 2px var(--darken-strong);
|
text-shadow: 0 0 2px var(--darken-strong);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
<div class="column">
|
<div class="column">
|
||||||
<div class="tidbits">
|
<div class="tidbits">
|
||||||
<a
|
<a
|
||||||
v-if="release.date"
|
|
||||||
:title="release.url && `View scene on ${release.entity.name}`"
|
:title="release.url && `View scene on ${release.entity.name}`"
|
||||||
:href="release.url"
|
:href="release.url"
|
||||||
:class="{ link: release.url }"
|
:class="{ link: release.url }"
|
||||||
|
@ -11,8 +10,8 @@
|
||||||
rel="noopener noreferrer"
|
rel="noopener noreferrer"
|
||||||
class="tidbit date nolink"
|
class="tidbit date nolink"
|
||||||
>
|
>
|
||||||
<span class="date-compact">{{ formatDate(release.date, 'MMM D, YYYY', release.datePrecision) }}</span>
|
<span class="date-compact">{{ release.date ? formatDate(release.date, 'MMM D, YYYY', release.datePrecision) : 'Date N/A' }}</span>
|
||||||
<span class="date-full">{{ formatDate(release.date, 'MMMM D, YYYY', release.datePrecision) }}</span>
|
<span class="date-full">{{ release.date ? formatDate(release.date, 'MMMM D, YYYY', release.datePrecision) : 'Date unknown' }}</span>
|
||||||
|
|
||||||
<Icon
|
<Icon
|
||||||
v-if="release.url"
|
v-if="release.url"
|
||||||
|
|
|
@ -103,6 +103,7 @@ async function mounted() {
|
||||||
'femdom',
|
'femdom',
|
||||||
],
|
],
|
||||||
toys: [
|
toys: [
|
||||||
|
'anal-toys',
|
||||||
'double-dildo',
|
'double-dildo',
|
||||||
'double-dildo-blowjob',
|
'double-dildo-blowjob',
|
||||||
],
|
],
|
||||||
|
|
After Width: | Height: | Size: 372 KiB |
After Width: | Height: | Size: 6.6 KiB |
After Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 932 KiB After Width: | Height: | Size: 1019 KiB |
Before Width: | Height: | Size: 6.1 KiB After Width: | Height: | Size: 6.0 KiB |
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 26 KiB |
|
@ -2649,7 +2649,7 @@ const sites = [
|
||||||
{
|
{
|
||||||
slug: 'inthecrack',
|
slug: 'inthecrack',
|
||||||
name: 'InTheCrack',
|
name: 'InTheCrack',
|
||||||
url: 'https://inthecrack.com/',
|
url: 'https://inthecrack.com',
|
||||||
},
|
},
|
||||||
// INTERRACIAL PASS
|
// INTERRACIAL PASS
|
||||||
{
|
{
|
||||||
|
|
|
@ -589,6 +589,7 @@ const tagPosters = [
|
||||||
['airtight', 6, 'Remy Lacroix in "Ass Worship 14" for Jules Jordan'],
|
['airtight', 6, 'Remy Lacroix in "Ass Worship 14" for Jules Jordan'],
|
||||||
['anal', 0, 'Adriana Chechik in "Manuel Creampies Their Asses 3" for Jules Jordan'],
|
['anal', 0, 'Adriana Chechik in "Manuel Creampies Their Asses 3" for Jules Jordan'],
|
||||||
['anal-creampie', 1, 'Aleska Diamond in "Aleska Wants More" for Asshole Fever'],
|
['anal-creampie', 1, 'Aleska Diamond in "Aleska Wants More" for Asshole Fever'],
|
||||||
|
['anal-toys', 0, 'Kira Noir in 1225 for InTheCrack'],
|
||||||
['ass-eating', 0, 'Angelica Heart and Leanna Sweet in "ATM Bitches" for Asshole Fever'],
|
['ass-eating', 0, 'Angelica Heart and Leanna Sweet in "ATM Bitches" for Asshole Fever'],
|
||||||
['asian', 0, 'Jade Kush for Erotica X'],
|
['asian', 0, 'Jade Kush for Erotica X'],
|
||||||
['atm', 2, 'Jureka Del Mar in "Stretched Out" for Her Limit'],
|
['atm', 2, 'Jureka Del Mar in "Stretched Out" for Her Limit'],
|
||||||
|
|
|
@ -67,6 +67,7 @@ const { argv } = yargs
|
||||||
describe: 'Fetch all scenes for an actor',
|
describe: 'Fetch all scenes for an actor',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
default: false,
|
default: false,
|
||||||
|
alias: 'actor-scenes',
|
||||||
})
|
})
|
||||||
.option('actors-sources', {
|
.option('actors-sources', {
|
||||||
describe: 'Use these scrapers for actor data',
|
describe: 'Use these scrapers for actor data',
|
||||||
|
|
|
@ -135,6 +135,10 @@ async function scrapeRelease(baseRelease, entities, type = 'scene') {
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
|
logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
|
||||||
|
|
||||||
|
if (argv.debug) {
|
||||||
|
console.error(error);
|
||||||
|
}
|
||||||
|
|
||||||
if (error.code === 'NO_ENTRY_ID') {
|
if (error.code === 'NO_ENTRY_ID') {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,17 +4,19 @@ const moment = require('moment');
|
||||||
|
|
||||||
const qu = require('../utils/q');
|
const qu = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
||||||
|
|
||||||
function scrapeAll(scenes, channel) {
|
function scrapeAll(scenes, channel) {
|
||||||
return scenes.map(({ query }) => {
|
return scenes.map(({ query }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.url = query.url('a', 'href', { origin: channel.url });
|
release.url = query.url('a', 'href', { origin: channel.url });
|
||||||
release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1];
|
// release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1]; can't be matched with upcoming scenes
|
||||||
|
|
||||||
release.shootId = query.cnt('a span:nth-of-type(1)').match(/^\d+/)?.[0];
|
release.shootId = query.cnt('a span:nth-of-type(1)').match(/^\d+/)?.[0];
|
||||||
release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD');
|
release.entryId = release.shootId;
|
||||||
|
|
||||||
|
release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD');
|
||||||
release.actors = (query.q('a img', 'alt') || query.cnt('a span:nth-of-type(1)'))?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
release.actors = (query.q('a img', 'alt') || query.cnt('a span:nth-of-type(1)'))?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
||||||
|
|
||||||
release.poster = release.shootId
|
release.poster = release.shootId
|
||||||
|
@ -25,13 +27,145 @@ function scrapeAll(scenes, channel) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ query, html }, url, channel) {
|
function scrapeUpcoming(scenes, channel) {
|
||||||
|
return scenes.map(({ query }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1];
|
const title = query.cnt('span');
|
||||||
release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0];
|
|
||||||
|
|
||||||
release.actors = query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
release.entryId = title.match(/^\d+/)[0];
|
||||||
|
release.actors = title.slice(0, title.indexOf('-')).match(/[a-zA-Z]+(\s[a-zA-Z]+)*/g);
|
||||||
|
|
||||||
|
const date = moment.utc(title.match(/\w+ \d+\w+$/)[0], 'MMM Do');
|
||||||
|
|
||||||
|
if (date.isBefore()) {
|
||||||
|
// date is next year
|
||||||
|
release.date = date.add(1, 'year').toDate();
|
||||||
|
} else {
|
||||||
|
release.date = date.toDate();
|
||||||
|
}
|
||||||
|
|
||||||
|
release.poster = [
|
||||||
|
`https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`,
|
||||||
|
query.img('img', 'src', { origin: channel.url }),
|
||||||
|
];
|
||||||
|
|
||||||
|
return release;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeProfileScenes(items, actorName, channel) {
|
||||||
|
return items.map(({ query }) => {
|
||||||
|
const release = {};
|
||||||
|
|
||||||
|
if (slugify(query.cnt()) === 'no-other-collections') {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const details = query.cnts('figure p').reduce((acc, info) => {
|
||||||
|
const [key, value] = info.split(':');
|
||||||
|
|
||||||
|
return {
|
||||||
|
...acc,
|
||||||
|
[slugify(key, '_')]: value?.trim(),
|
||||||
|
};
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
release.url = query.url('a', 'href', { origin: channel.url });
|
||||||
|
|
||||||
|
release.shootId = details.collection.match(/\d+/)[0];
|
||||||
|
release.entryId = release.shootId;
|
||||||
|
|
||||||
|
release.date = qu.parseDate(details.release_date, 'YYYY-MM-DD');
|
||||||
|
release.actors = [actorName];
|
||||||
|
|
||||||
|
/* rely on clip length
|
||||||
|
const durationString = Object.keys(details).find(info => /\d+_min_video/.test(info));
|
||||||
|
release.duration = durationString && Number(durationString.match(/^\d+/)?.[0]) * 60;
|
||||||
|
*/
|
||||||
|
|
||||||
|
release.productionLocation = details.shoot_location;
|
||||||
|
|
||||||
|
release.poster = [
|
||||||
|
`https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`,
|
||||||
|
query.img('img', 'src', { origin: channel.url }),
|
||||||
|
];
|
||||||
|
|
||||||
|
return release;
|
||||||
|
}).filter(Boolean);
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeProfile({ query }, actorName, actorAvatar, channel, releasesFromScene) {
|
||||||
|
const profile = {};
|
||||||
|
|
||||||
|
const bio = query.cnts(releasesFromScene ? 'ul li' : 'div.modelInfo li').reduce((acc, info) => {
|
||||||
|
const [key, value] = info.split(':');
|
||||||
|
|
||||||
|
return {
|
||||||
|
...acc,
|
||||||
|
[slugify(key, '_')]: value.trim(),
|
||||||
|
};
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
profile.name = actorName || bio.name;
|
||||||
|
profile.gender = 'female';
|
||||||
|
profile.birthPlace = bio.nationality;
|
||||||
|
|
||||||
|
if (bio.height) profile.height = feetInchesToCm(bio.height);
|
||||||
|
if (bio.weight) profile.weight = lbsToKg(bio.weight);
|
||||||
|
|
||||||
|
profile.releases = releasesFromScene?.[profile.name] || scrapeProfileScenes(qu.initAll(query.all('.Models li')), actorName, channel);
|
||||||
|
|
||||||
|
// avatar is the poster of a scene, find scene and use its high quality poster instead
|
||||||
|
const avatarRelease = profile.releases.find(release => new URL(release.poster[1]).pathname === new URL(actorAvatar).pathname);
|
||||||
|
profile.avatar = avatarRelease?.poster[0];
|
||||||
|
|
||||||
|
return profile;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchSceneActors(entryId, _release, channel) {
|
||||||
|
const url = `https://inthecrack.com/Collection/Biography/${entryId}`;
|
||||||
|
const res = await qu.get(url);
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
const actorTabs = qu.initAll(res.item.query.all('#ModelTabs li')).map(({ query }) => ({
|
||||||
|
name: query.cnt('a'),
|
||||||
|
id: query.q('a', 'data-model'),
|
||||||
|
}));
|
||||||
|
|
||||||
|
const actorReleasesByActorName = actorTabs.reduce((acc, { name, id }) => {
|
||||||
|
const releaseEls = qu.initAll(res.item.query.all(`#Model-${id} li`));
|
||||||
|
const releases = scrapeProfileScenes(releaseEls, name, channel);
|
||||||
|
|
||||||
|
return {
|
||||||
|
...acc,
|
||||||
|
[name]: releases,
|
||||||
|
};
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
const actors = qu.initAll(res.item.query.all('.modelInfo > li')).map((item) => {
|
||||||
|
const avatar = item.query.img('img', 'src', { origin: channel.url });
|
||||||
|
const profile = scrapeProfile(item, null, avatar, channel, actorReleasesByActorName);
|
||||||
|
|
||||||
|
return profile;
|
||||||
|
});
|
||||||
|
|
||||||
|
return actors;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function scrapeScene({ query, html }, url, channel) {
|
||||||
|
const release = {};
|
||||||
|
|
||||||
|
const entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1];
|
||||||
|
|
||||||
|
release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0];
|
||||||
|
release.entryId = release.shootId; // site entry ID can't be matched with upcoming scenes
|
||||||
|
|
||||||
|
const actors = await fetchSceneActors(entryId, release, channel);
|
||||||
|
release.actors = actors || query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
||||||
|
|
||||||
release.description = query.cnt('p#CollectionDescription');
|
release.description = query.cnt('p#CollectionDescription');
|
||||||
release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1];
|
release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1];
|
||||||
|
@ -67,22 +201,6 @@ function scrapeScene({ query, html }, url, channel) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile({ query, el }, actorName, entity, include) {
|
|
||||||
const profile = {};
|
|
||||||
|
|
||||||
profile.description = query.cnt('.bio-text');
|
|
||||||
profile.birthPlace = query.cnt('.birth-place span');
|
|
||||||
|
|
||||||
profile.avatar = query.img('.actor-photo img');
|
|
||||||
|
|
||||||
if (include.releases) {
|
|
||||||
return scrapeAll(qu.initAll(el, '.scene'));
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(profile);
|
|
||||||
return profile;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchLatest(channel, page = 1) {
|
async function fetchLatest(channel, page = 1) {
|
||||||
const year = moment().subtract(page - 1, ' year').year();
|
const year = moment().subtract(page - 1, ' year').year();
|
||||||
|
|
||||||
|
@ -96,6 +214,16 @@ async function fetchLatest(channel, page = 1) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function fetchUpcoming(channel) {
|
||||||
|
const res = await qu.getAll(channel.url, '#ComingSoon li');
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
return scrapeUpcoming(res.items, channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.status;
|
||||||
|
}
|
||||||
|
|
||||||
async function fetchScene(url, channel) {
|
async function fetchScene(url, channel) {
|
||||||
const res = await qu.get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
|
@ -106,12 +234,27 @@ async function fetchScene(url, channel) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }, entity, include) {
|
async function fetchProfile({ name: actorName }, channel, _include) {
|
||||||
const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
|
const firstLetter = actorName.charAt(0).toUpperCase();
|
||||||
const res = await qu.get(url);
|
const url = `${channel.url}/Collections/Name/${firstLetter}`;
|
||||||
|
const res = await qu.getAll(url, '.collectionGridLayout li');
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeProfile(res.item, actorName, entity, include);
|
const actorItem = res.items.find(({ query }) => slugify(query.cnt('span')) === slugify(actorName));
|
||||||
|
|
||||||
|
if (actorItem) {
|
||||||
|
const actorUrl = actorItem.query.url('a', 'href', { origin: channel.url });
|
||||||
|
const actorAvatar = actorItem.query.img('img', 'src', { origin: channel.url });
|
||||||
|
const actorRes = await qu.get(actorUrl);
|
||||||
|
|
||||||
|
if (actorRes.ok) {
|
||||||
|
return scrapeProfile(actorRes.item, actorName, actorAvatar, channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return actorRes.status;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
@ -119,6 +262,7 @@ async function fetchProfile({ name: actorName }, entity, include) {
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
|
fetchUpcoming,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
// fetchProfile,
|
fetchProfile,
|
||||||
};
|
};
|
||||||
|
|
|
@ -197,6 +197,7 @@ module.exports = {
|
||||||
iconmale,
|
iconmale,
|
||||||
interracialpass: hush,
|
interracialpass: hush,
|
||||||
interracialpovs: hush,
|
interracialpovs: hush,
|
||||||
|
inthecrack,
|
||||||
jamesdeen: fullpornnetwork,
|
jamesdeen: fullpornnetwork,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
kellymadison,
|
kellymadison,
|
||||||
|
|
|
@ -263,7 +263,7 @@ async function storeClips(releases) {
|
||||||
clip: clip.clip,
|
clip: clip.clip,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
const storedClips = await bulkInsert('clips', curatedClipEntries);
|
const storedClips = await bulkInsert('clips', curatedClipEntries, ['release_id', 'clip']);
|
||||||
const clipIdsByReleaseIdAndClip = storedClips.reduce((acc, clip) => ({
|
const clipIdsByReleaseIdAndClip = storedClips.reduce((acc, clip) => ({
|
||||||
...acc,
|
...acc,
|
||||||
[clip.release_id]: {
|
[clip.release_id]: {
|
||||||
|
|