Compare commits

2 Commits

Author SHA1 Message Date
DebaucheryLibrarian 5b886b3917 Improved actor extraction for fcuk scraper. Changed 'copyright' to 'credit'. Reduced entity page favicon size. 2020-07-15 05:12:29 +02:00
DebaucheryLibrarian c62df2228b Added scraper for FCUK's coed sites. 2020-07-15 04:51:39 +02:00
33 changed files with 113 additions and 19 deletions

View File

@ -43,7 +43,7 @@
>
<img
:src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`"
- :title="actor.avatar.copyright && `© ${actor.avatar.copyright}`"
+ :title="actor.avatar.credit && `© ${actor.avatar.credit}`"
class="avatar"
>
</a>

View File

@ -18,7 +18,7 @@
:src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`"
:data-src="sfw ? `/img/${actor.avatar.sfw.thumbnail}` : `/media/${actor.avatar.thumbnail}`"
:data-loading="sfw ? `/img/${actor.avatar.sfw.lazy}` : `/media/${actor.avatar.lazy}`"
- :title="actor.avatar.copyright && `© ${actor.avatar.copyright}`"
+ :title="actor.avatar.credit && `© ${actor.avatar.credit}`"
class="avatar photo"
@load="$parent.$emit('load')"
>
@ -36,7 +36,7 @@
:src="sfw ? `/img/${photo.sfw.thumbnail}` : `/media/${photo.thumbnail}`"
:data-src="sfw ? `/img/${photo.sfw.thumbnail}` : `/media/${photo.thumbnail}`"
:data-loading="sfw ? `/img/${photo.sfw.lazy}` : `/media/${photo.lazy}`"
- :title="`© ${photo.copyright || photo.entity.name}`"
+ :title="`© ${photo.credit || photo.entity.name}`"
class="photo"
@load="$parent.$emit('load')"
>

View File

@ -215,11 +215,14 @@ export default {
height: 2.5rem;
}
- .logo-parent,
- .favicon {
+ .logo-parent {
height: 1.5rem;
}
+ .favicon {
+ height: 1rem;
+ }
.name {
color: var(--text-light);
display: flex;

View File

@ -116,7 +116,7 @@ function initUiActions(_store, _router) {
thumbnail
lazy
comment
- copyright
+ credit
}
birthCountry: countryByBirthCountryAlpha2 {
alpha2
@ -135,7 +135,7 @@ function initUiActions(_store, _router) {
thumbnail
lazy
comment
- copyright
+ credit
}
birthCountry: countryByBirthCountryAlpha2 {
alpha2

25 binary files changed (previews not shown); before → after sizes:

932 B → 1006 B
2.3 KiB → 22 KiB
3.0 KiB → 3.0 KiB
1.9 KiB → 1.9 KiB
3.1 KiB → 3.1 KiB
9.9 KiB → 9.9 KiB
7.8 KiB → 7.8 KiB
(new) → 1.1 KiB
2.1 KiB → 3.7 KiB
3.6 KiB → 3.6 KiB
2.1 KiB → 3.7 KiB
5.1 KiB → 5.1 KiB
2.2 KiB → 2.2 KiB
2.3 KiB → 22 KiB
24 KiB → 24 KiB
8.7 KiB → 8.7 KiB
31 KiB → 31 KiB
33 KiB → 33 KiB
26 KiB → 26 KiB
(new) → 1.1 KiB
3.5 KiB → 5.6 KiB
49 KiB → 49 KiB
3.5 KiB → 5.6 KiB
84 KiB → 84 KiB
16 KiB → 16 KiB

View File

@ -180,6 +180,7 @@ const tags = [
{
name: 'behind the scenes',
slug: 'behind-the-scenes',
+ priority: 6,
},
{
name: 'big dick',

View File

@ -2139,6 +2139,12 @@ const sites = [
url: 'https://eurocoeds.com',
parent: 'fcuk',
},
+ {
+ name: 'After Hours Exposed',
+ slug: 'afterhoursexposed',
+ url: 'https://afterhoursexposed.com',
+ parent: 'fcuk',
+ },
// FOR BONDAGE
{
name: 'Crowd Bondage',

View File

@ -2,7 +2,9 @@
const qu = require('../utils/qu');
- function scrapeLatest(scenes, channel) {
+ // TODO: profile scraping
+ function scrapeLatestBlog(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
@ -18,6 +20,14 @@ function scrapeLatest(scenes, channel) {
release.description = query.text('p');
release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);
+ // remove common patterns so only the name is left
+ const curatedTitle = release.title.replace(/\b(part \d|\banal|bts)\b/gi, '').trim();
+ if (!/\band\b/.test(curatedTitle) && new RegExp(curatedTitle).test(release.description)) {
+ // scene title is probably the actor name
+ release.actors = [release.title];
+ }
release.poster = query.img('.bigthumb', null, { origin: channel.url });
release.photos = query.imgs('.smallthumb', null, { origin: channel.url });
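Note on the actor heuristic added above: it strips common title suffixes and, if the remainder also recurs in the scene description, treats the title as the performer's name. A minimal sketch of the idea, using a made-up title and description (only the curatedTitle and if lines come from the diff itself):

// hypothetical input; illustrates the heuristic introduced above
const release = {
  title: 'Roxy Part 2',
  description: 'Roxy is back for another after-hours session.',
};

const curatedTitle = release.title.replace(/\b(part \d|\banal|bts)\b/gi, '').trim(); // 'Roxy'

// no 'and' in the curated title, and it recurs in the description,
// so the title is probably just an actor name; the full original title is stored, as in the diff
if (!/\band\b/.test(curatedTitle) && new RegExp(curatedTitle).test(release.description)) {
  release.actors = [release.title];
}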
@ -27,15 +37,41 @@ function scrapeLatest(scenes, channel) {
});
}
- function scrapeScene({ query }, url, channel) {
+ function scrapeAll(scenes, channel) {
+ return scenes.map(({ query }) => {
+ const release = {};
+ release.url = query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])');
+ release.entryId = query.url('.updateThumb img', 'alt');
+ release.title = query.q('.updateInfo h5 a', true);
+ release.actors = query.all('.tour_update_models a', true);
+ release.date = query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY');
+ release.poster = query.img('.updateThumb img');
+ const trailer = query.q('.updateInfo h5 a', 'onclick')?.match(/'(.+)'/)?.[1];
+ if (trailer) {
+ release.trailer = {
+ src: `${channel.url}${trailer}`,
+ };
+ }
+ return release;
+ });
+ }
+ function scrapeSceneBlog({ query }, url, channel) {
const release = {};
release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/).slice(1, 3).join('-');
- release.title = query.q('h4 strong, .videos h3', true);
+ release.title = query.text('h4 strong, .videos h3');
release.description = query.q('#about p, .videos p', true);
- const actors = query.urls('a[href*="/girl/"]').map(actorUrl => actorUrl.match(/video-(\w+)/)?.[1]).filter(Boolean);
+ const actors = query.urls('a[href*="/girl/"]').map(actorUrl => actorUrl.match(/video-([\w\s]+)/)?.[1]).filter(Boolean);
if (actors.length > 0) {
release.actors = actors;
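Note on the regex change above: widening the capture group from \w+ to [\w\s]+ lets it span spaces, so multi-word names in the girl-page URLs are no longer cut off at the first word. A quick illustration with a made-up URL (only the two patterns come from the diff):

// hypothetical URL shape, only meant to show the difference between the two patterns
const actorUrl = 'https://example.com/girl/video-jane doe';

actorUrl.match(/video-(\w+)/)?.[1];     // 'jane'
actorUrl.match(/video-([\w\s]+)/)?.[1]; // 'jane doe'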
@ -52,20 +88,68 @@ function scrapeScene({ query }, url, channel) {
return release;
}
- async function fetchLatest(channel, page = 1) {
+ function scrapeScene({ query, html }, url, channel) {
+ const release = {};
+ release.title = query.q('.updatesBlock h2', true);
+ release.poster = query.meta('property="og:image"');
+ release.entryId = release.poster.match(/\/content\/(.*)\//)?.[1];
+ const trailer = html.match(/src="(.+\.mp4)"/)?.[1];
+ if (trailer) {
+ release.trailer = {
+ src: `${channel.url}${trailer}`,
+ };
+ }
+ return release;
+ }
+ async function fetchLatestBlog(channel, page) {
const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
const res = await qu.getAll(url, '.videos');
- return res.ok ? scrapeLatest(res.items, channel) : res.status;
+ return res.ok ? scrapeLatestBlog(res.items, channel) : res.status;
}
+ async function fetchLatest(channel, page = 1) {
+ if (channel.parameters?.blog) {
+ return fetchLatestBlog(channel, page);
+ }
+ const url = `${channel.url}/categories/Movies_${page}_d.html`;
+ const res = await qu.getAll(url, '.bodyArea .updateItem');
+ return res.ok ? scrapeAll(res.items, channel) : res.status;
+ }
+ async function fetchUpcoming(channel) {
+ if (channel.parameters?.blog) {
+ return [];
+ }
+ const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');
+ return res.ok ? scrapeAll(res.items, channel) : res.status;
+ }
async function fetchScene(url, channel) {
const res = await qu.get(url);
- return res.ok ? scrapeScene(res.item, url, channel) : res.status;
+ if (res.ok) {
+ if (channel.parameters?.blog) {
+ return scrapeSceneBlog(res.item, url, channel);
+ }
+ return scrapeScene(res.item, url, channel);
+ }
+ return res.status;
}
module.exports = {
fetchLatest,
fetchScene,
+ fetchUpcoming,
};
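Note on the reworked scraper above: channels whose configuration sets a blog parameter keep the original blog-style scrapers (fetchLatestBlog / scrapeLatestBlog / scrapeSceneBlog), while the other FCUK channels go through the new tour-site scrapers (scrapeAll and the new scrapeScene), which also enables fetchUpcoming. A rough usage sketch with hypothetical channel objects; which sites actually carry the blog flag is configured elsewhere, not shown in this diff:

// illustrative channel objects; only the parameters.blog switch comes from the diff above
const blogChannel = { url: 'https://eurocoeds.com', parameters: { blog: true } };
const tourChannel = { url: 'https://afterhoursexposed.com' };

fetchLatest(blogChannel, 1); // blog path: fetchLatestBlog -> scrapeLatestBlog
fetchLatest(tourChannel, 1); // tour path: scrapeAll on /categories/Movies_1_d.html
fetchUpcoming(tourChannel);  // tour path only; blog channels return an empty list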

View File

@ -53,15 +53,15 @@ async function filterUniqueReleases(latestReleases, accReleases) {
}
function needNextPage(uniqueReleases, pageAccReleases) {
- if (argv.last && pageAccReleases.length < argv.last) {
- // request for last N releases not yet satisfied
- return true;
- }
if (uniqueReleases.length === 0) {
return false;
}
+ if (argv.last && pageAccReleases.length < argv.last) {
+ // TODO: find a way to paginate if scraper filters page with multiple channels, see Kelly Madison
+ return true;
+ }
if (uniqueReleases.every(release => !!release.date)) {
const oldestReleaseOnPage = uniqueReleases
.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
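Note on the reordering above: the empty-page check now runs before the argv.last check, so a page that yields no unique releases stops pagination even when fewer than the requested last N releases have been collected; the new TODO comment records the known downside (scrapers that mix several channels on one page can be cut off early, as with Kelly Madison). A simplified sketch of the resulting early-return order, not the full function; argv holds the CLI options defined elsewhere in the module:

// simplified control flow of needNextPage after this change
function needNextPage(uniqueReleases, pageAccReleases) {
  if (uniqueReleases.length === 0) {
    return false; // nothing new on this page: stop, even if --last is not yet satisfied
  }

  if (argv.last && pageAccReleases.length < argv.last) {
    return true; // still short of the requested last N releases
  }

  return false; // placeholder for the date-based checks shown in the diff above
}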