Added scraper for FCUK's coed sites.
Before Width: | Height: | Size: 932 B After Width: | Height: | Size: 1006 B |
Before Width: | Height: | Size: 2.3 KiB After Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 3.0 KiB After Width: | Height: | Size: 3.0 KiB |
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.9 KiB |
Before Width: | Height: | Size: 3.1 KiB After Width: | Height: | Size: 3.1 KiB |
Before Width: | Height: | Size: 9.9 KiB After Width: | Height: | Size: 9.9 KiB |
Before Width: | Height: | Size: 7.8 KiB After Width: | Height: | Size: 7.8 KiB |
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 3.7 KiB |
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 3.6 KiB |
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 3.7 KiB |
Before Width: | Height: | Size: 5.1 KiB After Width: | Height: | Size: 5.1 KiB |
Before Width: | Height: | Size: 2.2 KiB After Width: | Height: | Size: 2.2 KiB |
Before Width: | Height: | Size: 2.3 KiB After Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 8.7 KiB After Width: | Height: | Size: 8.7 KiB |
Before Width: | Height: | Size: 31 KiB After Width: | Height: | Size: 31 KiB |
Before Width: | Height: | Size: 33 KiB After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 5.6 KiB |
Before Width: | Height: | Size: 49 KiB After Width: | Height: | Size: 49 KiB |
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 5.6 KiB |
Before Width: | Height: | Size: 84 KiB After Width: | Height: | Size: 84 KiB |
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
|
@ -2139,6 +2139,12 @@ const sites = [
|
||||||
url: 'https://eurocoeds.com',
|
url: 'https://eurocoeds.com',
|
||||||
parent: 'fcuk',
|
parent: 'fcuk',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'After Hours Exposed',
|
||||||
|
slug: 'afterhoursexposed',
|
||||||
|
url: 'https://afterhoursexposed.com',
|
||||||
|
parent: 'fcuk',
|
||||||
|
},
|
||||||
// FOR BONDAGE
|
// FOR BONDAGE
|
||||||
{
|
{
|
||||||
name: 'Crowd Bondage',
|
name: 'Crowd Bondage',
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const qu = require('../utils/qu');
|
||||||
|
|
||||||
function scrapeLatest(scenes, channel) {
|
function scrapeLatestBlog(scenes, channel) {
|
||||||
return scenes.map(({ query }) => {
|
return scenes.map(({ query }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
|
@ -18,6 +18,11 @@ function scrapeLatest(scenes, channel) {
|
||||||
release.description = query.text('p');
|
release.description = query.text('p');
|
||||||
release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);
|
release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);
|
||||||
|
|
||||||
|
if (!/\band\b/.test(release.title) && new RegExp(release.title).test(release.description)) {
|
||||||
|
// scene title is probably the actor name
|
||||||
|
release.actors = [release.title];
|
||||||
|
}
|
||||||
|
|
||||||
release.poster = query.img('.bigthumb', null, { origin: channel.url });
|
release.poster = query.img('.bigthumb', null, { origin: channel.url });
|
||||||
release.photos = query.imgs('.smallthumb', null, { origin: channel.url });
|
release.photos = query.imgs('.smallthumb', null, { origin: channel.url });
|
||||||
|
|
||||||
|
@ -27,7 +32,33 @@ function scrapeLatest(scenes, channel) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ query }, url, channel) {
|
/**
 * Builds one release object per listing item.
 *
 * @param {Array<{query: Object}>} scenes - items whose `query` helper provides `url`, `q`, `all`, `date` and `img`
 * @param {{url: string}} channel - channel record; `channel.url` is the base the trailer path is resolved against
 * @returns {Object[]} releases with url, entryId, title, actors, date, poster and, when one is found, trailer
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])');
		// the thumbnail's alt attribute is used as the entry ID
		release.entryId = query.url('.updateThumb img', 'alt');

		release.title = query.q('.updateInfo h5 a', true);

		release.actors = query.all('.tour_update_models a', true);
		release.date = query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY');

		release.poster = query.img('.updateThumb img');

		// the trailer path is the first single-quoted value in the title link's onclick attribute;
		// [^']+ stops at the closing quote, so any further quoted arguments are not swallowed
		const trailer = query.q('.updateInfo h5 a', 'onclick')?.match(/'([^']+)'/)?.[1];

		if (trailer) {
			release.trailer = {
				// resolve against the channel URL so root-relative, relative and absolute paths all yield a valid URL
				src: new URL(trailer, channel.url).href,
			};
		}

		return release;
	});
}
|
||||||
|
|
||||||
|
function scrapeSceneBlog({ query }, url, channel) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/).slice(1, 3).join('-');
|
release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/).slice(1, 3).join('-');
|
||||||
|
@ -35,7 +66,7 @@ function scrapeScene({ query }, url, channel) {
|
||||||
release.title = query.q('h4 strong, .videos h3', true);
|
release.title = query.q('h4 strong, .videos h3', true);
|
||||||
release.description = query.q('#about p, .videos p', true);
|
release.description = query.q('#about p, .videos p', true);
|
||||||
|
|
||||||
const actors = query.urls('a[href*="/girl/"]').map(actorUrl => actorUrl.match(/video-(\w+)/)?.[1]).filter(Boolean);
|
const actors = query.urls('a[href*="/girl/"]').map(actorUrl => actorUrl.match(/video-([\w\s]+)/)?.[1]).filter(Boolean);
|
||||||
|
|
||||||
if (actors.length > 0) {
|
if (actors.length > 0) {
|
||||||
release.actors = actors;
|
release.actors = actors;
|
||||||
|
@ -52,20 +83,69 @@ function scrapeScene({ query }, url, channel) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel, page = 1) {
|
/**
 * Scrapes a single scene page into a release object.
 *
 * @param {{query: Object, html: string}} item - `query` provides `q` and `meta`; `html` is the raw page markup
 * @param {string} url - scene URL (not used here; kept so the signature matches the blog scraper)
 * @param {{url: string}} channel - channel record; `channel.url` is the base the trailer path is resolved against
 * @returns {Object} release with title, poster, entryId and, when one is found, trailer
 */
function scrapeScene({ query, html }, url, channel) {
	const release = {};

	release.title = query.q('.updatesBlock h2', true);
	release.poster = query.meta('property="og:image"');
	// the entry ID is taken from the poster path; a page without og:image must not crash the scrape
	release.entryId = release.poster?.match(/\/content\/(.*)\//)?.[1];

	// [^"]+ keeps the match inside a single src attribute even when several share one line
	const trailer = html.match(/src="([^"]+\.mp4)"/)?.[1];

	if (trailer) {
		release.trailer = {
			// resolve against the channel URL so root-relative, relative and absolute sources all yield a valid URL
			src: new URL(trailer, channel.url).href,
		};
	}

	return release;
}
|
||||||
|
|
||||||
|
/**
 * Fetches one page of the blog-style update listing and scrapes it.
 *
 * @param {{url: string}} channel - channel record; `channel.url` is the site base URL
 * @param {number} [page=1] - 1-based page number; the listing is addressed by an offset in steps of 10
 * @returns {Promise<Object[]|*>} scraped releases when the request succeeds, otherwise `res.status`
 */
async function fetchLatestBlog(channel, page = 1) {
	// default page keeps a call without a page from requesting offset NaN
	const offset = (page - 1) * 10;
	const url = `${channel.url}/free/updates/videos/${offset}`;
	const res = await qu.getAll(url, '.videos');

	return res.ok ? scrapeLatestBlog(res.items, channel) : res.status;
}
|
||||||
|
|
||||||
|
/**
 * Fetches and scrapes one page of the latest releases for a channel.
 * Channels flagged with `parameters.blog` are delegated to the blog listing fetcher.
 *
 * @param {{url: string, parameters?: {blog?: *}}} channel
 * @param {number} [page=1]
 * @returns {Promise<Object[]|*>} scraped releases when the request succeeds, otherwise the response status
 */
async function fetchLatest(channel, page = 1) {
	const usesBlogLayout = channel.parameters?.blog;

	if (usesBlogLayout) {
		return fetchLatestBlog(channel, page);
	}

	const listing = await qu.getAll(`${channel.url}/categories/Movies_${page}_d.html`, '.bodyArea .updateItem');

	if (!listing.ok) {
		return listing.status;
	}

	return scrapeAll(listing.items, channel);
}
|
||||||
|
|
||||||
|
/**
 * Fetches the upcoming scenes shown on the channel's front page.
 * Channels flagged with `parameters.blog` yield an empty list without any request.
 *
 * @param {{url: string, parameters?: {blog?: *}}} channel
 * @returns {Promise<Object[]|*>} scraped releases when the request succeeds, otherwise the response status
 */
async function fetchUpcoming(channel) {
	const usesBlogLayout = channel.parameters?.blog;

	if (usesBlogLayout) {
		return [];
	}

	const frontPage = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');

	if (!frontPage.ok) {
		return frontPage.status;
	}

	return scrapeAll(frontPage.items, channel);
}
|
||||||
|
|
||||||
/**
 * Fetches a scene page and scrapes it with the scraper matching the channel's layout.
 *
 * @param {string} url - scene page URL
 * @param {{url: string, parameters?: {blog?: *}}} channel
 * @returns {Promise<Object|*>} the scraped release when the request succeeds, otherwise the response status
 */
async function fetchScene(url, channel) {
	const res = await qu.get(url);

	if (!res.ok) {
		return res.status;
	}

	// blog-flagged channels use the blog scene layout, all others the regular one
	return channel.parameters?.blog
		? scrapeSceneBlog(res.item, url, channel)
		: scrapeScene(res.item, url, channel);
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
|
fetchUpcoming,
|
||||||
};
|
};
|
||||||
|
|
|
@ -53,15 +53,15 @@ async function filterUniqueReleases(latestReleases, accReleases) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function needNextPage(uniqueReleases, pageAccReleases) {
|
function needNextPage(uniqueReleases, pageAccReleases) {
|
||||||
if (argv.last && pageAccReleases.length < argv.last) {
|
|
||||||
// request for last N releases not yet satisfied
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (uniqueReleases.length === 0) {
|
if (uniqueReleases.length === 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (argv.last && pageAccReleases.length < argv.last) {
|
||||||
|
// TODO: find a way to paginate if scraper filters page with multiple channels, see Kelly Madison
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if (uniqueReleases.every(release => !!release.date)) {
|
if (uniqueReleases.every(release => !!release.date)) {
|
||||||
const oldestReleaseOnPage = uniqueReleases
|
const oldestReleaseOnPage = uniqueReleases
|
||||||
.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
|
.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
|
||||||
|
|