Added Teen Mega World scraper.
This commit is contained in:
@@ -278,8 +278,8 @@ function curateActorEntry(baseActor, batchId) {
|
||||
return {
|
||||
name: baseActor.name,
|
||||
slug: baseActor.slug,
|
||||
entity_id: collisionLikely ? baseActor.entity.id : null,
|
||||
entry_id: collisionLikely ? baseActor.entryId : null,
|
||||
entity_id: (collisionLikely && baseActor.entity?.id) || null,
|
||||
entry_id: (collisionLikely && baseActor.entryId) || null,
|
||||
batch_id: batchId,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -54,6 +54,7 @@ const score = require('./score');
|
||||
const spizoo = require('./spizoo');
|
||||
const teamskeet = require('./teamskeet');
|
||||
const teencoreclub = require('./teencoreclub');
|
||||
const teenmegaworld = require('./teenmegaworld');
|
||||
const topwebmodels = require('./topwebmodels');
|
||||
const traxxx = require('./traxxx');
|
||||
const vivid = require('./vivid');
|
||||
@@ -138,6 +139,7 @@ const scrapers = {
|
||||
spizoo,
|
||||
swallowsalon: julesjordan,
|
||||
teencoreclub,
|
||||
teenmegaworld,
|
||||
teamskeet,
|
||||
topwebmodels,
|
||||
transbella: porndoe,
|
||||
@@ -262,6 +264,7 @@ const scrapers = {
|
||||
swallowed: mikeadriano,
|
||||
teamskeet,
|
||||
teencoreclub,
|
||||
teenmegaworld,
|
||||
thatsitcomshow: nubiles,
|
||||
topwebmodels,
|
||||
transangels: mindgeek,
|
||||
|
||||
118
src/scrapers/teenmegaworld.js
Normal file
118
src/scrapers/teenmegaworld.js
Normal file
@@ -0,0 +1,118 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Scrape release summaries from a list of scene tiles.
 *
 * @param {Array<{ query: object }>} scenes - Query contexts, one per scene tile.
 * @param {object} channel - Entity being scraped; when its `type` is not
 *   'network', its `parent` is used as the network.
 * @returns {object[]} One release object per tile. Fields that cannot be read
 *   from a tile are left unset instead of aborting the whole page.
 */
function scrapeAll(scenes, channel) {
	// The network is the same for every tile, so resolve it once.
	const network = channel.type === 'network' ? channel : channel.parent;

	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('a.title');

		if (release.url) {
			// Passing the network URL as base lets a relative link resolve instead of throwing.
			release.entryId = new URL(release.url, network.url).pathname.match(/\/trailers\/(.+).html/)?.[1].toLowerCase();
		}

		release.title = query.cnt('.title');

		release.date = query.date('time', 'MMMM D, YYYY');
		release.actors = query.all('.actors a').map(el => ({
			name: query.cnt(el),
			url: query.url(el, null),
		}));

		// Every data attribute whose name contains 'src' is treated as an image source;
		// the first becomes the poster, the remainder the photos.
		const imageEl = query.el('.images img');
		const sources = imageEl
			? Object.entries(imageEl.dataset)
				.filter(([key]) => /src/.test(key))
				.map(([, value]) => qu.prefixUrl(value, network.url))
			: [];

		[release.poster, ...release.photos] = sources;

		release.likes = query.number('.rating-on-thumb');

		const siteId = query.url('.site a', 'href', { origin: network.url, object: true })?.searchParams.get('site[]');

		if (siteId) {
			// Children without a siteId parameter are skipped rather than crashing the lookup.
			release.channel = network.children?.find(child => child.parameters?.siteId?.toString() === siteId)?.slug;
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a single scene page.
 *
 * @param {{ query: object }} context - Query context for the scene page.
 * @param {string} url - Absolute URL of the scene page.
 * @param {object} channel - Entity being scraped; when its `type` is not
 *   'network', its `parent` is used as the network.
 * @returns {object} The scraped release.
 */
function scrapeScene({ query }, url, channel) {
	const release = {};
	const network = channel.type === 'network' ? channel : channel.parent;

	// Optional access keeps a URL outside /trailers/ from throwing, matching scrapeAll.
	release.entryId = new URL(url).pathname.match(/\/trailers\/(.+).html/)?.[1].toLowerCase();

	release.title = query.cnt('.title-line h1');
	release.description = query.cnt('.shadow .text');

	release.date = query.date('.title-line .date', 'MMMM D, YYYY');

	// The value is multiplied by 60, so it is presumably minutes. TODO confirm against the site.
	// A missing value must stay null: null * 60 would otherwise evaluate to 0.
	const minutes = query.number('.dur');
	release.duration = minutes == null ? null : minutes * 60;

	release.actors = query.all('.site a[href*="/models"]').map(el => ({
		name: query.cnt(el),
		url: query.url(el, null),
	}));

	release.tags = query.cnts('.tag-list a');

	release.poster = query.poster();
	release.trailer = query.video();

	release.likes = query.number('.rating-data .value');

	const siteId = query.url('.site a[href*="site[]"]', 'href', { origin: network.url, object: true })?.searchParams.get('site[]');

	if (siteId) {
		// Children without a siteId parameter are skipped rather than crashing the lookup.
		release.channel = network.children?.find(child => child.parameters?.siteId?.toString() === siteId)?.slug;
	}

	return release;
}
|
||||
|
||||
/**
 * Scrape an actor profile page.
 *
 * @param {{ query: object, el: object }} context - Query context and root element of the page.
 * @param {string} url - URL the profile was fetched from; stored on the profile.
 * @param {object} entity - Entity the profile belongs to; supplies the avatar origin
 *   and is handed to scrapeAll for the listed scenes.
 * @returns {object} Profile with url, description, hairColor, eyes, avatar and scenes.
 */
function scrapeProfile({ query, el }, url, entity) {
	// Build a lookup from each stat's slugified title to its value.
	const bioEntries = query.all('.stats li').map(bioEl => [
		slugify(query.cnt(bioEl, '.title')),
		query.cnt(bioEl, '.value'),
	]);
	const bio = Object.fromEntries(bioEntries);

	return {
		url,
		description: query.cnt('.bio .text'),
		hairColor: bio.hair,
		eyes: bio.eyes,
		avatar: query.img('.model-info .photo img', 'data-src', { origin: entity.url }),
		// Only list entries marked data-type="vids" are scraped as scenes.
		scenes: scrapeAll(qu.initAll(el, '.video-list .video[data-type="vids"]'), entity),
	};
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes for a channel from the network search page.
 *
 * @param {object} channel - Entity to fetch; its `parameters.siteId` selects the site,
 *   and its `parent` supplies the base URL unless the entity is itself a network.
 * @param {number} [page=1] - Page number to request.
 * @returns {Promise<object[]|*>} Scraped releases when the request succeeds,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
	const network = channel.type === 'network' ? channel : channel.parent;
	const url = `${network.url}/search.php?site[]=${channel.parameters.siteId}&page=${page}`;
	const res = await qu.getAll(url, '.video-list .video');

	return res.ok ? scrapeAll(res.items, channel) : res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape an actor profile by name.
 *
 * @param {{ name: string }} actor - Actor whose slugified name forms the profile URL.
 * @param {object} entity - Entity whose `url` is the base of the profile URL.
 * @param {*} include - Forwarded unchanged to scrapeProfile.
 * @returns {Promise<object|*>} The scraped profile when the request succeeds,
 *   otherwise the response status.
 */
async function fetchProfile({ name: actorName }, entity, include) {
	const profileUrl = `${entity.url}/models/${slugify(actorName)}.html`;
	const res = await qu.get(profileUrl);

	if (!res.ok) {
		return res.status;
	}

	return scrapeProfile(res.item, profileUrl, entity, include);
}
|
||||
|
||||
// Public scraper interface: latest-scene listing, actor profile lookup, single-scene scraping.
module.exports = { fetchLatest, fetchProfile, scrapeScene };
|
||||
@@ -275,10 +275,15 @@ function images(context, selector = 'img', attr, { origin, protocol = 'https' }
|
||||
return imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol));
|
||||
}
|
||||
|
||||
function url(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
|
||||
function url(context, selector = 'a', attr = 'href', { origin, protocol = 'https', object = false } = {}) {
|
||||
const urlEl = q(context, selector, attr);
|
||||
const prefixedUrl = prefixUrl(urlEl, origin, protocol);
|
||||
|
||||
return prefixUrl(urlEl, origin, protocol);
|
||||
if (prefixedUrl && object) {
|
||||
return new URL(prefixedUrl);
|
||||
}
|
||||
|
||||
return prefixedUrl;
|
||||
}
|
||||
|
||||
function urls(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
|
||||
@@ -349,7 +354,10 @@ function videos(context, selector = 'source', attr = 'src', { origin, protocol =
|
||||
function duration(context, selector, match, attr = 'textContent') {
|
||||
const durationString = q(context, selector, attr);
|
||||
|
||||
if (!durationString) return null;
|
||||
if (!durationString) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const durationMatch = durationToSeconds(durationString, match);
|
||||
|
||||
if (durationMatch) {
|
||||
|
||||
Reference in New Issue
Block a user