mirror of
https://github.com/wukko/cobalt.git
synced 2024-11-17 22:00:00 +00:00
clean up url handling
merge pull request #271 from dumbmoron/cleanup-url-handling
This commit is contained in:
commit
4b9d61b13f
12 changed files with 163 additions and 114 deletions
|
@ -36,6 +36,7 @@
|
|||
"hls-parser": "^0.10.7",
|
||||
"nanoid": "^4.0.2",
|
||||
"node-cache": "^5.1.2",
|
||||
"psl": "1.9.0",
|
||||
"set-cookie-parser": "2.6.0",
|
||||
"undici": "^5.19.1",
|
||||
"url-pattern": "1.0.3",
|
||||
|
|
|
@ -97,7 +97,7 @@ export function runAPI(express, app, gitCommit, gitBranch, __dirname) {
|
|||
let chck = checkJSONPost(request);
|
||||
if (!chck) throw new Error();
|
||||
|
||||
j = await getJSON(chck["url"], lang, chck);
|
||||
j = await getJSON(chck.url, lang, chck);
|
||||
} else {
|
||||
j = apiJSON(0, {
|
||||
t: !contentCon ? "invalid content type header" : loc(lang, 'ErrorNoLink')
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const version = 39;
|
||||
const version = 40;
|
||||
|
||||
const ua = navigator.userAgent.toLowerCase();
|
||||
const isIOS = ua.match("iphone os");
|
||||
|
@ -358,7 +358,7 @@ async function download(url) {
|
|||
eid("url-clear").style.display = "none";
|
||||
eid("url-input-area").disabled = true;
|
||||
let req = {
|
||||
url: encodeURIComponent(url.split("&")[0].split('%')[0]),
|
||||
url,
|
||||
aFormat: sGet("aFormat").slice(0, 4),
|
||||
filenamePattern: sGet("filenamePattern"),
|
||||
dubLang: false
|
||||
|
|
|
@ -1,33 +1,30 @@
|
|||
import UrlPattern from "url-pattern";
|
||||
import { services } from "./config.js";
|
||||
|
||||
import { services as patterns } from "./config.js";
|
||||
|
||||
import { cleanURL, apiJSON } from "./sub/utils.js";
|
||||
import { apiJSON } from "./sub/utils.js";
|
||||
import { errorUnsupported } from "./sub/errors.js";
|
||||
import loc from "../localization/manager.js";
|
||||
import match from "./processing/match.js";
|
||||
import hostOverrides from "./processing/hostOverrides.js";
|
||||
import { getHostIfValid } from "./processing/url.js";
|
||||
|
||||
export async function getJSON(originalURL, lang, obj) {
|
||||
export async function getJSON(url, lang, obj) {
|
||||
try {
|
||||
let patternMatch, url = encodeURI(decodeURIComponent(originalURL)),
|
||||
hostname = new URL(url).hostname.split('.'),
|
||||
host = hostname[hostname.length - 2];
|
||||
const host = getHostIfValid(url);
|
||||
|
||||
if (!url.startsWith('https://')) return apiJSON(0, { t: errorUnsupported(lang) });
|
||||
|
||||
let overrides = hostOverrides(host, url);
|
||||
host = overrides.host;
|
||||
url = overrides.url;
|
||||
|
||||
if (!(host && host.length < 20 && host in patterns && patterns[host]["enabled"])) return apiJSON(0, { t: errorUnsupported(lang) });
|
||||
|
||||
let pathToMatch = cleanURL(url, host).split(`.${patterns[host]['tld'] ? patterns[host]['tld'] : "com"}/`)[1].replace('.', '');
|
||||
for (let i in patterns[host]["patterns"]) {
|
||||
patternMatch = new UrlPattern(patterns[host]["patterns"][i]).match(pathToMatch);
|
||||
if (patternMatch) break
|
||||
if (!host || !services[host].enabled) {
|
||||
return apiJSON(0, { t: errorUnsupported(lang) });
|
||||
}
|
||||
|
||||
let patternMatch;
|
||||
for (const pattern of services[host].patterns) {
|
||||
patternMatch = pattern.match(
|
||||
url.pathname.substring(1) + url.search
|
||||
);
|
||||
if (patternMatch) break;
|
||||
}
|
||||
|
||||
if (!patternMatch) {
|
||||
return apiJSON(0, { t: errorUnsupported(lang) });
|
||||
}
|
||||
if (!patternMatch) return apiJSON(0, { t: errorUnsupported(lang) });
|
||||
|
||||
return await match(host, patternMatch, url, lang, obj)
|
||||
} catch (e) {
|
||||
|
|
|
@ -1,8 +1,17 @@
|
|||
import UrlPattern from "url-pattern";
|
||||
import { loadJSON } from "./sub/loadFromFs.js";
|
||||
const config = loadJSON("./src/config.json");
|
||||
const packageJson = loadJSON("./package.json");
|
||||
const servicesConfigJson = loadJSON("./src/modules/processing/servicesConfig.json");
|
||||
|
||||
Object.values(servicesConfigJson.config).forEach(service => {
|
||||
service.patterns = service.patterns.map(
|
||||
pattern => new UrlPattern(pattern, {
|
||||
segmentValueCharset: UrlPattern.defaultOptions.segmentValueCharset + '@\\.'
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
export const
|
||||
services = servicesConfigJson.config,
|
||||
audioIgnore = servicesConfigJson.audioIgnore,
|
||||
|
|
|
@ -1,48 +0,0 @@
|
|||
export default function (inHost, inURL) {
|
||||
let host = String(inHost);
|
||||
let url = String(inURL);
|
||||
|
||||
switch(host) {
|
||||
case "youtube":
|
||||
if (url.startsWith("https://youtube.com/live/") || url.startsWith("https://www.youtube.com/live/")) {
|
||||
url = url.split("?")[0].replace("www.", "");
|
||||
url = `https://youtube.com/watch?v=${url.replace("https://youtube.com/live/", "")}`
|
||||
}
|
||||
if (url.includes('youtube.com/shorts/')) {
|
||||
url = url.split('?')[0].replace('shorts/', 'watch?v=');
|
||||
}
|
||||
break;
|
||||
case "youtu":
|
||||
if (url.startsWith("https://youtu.be/")) {
|
||||
host = "youtube";
|
||||
url = `https://youtube.com/watch?v=${url.replace("https://youtu.be/", "")}`
|
||||
}
|
||||
break;
|
||||
case "vxtwitter":
|
||||
case "x":
|
||||
if (url.startsWith("https://x.com/")) {
|
||||
host = "twitter";
|
||||
url = url.replace("https://x.com/", "https://twitter.com/")
|
||||
}
|
||||
if (url.startsWith("https://vxtwitter.com/")) {
|
||||
host = "twitter";
|
||||
url = url.replace("https://vxtwitter.com/", "https://twitter.com/")
|
||||
}
|
||||
break;
|
||||
case "tumblr":
|
||||
if (!url.includes("blog/view")) {
|
||||
if (url.slice(-1) === '/') url = url.slice(0, -1);
|
||||
url = url.replace(url.split('/')[5], '')
|
||||
}
|
||||
break;
|
||||
case "twitch":
|
||||
if (url.includes('clips.twitch.tv')) {
|
||||
url = url.split('?')[0].replace('clips.twitch.tv/', 'twitch.tv/_/clip/');
|
||||
}
|
||||
break;
|
||||
}
|
||||
return {
|
||||
host: host,
|
||||
url: url
|
||||
}
|
||||
}
|
|
@ -1,3 +1,5 @@
|
|||
import { strict as assert } from "node:assert";
|
||||
|
||||
import { apiJSON } from "../sub/utils.js";
|
||||
import { errorUnsupported, genericError, brokenLink } from "../sub/errors.js";
|
||||
|
||||
|
@ -23,6 +25,8 @@ import twitch from "./services/twitch.js";
|
|||
import rutube from "./services/rutube.js";
|
||||
|
||||
export default async function(host, patternMatch, url, lang, obj) {
|
||||
assert(url instanceof URL);
|
||||
|
||||
try {
|
||||
let r, isAudioOnly = !!obj.isAudioOnly, disableMetadata = !!obj.disableMetadata;
|
||||
|
||||
|
@ -37,7 +41,6 @@ export default async function(host, patternMatch, url, lang, obj) {
|
|||
break;
|
||||
case "vk":
|
||||
r = await vk({
|
||||
url: url,
|
||||
userId: patternMatch["userId"],
|
||||
videoId: patternMatch["videoId"],
|
||||
quality: obj.vQuality
|
||||
|
@ -57,11 +60,13 @@ export default async function(host, patternMatch, url, lang, obj) {
|
|||
isAudioMuted: obj.isAudioMuted,
|
||||
dubLang: obj.dubLang
|
||||
}
|
||||
if (url.match('music.youtube.com') || isAudioOnly === true) {
|
||||
|
||||
if (url.hostname === 'music.youtube.com' || isAudioOnly === true) {
|
||||
fetchInfo.quality = "max";
|
||||
fetchInfo.format = "vp9";
|
||||
fetchInfo.isAudioOnly = true
|
||||
}
|
||||
|
||||
r = await youtube(fetchInfo);
|
||||
break;
|
||||
case "reddit":
|
||||
|
@ -83,9 +88,9 @@ export default async function(host, patternMatch, url, lang, obj) {
|
|||
break;
|
||||
case "tumblr":
|
||||
r = await tumblr({
|
||||
id: patternMatch["id"],
|
||||
url: url,
|
||||
user: patternMatch["user"] || false
|
||||
id: patternMatch.id,
|
||||
user: patternMatch.user,
|
||||
url
|
||||
});
|
||||
break;
|
||||
case "vimeo":
|
||||
|
@ -99,7 +104,7 @@ export default async function(host, patternMatch, url, lang, obj) {
|
|||
case "soundcloud":
|
||||
isAudioOnly = true;
|
||||
r = await soundcloud({
|
||||
url: url,
|
||||
url,
|
||||
author: patternMatch["author"],
|
||||
song: patternMatch["song"],
|
||||
shortLink: patternMatch["shortLink"] || false,
|
||||
|
|
|
@ -39,17 +39,18 @@ export default async function(obj) {
|
|||
if (!clientId) return { error: 'ErrorSoundCloudNoClientId' };
|
||||
|
||||
let link;
|
||||
if (obj.shortLink && !obj.author && !obj.song) {
|
||||
if (obj.url.hostname === 'on.soundcloud.com' && obj.shortLink) {
|
||||
link = await fetch(`https://on.soundcloud.com/${obj.shortLink}/`, { redirect: "manual" }).then((r) => {
|
||||
if (r.status === 302 && r.headers.get("location").startsWith("https://soundcloud.com/")) {
|
||||
return r.headers.get("location").split('?', 1)[0]
|
||||
}
|
||||
return false
|
||||
}).catch(() => { return false });
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
if (!link && obj.author && obj.song) {
|
||||
link = `https://soundcloud.com/${obj.author}/${obj.song}${obj.accessKey ? `/s-${obj.accessKey}` : ''}`
|
||||
}
|
||||
|
||||
if (!link) return { error: 'ErrorCouldntFetch' };
|
||||
|
||||
let json = await fetch(`https://api-v2.soundcloud.com/resolve?url=${link}&client_id=${clientId}`).then((r) => {
|
||||
|
|
|
@ -1,9 +1,14 @@
|
|||
import psl from "psl";
|
||||
import { genericUserAgent } from "../../config.js";
|
||||
|
||||
export default async function(obj) {
|
||||
let html = await fetch(`https://${
|
||||
obj.user ? obj.user : obj.url.split('.')[0].replace('https://', '')
|
||||
}.tumblr.com/post/${obj.id}`, {
|
||||
let { subdomain } = psl.parse(obj.url.hostname);
|
||||
if (subdomain?.includes('.'))
|
||||
return { error: ['ErrorBrokenLink', 'tumblr'] }
|
||||
else if (subdomain === 'www')
|
||||
subdomain = undefined;
|
||||
|
||||
let html = await fetch(`https://${subdomain ?? obj.user}.tumblr.com/post/${obj.id}`, {
|
||||
headers: { "user-agent": genericUserAgent }
|
||||
}).then((r) => { return r.text() }).catch(() => { return false });
|
||||
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
},
|
||||
"twitter": {
|
||||
"alias": "twitter videos & voice",
|
||||
"altDomains": ["x.com", "vxtwitter.com", "fixvx.com"],
|
||||
"subdomains": ["mobile"],
|
||||
"patterns": [":user/status/:id", ":user/status/:id/video/:v"],
|
||||
"enabled": true
|
||||
},
|
||||
|
@ -24,22 +26,26 @@
|
|||
"youtube": {
|
||||
"alias": "youtube videos, shorts & music",
|
||||
"patterns": ["watch?v=:id", "embed/:id", "watch/:id"],
|
||||
"subdomains": ["music", "m"],
|
||||
"bestAudio": "opus",
|
||||
"enabled": true
|
||||
},
|
||||
"tumblr": {
|
||||
"patterns": ["post/:id", "blog/view/:user/:id", ":user/:id", ":user/:id/:trackingId"],
|
||||
"subdomains": "*",
|
||||
"enabled": true
|
||||
},
|
||||
"tiktok": {
|
||||
"alias": "tiktok videos, photos & audio",
|
||||
"patterns": [":user/video/:postId", ":id", "t/:id"],
|
||||
"subdomains": ["vt", "vm"],
|
||||
"audioFormats": ["best", "m4a", "mp3"],
|
||||
"enabled": true
|
||||
},
|
||||
"douyin": {
|
||||
"alias": "douyin videos & audio",
|
||||
"patterns": ["video/:postId", ":id"],
|
||||
"subdomains": ["v"],
|
||||
"enabled": false
|
||||
},
|
||||
"vimeo": {
|
||||
|
@ -49,6 +55,7 @@
|
|||
},
|
||||
"soundcloud": {
|
||||
"patterns": [":author/:song/s-:accessKey", ":author/:song", ":shortLink"],
|
||||
"subdomains": ["on"],
|
||||
"bestAudio": "opus",
|
||||
"enabled": true
|
||||
},
|
||||
|
|
96
src/modules/processing/url.js
Normal file
96
src/modules/processing/url.js
Normal file
|
@ -0,0 +1,96 @@
|
|||
import { services } from "../config.js";
|
||||
import { strict as assert } from "node:assert";
|
||||
import psl from "psl";
|
||||
|
||||
export function aliasURL(url) {
|
||||
assert(url instanceof URL);
|
||||
|
||||
const host = psl.parse(url.hostname);
|
||||
const parts = url.pathname.split('/');
|
||||
|
||||
switch (host.sld) {
|
||||
case "youtube":
|
||||
if (url.pathname.startsWith('/live/') || url.pathname.startsWith('/shorts/')) {
|
||||
url.pathname = '/watch';
|
||||
// parts := ['', 'live' || 'shorts', id, ...rest]
|
||||
url.search = `?v=${encodeURIComponent(parts[2])}`
|
||||
}
|
||||
break;
|
||||
case "youtu":
|
||||
if (url.hostname === 'youtu.be' && parts.length >= 2) {
|
||||
/* youtu.be urls can be weird, e.g. https://youtu.be/<id>//asdasd// still works
|
||||
** but we only care about the 1st segment of the path */
|
||||
url = new URL(`https://youtube.com/watch?v=${
|
||||
encodeURIComponent(parts[1])
|
||||
}`)
|
||||
}
|
||||
break;
|
||||
|
||||
case "vxtwitter":
|
||||
case "fixvx":
|
||||
case "x":
|
||||
if (services.twitter.altDomains.includes(url.hostname)) {
|
||||
url.hostname = 'twitter.com'
|
||||
}
|
||||
break;
|
||||
|
||||
case "twitch":
|
||||
if (url.hostname === 'clips.twitch.tv' && parts.length >= 2) {
|
||||
url = new URL(`https://twitch.tv/_/clip/${parts[1]}`);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return url
|
||||
}
|
||||
|
||||
export function cleanURL(url) {
|
||||
assert(url instanceof URL);
|
||||
const host = psl.parse(url.hostname).sld;
|
||||
let stripQuery = true;
|
||||
|
||||
if (host === 'pinterest') {
|
||||
url.hostname = 'pinterest.com'
|
||||
} else if (host === 'vk' && url.pathname.includes('/clip')) {
|
||||
if (url.searchParams.get('z'))
|
||||
url.search = '?z=' + encodeURIComponent(url.searchParams.get('z'));
|
||||
stripQuery = false;
|
||||
} else if (host === 'youtube' && url.searchParams.get('v')) {
|
||||
url.search = '?v=' + encodeURIComponent(url.searchParams.get('v'));
|
||||
stripQuery = false;
|
||||
}
|
||||
|
||||
if (stripQuery) {
|
||||
url.search = ''
|
||||
}
|
||||
|
||||
url.username = url.password = url.port = url.hash = ''
|
||||
|
||||
if (url.pathname.endsWith('/'))
|
||||
url.pathname = url.pathname.slice(0, -1);
|
||||
|
||||
return url
|
||||
}
|
||||
|
||||
export function normalizeURL(url) {
|
||||
return cleanURL(
|
||||
aliasURL(
|
||||
new URL(url.replace(/^https\/\//, 'https://'))
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
export function getHostIfValid(url) {
|
||||
const host = psl.parse(url.hostname);
|
||||
if (host.error) return;
|
||||
|
||||
const service = services[host.sld];
|
||||
if (!service) return;
|
||||
if ((service.tld ?? 'com') !== host.tld) return;
|
||||
|
||||
const anySubdomainAllowed = service.subdomains === '*';
|
||||
const validSubdomain = [null, 'www', ...(service.subdomains ?? [])].includes(host.subdomain);
|
||||
if (!validSubdomain && !anySubdomainAllowed) return;
|
||||
|
||||
return host.sld;
|
||||
}
|
|
@ -1,3 +1,4 @@
|
|||
import { normalizeURL } from "../processing/url.js";
|
||||
import { createStream } from "../stream/manage.js";
|
||||
|
||||
const apiVar = {
|
||||
|
@ -52,29 +53,7 @@ export function metadataManager(obj) {
|
|||
for (let i in keys) { if (tags.includes(keys[i])) commands.push('-metadata', `${keys[i]}=${obj[keys[i]]}`) }
|
||||
return commands;
|
||||
}
|
||||
export function cleanURL(url, host) {
|
||||
switch (host) {
|
||||
case "vk":
|
||||
url = url.includes('clip') ? url.split('&')[0] : url.split('?')[0];
|
||||
break;
|
||||
case "youtube":
|
||||
url = url.split('&')[0];
|
||||
break;
|
||||
case "tiktok":
|
||||
url = url.replace(/@([a-zA-Z]+(\.[a-zA-Z]+)+)/, "@a")
|
||||
case "pinterest":
|
||||
url = url.replace(/:\/\/(?:www.)pinterest(?:\.[a-z.]+)/, "://pinterest.com")
|
||||
default:
|
||||
url = url.split('?')[0];
|
||||
if (url.substring(url.length - 1) === "/") url = url.substring(0, url.length - 1);
|
||||
break;
|
||||
}
|
||||
for (let i in forbiddenChars) {
|
||||
url = url.replaceAll(forbiddenChars[i], '')
|
||||
}
|
||||
url = url.replace('https//', 'https://')
|
||||
return url.slice(0, 128)
|
||||
}
|
||||
|
||||
export function cleanString(string) {
|
||||
for (let i in forbiddenCharsString) {
|
||||
string = string.replaceAll("/", "_").replaceAll(forbiddenCharsString[i], '')
|
||||
|
@ -94,6 +73,7 @@ export function unicodeDecode(str) {
|
|||
}
|
||||
export function checkJSONPost(obj) {
|
||||
let def = {
|
||||
url: normalizeURL(decodeURIComponent(obj.url)),
|
||||
vCodec: "h264",
|
||||
vQuality: "720",
|
||||
aFormat: "mp3",
|
||||
|
@ -121,12 +101,8 @@ export function checkJSONPost(obj) {
|
|||
}
|
||||
}
|
||||
|
||||
if (def.dubLang) def.dubLang = verifyLanguageCode(obj.dubLang);
|
||||
|
||||
obj["url"] = decodeURIComponent(String(obj["url"]));
|
||||
let hostname = obj["url"].replace("https://", "").replace(' ', '').split('&')[0].split("/")[0].split("."),
|
||||
host = hostname[hostname.length - 2];
|
||||
def["url"] = encodeURIComponent(cleanURL(obj["url"], host));
|
||||
if (def.dubLang)
|
||||
def.dubLang = verifyLanguageCode(obj.dubLang);
|
||||
|
||||
return def
|
||||
} catch (e) {
|
||||
|
|
Loading…
Reference in a new issue