From dd563eb752f0883e295b6042db3387f802d5871f Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Sat, 9 Dec 2023 11:00:54 +0000 Subject: [PATCH 01/22] api: rework url parsing - tlds are now parsed and validated correctly (e.g. ".co.uk" works now) - url patterns are pre-compiled instead of being compiled for every request - aliases are computed in a safe manner using the URL object where possible --- package.json | 1 + src/modules/api.js | 41 ++++----- src/modules/config.js | 9 ++ src/modules/processing/hostOverrides.js | 112 ++++++++++++++++++------ src/modules/sub/utils.js | 34 +------ 5 files changed, 116 insertions(+), 81 deletions(-) diff --git a/package.json b/package.json index c4b03eb4..ed639ba2 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "hls-parser": "^0.10.7", "nanoid": "^4.0.2", "node-cache": "^5.1.2", + "psl": "^1.9.0", "set-cookie-parser": "2.6.0", "undici": "^5.19.1", "url-pattern": "1.0.3", diff --git a/src/modules/api.js b/src/modules/api.js index 62e9a7c6..19f657cc 100644 --- a/src/modules/api.js +++ b/src/modules/api.js @@ -1,35 +1,32 @@ -import UrlPattern from "url-pattern"; +import { services } from "./config.js"; -import { services as patterns } from "./config.js"; - -import { cleanURL, apiJSON } from "./sub/utils.js"; +import { apiJSON } from "./sub/utils.js"; import { errorUnsupported } from "./sub/errors.js"; import loc from "../localization/manager.js"; import match from "./processing/match.js"; -import hostOverrides from "./processing/hostOverrides.js"; +import { hasValidHostname, normalizeURL } from "./processing/url.js"; export async function getJSON(originalURL, lang, obj) { try { - let patternMatch, url = encodeURI(decodeURIComponent(originalURL)), - hostname = new URL(url).hostname.split('.'), - host = hostname[hostname.length - 2]; + const url = normalizeURL(decodeURIComponent(originalURL)); - if (!url.startsWith('https://')) return apiJSON(0, { t: errorUnsupported(lang) }); - - let overrides = hostOverrides(host, url); - host = overrides.host; - url = overrides.url; - - if (!(host && host.length < 20 && host in patterns && patterns[host]["enabled"])) return apiJSON(0, { t: errorUnsupported(lang) }); - - let pathToMatch = cleanURL(url, host).split(`.${patterns[host]['tld'] ? patterns[host]['tld'] : "com"}/`)[1].replace('.', ''); - for (let i in patterns[host]["patterns"]) { - patternMatch = new UrlPattern(patterns[host]["patterns"][i]).match(pathToMatch); - if (patternMatch) break + if (!hasValidHostname(url) || !services[host].enabled) { + return apiJSON(0, { t: errorUnsupported(lang) }); } - if (!patternMatch) return apiJSON(0, { t: errorUnsupported(lang) }); - return await match(host, patternMatch, url, lang, obj) + let patternMatch; + for (const pattern of services[host].patterns) { + patternMatch = pattern.match( + url.pathname.substring(1) + url.search + ); + if (patternMatch) break; + } + + if (!patternMatch) { + return apiJSON(0, { t: errorUnsupported(lang) }); + } + + return await match(host, patternMatch, url.toString(), lang, obj) } catch (e) { return apiJSON(0, { t: loc(lang, 'ErrorSomethingWentWrong') }) } diff --git a/src/modules/config.js b/src/modules/config.js index 6fbe9d43..a0525ae8 100644 --- a/src/modules/config.js +++ b/src/modules/config.js @@ -1,8 +1,17 @@ +import UrlPattern from "url-pattern"; import { loadJSON } from "./sub/loadFromFs.js"; const config = loadJSON("./src/config.json"); const packageJson = loadJSON("./package.json"); const servicesConfigJson = loadJSON("./src/modules/processing/servicesConfig.json"); +Object.values(servicesConfigJson.config).forEach(service => { + service.patterns = service.patterns.map( + pattern => new UrlPattern(pattern, { + segmentValueCharset: UrlPattern.defaultOptions.segmentValueCharset + '\\.' + }) + ) +}) + export const services = servicesConfigJson.config, audioIgnore = servicesConfigJson.audioIgnore, diff --git a/src/modules/processing/hostOverrides.js b/src/modules/processing/hostOverrides.js index 88553e35..86d45add 100644 --- a/src/modules/processing/hostOverrides.js +++ b/src/modules/processing/hostOverrides.js @@ -1,48 +1,102 @@ -export default function (inHost, inURL) { - let host = String(inHost); - let url = String(inURL); +import { services } from "./config.js"; +import { strict as assert } from "node:assert"; +import psl from "psl"; - switch(host) { +export function aliasURL(url) { + assert(url instanceof URL); + + const host = psl.parse(url.hostname); + const parts = url.pathname.split('/'); + + switch (host.sld) { case "youtube": - if (url.startsWith("https://youtube.com/live/") || url.startsWith("https://www.youtube.com/live/")) { - url = url.split("?")[0].replace("www.", ""); - url = `https://youtube.com/watch?v=${url.replace("https://youtube.com/live/", "")}` - } - if (url.includes('youtube.com/shorts/')) { - url = url.split('?')[0].replace('shorts/', 'watch?v='); + if (url.pathname.startsWith('/live/') || url.pathname.startsWith('/shorts/')) { + url.pathname = '/watch'; + // ['', 'live' || 'shorts', id, ...rest] + url.search = `?v=${encodeURIComponent(parts[2])}` } break; case "youtu": - if (url.startsWith("https://youtu.be/")) { - host = "youtube"; - url = `https://youtube.com/watch?v=${url.replace("https://youtu.be/", "")}` + if (url.hostname === 'youtu.be' && parts.length === 2) { + /* youtu.be urls can be weird, e.g. https://youtu.be///asdasd// still works + ** but we only care about the 1st segment of the path */ + url = new URL(`https://youtube.com/watch?v=${ + encodeURIComponent(parts[1]) + }`) } break; + case "vxtwitter": case "x": - if (url.startsWith("https://x.com/")) { - host = "twitter"; - url = url.replace("https://x.com/", "https://twitter.com/") - } - if (url.startsWith("https://vxtwitter.com/")) { - host = "twitter"; - url = url.replace("https://vxtwitter.com/", "https://twitter.com/") + if (['x.com', 'vxtwitter.com'].includes(url.hostname)) { + url.hostname = 'twitter.com' } break; + case "tumblr": - if (!url.includes("blog/view")) { - if (url.slice(-1) === '/') url = url.slice(0, -1); - url = url.replace(url.split('/')[5], '') + if (!url.pathname.includes("/blog/view")) { + if (url.pathname.endsWith('/')) + url.pathname = url.pathname.slice(0, -1); + url.pathname = url.pathname.replace(parts[5], '') } break; + case "twitch": - if (url.includes('clips.twitch.tv')) { - url = url.split('?')[0].replace('clips.twitch.tv/', 'twitch.tv/_/clip/'); + if (url.hostname === 'clips.twitch.tv' && parts.length >= 2) { + url = new URL(`https://twitch.tv/_/clip/${parts[1]}`); } break; } - return { - host: host, - url: url - } + + return { url, host: host.sld } } + +export function cleanURL({ url, host }) { + assert(url instanceof URL); + let stripQuery = true; + + if (host === 'pinterest') { + url.hostname = 'pinterest.com' + } else if (host === 'vk' && url.pathname.includes('/clip')) { + if (url.searchParams.get('z')) + url.search = '?z=' + encodeURIComponent(url.searchParams.get('z')); + stripQuery = false; + } else if (host === 'youtube' && url.searchParams.get('v')) { + url.search = '?v=' + encodeURIComponent(url.searchParams.get('v')); + stripQuery = false; + } + + if (stripQuery) { + url.search = url.hash = '' + } + + if (url.pathname.endsWith('/')) + url.pathname = url.pathname.slice(0, -1); + + return url +} + +export function normalizeURL(url) { + return cleanURL( + aliasURL( + new URL(url.replace(/^https\/\//, 'https://')) + ) + ); +} + +export function hasValidHostname(url) { + const host = psl.parse(url.hostname); + if (host.error) return false; + + const service = services[host.sld]; + if (!service) return false; + + if ((service.tld ?? 'com') !== host.tld) return false; + + const anySubdomainAllowed = service.subdomains === '*'; + const validSubdomain = [null, 'www', ...(service.subdomains ?? [])].includes(host.subdomain); + if (!validSubdomain && !anySubdomainAllowed) + return false; + + return true; +} \ No newline at end of file diff --git a/src/modules/sub/utils.js b/src/modules/sub/utils.js index e165a68a..ef64d07b 100644 --- a/src/modules/sub/utils.js +++ b/src/modules/sub/utils.js @@ -52,29 +52,7 @@ export function metadataManager(obj) { for (let i in keys) { if (tags.includes(keys[i])) commands.push('-metadata', `${keys[i]}=${obj[keys[i]]}`) } return commands; } -export function cleanURL(url, host) { - switch (host) { - case "vk": - url = url.includes('clip') ? url.split('&')[0] : url.split('?')[0]; - break; - case "youtube": - url = url.split('&')[0]; - break; - case "tiktok": - url = url.replace(/@([a-zA-Z]+(\.[a-zA-Z]+)+)/, "@a") - case "pinterest": - url = url.replace(/:\/\/(?:www.)pinterest(?:\.[a-z.]+)/, "://pinterest.com") - default: - url = url.split('?')[0]; - if (url.substring(url.length - 1) === "/") url = url.substring(0, url.length - 1); - break; - } - for (let i in forbiddenChars) { - url = url.replaceAll(forbiddenChars[i], '') - } - url = url.replace('https//', 'https://') - return url.slice(0, 128) -} + export function cleanString(string) { for (let i in forbiddenCharsString) { string = string.replaceAll("/", "_").replaceAll(forbiddenCharsString[i], '') @@ -121,13 +99,9 @@ export function checkJSONPost(obj) { } } - if (def.dubLang) def.dubLang = verifyLanguageCode(obj.dubLang); - - obj["url"] = decodeURIComponent(String(obj["url"])); - let hostname = obj["url"].replace("https://", "").replace(' ', '').split('&')[0].split("/")[0].split("."), - host = hostname[hostname.length - 2]; - def["url"] = encodeURIComponent(cleanURL(obj["url"], host)); - + if (def.dubLang) + def.dubLang = verifyLanguageCode(obj.dubLang); + def.url = obj.url; return def } catch (e) { return false From 2e1eb1b864b071f976777105b758729c600f47d1 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Tue, 12 Dec 2023 15:47:29 +0000 Subject: [PATCH 02/22] api: rename hostOverrides to 'url' it does a bit more than it did before now --- src/modules/processing/{hostOverrides.js => url.js} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/modules/processing/{hostOverrides.js => url.js} (100%) diff --git a/src/modules/processing/hostOverrides.js b/src/modules/processing/url.js similarity index 100% rename from src/modules/processing/hostOverrides.js rename to src/modules/processing/url.js From 149c16abbb7dbbdceaf3e1ed138608a5f55c435f Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Tue, 12 Dec 2023 23:19:01 +0000 Subject: [PATCH 03/22] url: make youtu.be alias rule more lax --- src/modules/processing/url.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index 86d45add..a58254a9 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -17,7 +17,7 @@ export function aliasURL(url) { } break; case "youtu": - if (url.hostname === 'youtu.be' && parts.length === 2) { + if (url.hostname === 'youtu.be' && parts.length >= 2) { /* youtu.be urls can be weird, e.g. https://youtu.be///asdasd// still works ** but we only care about the 1st segment of the path */ url = new URL(`https://youtube.com/watch?v=${ From f9feaa41ce77b14c3782ea64acc74361780f6853 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 22:43:36 +0000 Subject: [PATCH 04/22] tumblr: stricter subdomain parsing --- src/modules/processing/services/tumblr.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/modules/processing/services/tumblr.js b/src/modules/processing/services/tumblr.js index 7ae7336c..f978e5a4 100644 --- a/src/modules/processing/services/tumblr.js +++ b/src/modules/processing/services/tumblr.js @@ -1,9 +1,12 @@ +import psl from "psl"; import { genericUserAgent } from "../../config.js"; export default async function(obj) { - let html = await fetch(`https://${ - obj.user ? obj.user : obj.url.split('.')[0].replace('https://', '') - }.tumblr.com/post/${obj.id}`, { + const { subdomain } = psl.parse(obj.url); + if (subdomain?.includes('.')) + return { error: 'ErrorBrokenLink' } + + let html = await fetch(`https://${obj.user ?? subdomain}.tumblr.com/post/${obj.id}`, { headers: { "user-agent": genericUserAgent } }).then((r) => { return r.text() }).catch(() => { return false }); From c458423e03153bbb4d40cbb894128f8324429bd5 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 22:43:57 +0000 Subject: [PATCH 05/22] match: light cleanup --- src/modules/processing/match.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/modules/processing/match.js b/src/modules/processing/match.js index 86b6af82..096cf47f 100644 --- a/src/modules/processing/match.js +++ b/src/modules/processing/match.js @@ -37,7 +37,6 @@ export default async function(host, patternMatch, url, lang, obj) { break; case "vk": r = await vk({ - url: url, userId: patternMatch["userId"], videoId: patternMatch["videoId"], quality: obj.vQuality @@ -57,11 +56,13 @@ export default async function(host, patternMatch, url, lang, obj) { isAudioMuted: obj.isAudioMuted, dubLang: obj.dubLang } - if (url.match('music.youtube.com') || isAudioOnly === true) { + + if (new URL(url).hostname === 'music.youtube.com' || isAudioOnly === true) { fetchInfo.quality = "max"; fetchInfo.format = "vp9"; fetchInfo.isAudioOnly = true } + r = await youtube(fetchInfo); break; case "reddit": @@ -83,9 +84,9 @@ export default async function(host, patternMatch, url, lang, obj) { break; case "tumblr": r = await tumblr({ - id: patternMatch["id"], - url: url, - user: patternMatch["user"] || false + id: patternMatch.id, + user: patternMatch.user, + url }); break; case "vimeo": From 3056624b3d574c2ef8d2bcff7005cba46041c095 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 22:48:38 +0000 Subject: [PATCH 06/22] servicesConfig: set up subdomains --- src/modules/processing/servicesConfig.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/modules/processing/servicesConfig.json b/src/modules/processing/servicesConfig.json index 6425f0f7..e33bdab4 100644 --- a/src/modules/processing/servicesConfig.json +++ b/src/modules/processing/servicesConfig.json @@ -24,22 +24,26 @@ "youtube": { "alias": "youtube videos, shorts & music", "patterns": ["watch?v=:id", "embed/:id", "watch/:id"], + "subdomains": ["music"], "bestAudio": "opus", "enabled": true }, "tumblr": { "patterns": ["post/:id", "blog/view/:user/:id", ":user/:id", ":user/:id/:trackingId"], + "subdomains": "*", "enabled": true }, "tiktok": { "alias": "tiktok videos, photos & audio", "patterns": [":user/video/:postId", ":id", "t/:id"], + "subdomains": ["vt", "vm"], "audioFormats": ["best", "m4a", "mp3"], "enabled": true }, "douyin": { "alias": "douyin videos & audio", "patterns": ["video/:postId", ":id"], + "subdomains": ["v"], "enabled": false }, "vimeo": { @@ -49,6 +53,7 @@ }, "soundcloud": { "patterns": [":author/:song/s-:accessKey", ":author/:song", ":shortLink"], + "subdomains": ["on"], "bestAudio": "opus", "enabled": true }, From 662360509c4c55b3431db8bbd3c82f2f5a927ba2 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 22:51:24 +0000 Subject: [PATCH 07/22] url: return host instead of bool for success --- src/modules/api.js | 5 +++-- src/modules/processing/url.js | 14 ++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/modules/api.js b/src/modules/api.js index 19f657cc..eebbfe27 100644 --- a/src/modules/api.js +++ b/src/modules/api.js @@ -4,13 +4,14 @@ import { apiJSON } from "./sub/utils.js"; import { errorUnsupported } from "./sub/errors.js"; import loc from "../localization/manager.js"; import match from "./processing/match.js"; -import { hasValidHostname, normalizeURL } from "./processing/url.js"; +import { getHostIfValid, normalizeURL } from "./processing/url.js"; export async function getJSON(originalURL, lang, obj) { try { const url = normalizeURL(decodeURIComponent(originalURL)); + const host = getHostIfValid(url); - if (!hasValidHostname(url) || !services[host].enabled) { + if (!host || !services[host].enabled) { return apiJSON(0, { t: errorUnsupported(lang) }); } diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index a58254a9..75202f5f 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -84,19 +84,17 @@ export function normalizeURL(url) { ); } -export function hasValidHostname(url) { +export function getHostIfValid(url) { const host = psl.parse(url.hostname); - if (host.error) return false; + if (host.error) return; const service = services[host.sld]; - if (!service) return false; - - if ((service.tld ?? 'com') !== host.tld) return false; + if (!service) return; + if ((service.tld ?? 'com') !== host.tld) return; const anySubdomainAllowed = service.subdomains === '*'; const validSubdomain = [null, 'www', ...(service.subdomains ?? [])].includes(host.subdomain); - if (!validSubdomain && !anySubdomainAllowed) - return false; + if (!validSubdomain && !anySubdomainAllowed) return; - return true; + return host.sld; } \ No newline at end of file From 30c9652b6e8803509cc7d563113214bb583a793b Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:03:41 +0000 Subject: [PATCH 08/22] url: typo --- src/modules/processing/url.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index 75202f5f..0420a465 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -1,4 +1,4 @@ -import { services } from "./config.js"; +import { services } from "../config.js"; import { strict as assert } from "node:assert"; import psl from "psl"; From 81e68c37f500218ce79793981678941ee97879b6 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:04:05 +0000 Subject: [PATCH 09/22] processing: pass URL object instead of string --- src/modules/api.js | 2 +- src/modules/processing/match.js | 8 ++++++-- src/modules/processing/services/soundcloud.js | 7 ++++--- src/modules/processing/services/tumblr.js | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/modules/api.js b/src/modules/api.js index eebbfe27..21132022 100644 --- a/src/modules/api.js +++ b/src/modules/api.js @@ -27,7 +27,7 @@ export async function getJSON(originalURL, lang, obj) { return apiJSON(0, { t: errorUnsupported(lang) }); } - return await match(host, patternMatch, url.toString(), lang, obj) + return await match(host, patternMatch, url, lang, obj) } catch (e) { return apiJSON(0, { t: loc(lang, 'ErrorSomethingWentWrong') }) } diff --git a/src/modules/processing/match.js b/src/modules/processing/match.js index 096cf47f..fd7e6ec9 100644 --- a/src/modules/processing/match.js +++ b/src/modules/processing/match.js @@ -1,3 +1,5 @@ +import { strict as assert } from "node:assert"; + import { apiJSON } from "../sub/utils.js"; import { errorUnsupported, genericError, brokenLink } from "../sub/errors.js"; @@ -23,6 +25,8 @@ import twitch from "./services/twitch.js"; import rutube from "./services/rutube.js"; export default async function(host, patternMatch, url, lang, obj) { + assert(url instanceof URL); + try { let r, isAudioOnly = !!obj.isAudioOnly, disableMetadata = !!obj.disableMetadata; @@ -57,7 +61,7 @@ export default async function(host, patternMatch, url, lang, obj) { dubLang: obj.dubLang } - if (new URL(url).hostname === 'music.youtube.com' || isAudioOnly === true) { + if (url.hostname === 'music.youtube.com' || isAudioOnly === true) { fetchInfo.quality = "max"; fetchInfo.format = "vp9"; fetchInfo.isAudioOnly = true @@ -100,7 +104,7 @@ export default async function(host, patternMatch, url, lang, obj) { case "soundcloud": isAudioOnly = true; r = await soundcloud({ - url: url, + url, author: patternMatch["author"], song: patternMatch["song"], shortLink: patternMatch["shortLink"] || false, diff --git a/src/modules/processing/services/soundcloud.js b/src/modules/processing/services/soundcloud.js index fcc6de02..b13c0440 100644 --- a/src/modules/processing/services/soundcloud.js +++ b/src/modules/processing/services/soundcloud.js @@ -39,17 +39,18 @@ export default async function(obj) { if (!clientId) return { error: 'ErrorSoundCloudNoClientId' }; let link; - if (obj.shortLink && !obj.author && !obj.song) { + if (obj.url.hostname === 'on.soundcloud.com' && obj.shortLink) { link = await fetch(`https://on.soundcloud.com/${obj.shortLink}/`, { redirect: "manual" }).then((r) => { if (r.status === 302 && r.headers.get("location").startsWith("https://soundcloud.com/")) { return r.headers.get("location").split('?', 1)[0] } - return false - }).catch(() => { return false }); + }).catch(() => {}); } + if (!link && obj.author && obj.song) { link = `https://soundcloud.com/${obj.author}/${obj.song}${obj.accessKey ? `/s-${obj.accessKey}` : ''}` } + if (!link) return { error: 'ErrorCouldntFetch' }; let json = await fetch(`https://api-v2.soundcloud.com/resolve?url=${link}&client_id=${clientId}`).then((r) => { diff --git a/src/modules/processing/services/tumblr.js b/src/modules/processing/services/tumblr.js index f978e5a4..f894e4e4 100644 --- a/src/modules/processing/services/tumblr.js +++ b/src/modules/processing/services/tumblr.js @@ -2,7 +2,7 @@ import psl from "psl"; import { genericUserAgent } from "../../config.js"; export default async function(obj) { - const { subdomain } = psl.parse(obj.url); + const { subdomain } = psl.parse(obj.url.hostname); if (subdomain?.includes('.')) return { error: 'ErrorBrokenLink' } From 34d8333d726db624fdb1fad21ab7a00527e7791e Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:05:57 +0000 Subject: [PATCH 10/22] tumblr: render error template for broken links --- src/modules/processing/services/tumblr.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/processing/services/tumblr.js b/src/modules/processing/services/tumblr.js index f894e4e4..90eb45c2 100644 --- a/src/modules/processing/services/tumblr.js +++ b/src/modules/processing/services/tumblr.js @@ -4,7 +4,7 @@ import { genericUserAgent } from "../../config.js"; export default async function(obj) { const { subdomain } = psl.parse(obj.url.hostname); if (subdomain?.includes('.')) - return { error: 'ErrorBrokenLink' } + return { error: ['ErrorBrokenLink', 'tumblr'] } let html = await fetch(`https://${obj.user ?? subdomain}.tumblr.com/post/${obj.id}`, { headers: { "user-agent": genericUserAgent } From ba35ec923e87382e013f52c74848794d5640f330 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:14:22 +0000 Subject: [PATCH 11/22] url: re-parse hostname after validating --- src/modules/processing/url.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index 0420a465..ab995b72 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -48,11 +48,12 @@ export function aliasURL(url) { break; } - return { url, host: host.sld } + return url } -export function cleanURL({ url, host }) { +export function cleanURL(url) { assert(url instanceof URL); + const host = psl.parse(url.hostname).sld; let stripQuery = true; if (host === 'pinterest') { From 0244c40d0b58213e94289d5a8ef7c02c44d2f329 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:23:21 +0000 Subject: [PATCH 12/22] config: add "@" to allowed pattern symbols needed for tiktok urls --- src/modules/config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/config.js b/src/modules/config.js index a0525ae8..5e079536 100644 --- a/src/modules/config.js +++ b/src/modules/config.js @@ -7,7 +7,7 @@ const servicesConfigJson = loadJSON("./src/modules/processing/servicesConfig.jso Object.values(servicesConfigJson.config).forEach(service => { service.patterns = service.patterns.map( pattern => new UrlPattern(pattern, { - segmentValueCharset: UrlPattern.defaultOptions.segmentValueCharset + '\\.' + segmentValueCharset: UrlPattern.defaultOptions.segmentValueCharset + '@\\.' }) ) }) From 3a00bc7f8d971370bdaa30812b285185d46dd4df Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:37:10 +0000 Subject: [PATCH 13/22] url: remove tumblr aliasing not quite sure what its purpose is/was anyways (tracking id removal? it's not used anyways) --- src/modules/processing/url.js | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index ab995b72..a7f35f2d 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -12,7 +12,7 @@ export function aliasURL(url) { case "youtube": if (url.pathname.startsWith('/live/') || url.pathname.startsWith('/shorts/')) { url.pathname = '/watch'; - // ['', 'live' || 'shorts', id, ...rest] + // parts := ['', 'live' || 'shorts', id, ...rest] url.search = `?v=${encodeURIComponent(parts[2])}` } break; @@ -33,14 +33,6 @@ export function aliasURL(url) { } break; - case "tumblr": - if (!url.pathname.includes("/blog/view")) { - if (url.pathname.endsWith('/')) - url.pathname = url.pathname.slice(0, -1); - url.pathname = url.pathname.replace(parts[5], '') - } - break; - case "twitch": if (url.hostname === 'clips.twitch.tv' && parts.length >= 2) { url = new URL(`https://twitch.tv/_/clip/${parts[1]}`); From 818c236782993c89b810d26f502fcd3645318c95 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:42:53 +0000 Subject: [PATCH 14/22] package.json: lock psl version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ed639ba2..e839fe34 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,7 @@ "hls-parser": "^0.10.7", "nanoid": "^4.0.2", "node-cache": "^5.1.2", - "psl": "^1.9.0", + "psl": "1.9.0", "set-cookie-parser": "2.6.0", "undici": "^5.19.1", "url-pattern": "1.0.3", From e1fa32beb3eb60696a9e525a298070dde28886db Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:46:49 +0000 Subject: [PATCH 15/22] front: don't mangle and encode urls when sending to api --- src/front/cobalt.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/front/cobalt.js b/src/front/cobalt.js index 183716cb..b3d3b7a8 100644 --- a/src/front/cobalt.js +++ b/src/front/cobalt.js @@ -1,4 +1,4 @@ -const version = 39; +const version = 40; const ua = navigator.userAgent.toLowerCase(); const isIOS = ua.match("iphone os"); @@ -358,7 +358,7 @@ async function download(url) { eid("url-clear").style.display = "none"; eid("url-input-area").disabled = true; let req = { - url: encodeURIComponent(url.split("&")[0].split('%')[0]), + url, aFormat: sGet("aFormat").slice(0, 4), filenamePattern: sGet("filenamePattern"), dubLang: false From 18a3c06a9eb3b2e250ab51ca80390f55e90e38d7 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Thu, 14 Dec 2023 23:57:00 +0000 Subject: [PATCH 16/22] url: always strip username, password, port, fragment --- src/modules/processing/url.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index a7f35f2d..246d9620 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -60,9 +60,11 @@ export function cleanURL(url) { } if (stripQuery) { - url.search = url.hash = '' + url.search = '' } + url.username = url.password = url.port = url.hash = '' + if (url.pathname.endsWith('/')) url.pathname = url.pathname.slice(0, -1); From 5928b21feee9fa4a51debbdd1bf2760bff660096 Mon Sep 17 00:00:00 2001 From: dumbmoron Date: Mon, 18 Dec 2023 12:44:18 +0000 Subject: [PATCH 17/22] tumblr: fix priority of subdomain/segment for username --- src/modules/processing/services/tumblr.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/modules/processing/services/tumblr.js b/src/modules/processing/services/tumblr.js index 90eb45c2..08b3a3e2 100644 --- a/src/modules/processing/services/tumblr.js +++ b/src/modules/processing/services/tumblr.js @@ -2,11 +2,13 @@ import psl from "psl"; import { genericUserAgent } from "../../config.js"; export default async function(obj) { - const { subdomain } = psl.parse(obj.url.hostname); + let { subdomain } = psl.parse(obj.url.hostname); if (subdomain?.includes('.')) return { error: ['ErrorBrokenLink', 'tumblr'] } + else if (subdomain === 'www') + subdomain = undefined; - let html = await fetch(`https://${obj.user ?? subdomain}.tumblr.com/post/${obj.id}`, { + let html = await fetch(`https://${subdomain ?? obj.user}.tumblr.com/post/${obj.id}`, { headers: { "user-agent": genericUserAgent } }).then((r) => { return r.text() }).catch(() => { return false }); From aaa61cfee9b6f0c60ef87ebd7d3b5194cb9d054a Mon Sep 17 00:00:00 2001 From: wukko Date: Mon, 25 Dec 2023 18:04:52 +0600 Subject: [PATCH 18/22] processing url: alt domains for services and fixvx support --- src/modules/processing/servicesConfig.json | 1 + src/modules/processing/url.js | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/modules/processing/servicesConfig.json b/src/modules/processing/servicesConfig.json index e33bdab4..b2b260f3 100644 --- a/src/modules/processing/servicesConfig.json +++ b/src/modules/processing/servicesConfig.json @@ -13,6 +13,7 @@ }, "twitter": { "alias": "twitter videos & voice", + "altDomains": ["x.com", "vxtwitter.com", "fixvx.com"], "patterns": [":user/status/:id", ":user/status/:id/video/:v"], "enabled": true }, diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index 246d9620..bb602109 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -27,8 +27,9 @@ export function aliasURL(url) { break; case "vxtwitter": + case "fixvx": case "x": - if (['x.com', 'vxtwitter.com'].includes(url.hostname)) { + if (services.twitter.altDomains.includes(url.hostname)) { url.hostname = 'twitter.com' } break; From 509d24fd87a24871c4d235a0a5f8adceae019bc2 Mon Sep 17 00:00:00 2001 From: wukko Date: Mon, 25 Dec 2023 18:07:01 +0600 Subject: [PATCH 19/22] servicesConfig: add subdomains for twitter and youtube --- src/modules/processing/servicesConfig.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/modules/processing/servicesConfig.json b/src/modules/processing/servicesConfig.json index b2b260f3..9d296114 100644 --- a/src/modules/processing/servicesConfig.json +++ b/src/modules/processing/servicesConfig.json @@ -14,6 +14,7 @@ "twitter": { "alias": "twitter videos & voice", "altDomains": ["x.com", "vxtwitter.com", "fixvx.com"], + "subdomains": ["mobile", "www"], "patterns": [":user/status/:id", ":user/status/:id/video/:v"], "enabled": true }, @@ -25,7 +26,7 @@ "youtube": { "alias": "youtube videos, shorts & music", "patterns": ["watch?v=:id", "embed/:id", "watch/:id"], - "subdomains": ["music"], + "subdomains": ["music", "www", "m"], "bestAudio": "opus", "enabled": true }, From 88666eeeaa546c9a85661b61c3443d5b5fc3d2fd Mon Sep 17 00:00:00 2001 From: wukko Date: Mon, 25 Dec 2023 18:08:17 +0600 Subject: [PATCH 20/22] servicesConfig: www was unnecessary... --- src/modules/processing/servicesConfig.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/processing/servicesConfig.json b/src/modules/processing/servicesConfig.json index 9d296114..1ac431e6 100644 --- a/src/modules/processing/servicesConfig.json +++ b/src/modules/processing/servicesConfig.json @@ -26,7 +26,7 @@ "youtube": { "alias": "youtube videos, shorts & music", "patterns": ["watch?v=:id", "embed/:id", "watch/:id"], - "subdomains": ["music", "www", "m"], + "subdomains": ["music", "m"], "bestAudio": "opus", "enabled": true }, From d6e4b5ac20fb51186e819edddef916fcf53c7138 Mon Sep 17 00:00:00 2001 From: wukko Date: Mon, 25 Dec 2023 18:08:47 +0600 Subject: [PATCH 21/22] servicesConfig: www is unnecessary here too --- src/modules/processing/servicesConfig.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/processing/servicesConfig.json b/src/modules/processing/servicesConfig.json index 1ac431e6..46519cf2 100644 --- a/src/modules/processing/servicesConfig.json +++ b/src/modules/processing/servicesConfig.json @@ -14,7 +14,7 @@ "twitter": { "alias": "twitter videos & voice", "altDomains": ["x.com", "vxtwitter.com", "fixvx.com"], - "subdomains": ["mobile", "www"], + "subdomains": ["mobile"], "patterns": [":user/status/:id", ":user/status/:id/video/:v"], "enabled": true }, From ddc87ca42bb4cf1f7e07283897a679d0a3227057 Mon Sep 17 00:00:00 2001 From: wukko Date: Mon, 25 Dec 2023 18:20:51 +0600 Subject: [PATCH 22/22] api: clean url upon entry, not down the road --- src/core/api.js | 2 +- src/modules/api.js | 5 ++--- src/modules/processing/url.js | 2 +- src/modules/sub/utils.js | 4 +++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/core/api.js b/src/core/api.js index 4e78fbb5..71ce8d3e 100644 --- a/src/core/api.js +++ b/src/core/api.js @@ -97,7 +97,7 @@ export function runAPI(express, app, gitCommit, gitBranch, __dirname) { let chck = checkJSONPost(request); if (!chck) throw new Error(); - j = await getJSON(chck["url"], lang, chck); + j = await getJSON(chck.url, lang, chck); } else { j = apiJSON(0, { t: !contentCon ? "invalid content type header" : loc(lang, 'ErrorNoLink') diff --git a/src/modules/api.js b/src/modules/api.js index 21132022..c3549bb3 100644 --- a/src/modules/api.js +++ b/src/modules/api.js @@ -4,11 +4,10 @@ import { apiJSON } from "./sub/utils.js"; import { errorUnsupported } from "./sub/errors.js"; import loc from "../localization/manager.js"; import match from "./processing/match.js"; -import { getHostIfValid, normalizeURL } from "./processing/url.js"; +import { getHostIfValid } from "./processing/url.js"; -export async function getJSON(originalURL, lang, obj) { +export async function getJSON(url, lang, obj) { try { - const url = normalizeURL(decodeURIComponent(originalURL)); const host = getHostIfValid(url); if (!host || !services[host].enabled) { diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index bb602109..2f1ac87f 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -93,4 +93,4 @@ export function getHostIfValid(url) { if (!validSubdomain && !anySubdomainAllowed) return; return host.sld; -} \ No newline at end of file +} diff --git a/src/modules/sub/utils.js b/src/modules/sub/utils.js index ef64d07b..28d37c6c 100644 --- a/src/modules/sub/utils.js +++ b/src/modules/sub/utils.js @@ -1,3 +1,4 @@ +import { normalizeURL } from "../processing/url.js"; import { createStream } from "../stream/manage.js"; const apiVar = { @@ -72,6 +73,7 @@ export function unicodeDecode(str) { } export function checkJSONPost(obj) { let def = { + url: normalizeURL(decodeURIComponent(obj.url)), vCodec: "h264", vQuality: "720", aFormat: "mp3", @@ -101,7 +103,7 @@ export function checkJSONPost(obj) { if (def.dubLang) def.dubLang = verifyLanguageCode(obj.dubLang); - def.url = obj.url; + return def } catch (e) { return false