diff --git a/package.json b/package.json index c4b03eb4..e839fe34 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "hls-parser": "^0.10.7", "nanoid": "^4.0.2", "node-cache": "^5.1.2", + "psl": "1.9.0", "set-cookie-parser": "2.6.0", "undici": "^5.19.1", "url-pattern": "1.0.3", diff --git a/src/core/api.js b/src/core/api.js index 4e78fbb5..71ce8d3e 100644 --- a/src/core/api.js +++ b/src/core/api.js @@ -97,7 +97,7 @@ export function runAPI(express, app, gitCommit, gitBranch, __dirname) { let chck = checkJSONPost(request); if (!chck) throw new Error(); - j = await getJSON(chck["url"], lang, chck); + j = await getJSON(chck.url, lang, chck); } else { j = apiJSON(0, { t: !contentCon ? "invalid content type header" : loc(lang, 'ErrorNoLink') diff --git a/src/front/cobalt.js b/src/front/cobalt.js index 183716cb..b3d3b7a8 100644 --- a/src/front/cobalt.js +++ b/src/front/cobalt.js @@ -1,4 +1,4 @@ -const version = 39; +const version = 40; const ua = navigator.userAgent.toLowerCase(); const isIOS = ua.match("iphone os"); @@ -358,7 +358,7 @@ async function download(url) { eid("url-clear").style.display = "none"; eid("url-input-area").disabled = true; let req = { - url: encodeURIComponent(url.split("&")[0].split('%')[0]), + url, aFormat: sGet("aFormat").slice(0, 4), filenamePattern: sGet("filenamePattern"), dubLang: false diff --git a/src/modules/api.js b/src/modules/api.js index 62e9a7c6..c3549bb3 100644 --- a/src/modules/api.js +++ b/src/modules/api.js @@ -1,33 +1,30 @@ -import UrlPattern from "url-pattern"; +import { services } from "./config.js"; -import { services as patterns } from "./config.js"; - -import { cleanURL, apiJSON } from "./sub/utils.js"; +import { apiJSON } from "./sub/utils.js"; import { errorUnsupported } from "./sub/errors.js"; import loc from "../localization/manager.js"; import match from "./processing/match.js"; -import hostOverrides from "./processing/hostOverrides.js"; +import { getHostIfValid } from "./processing/url.js"; -export async function getJSON(originalURL, lang, obj) { +export async function getJSON(url, lang, obj) { try { - let patternMatch, url = encodeURI(decodeURIComponent(originalURL)), - hostname = new URL(url).hostname.split('.'), - host = hostname[hostname.length - 2]; + const host = getHostIfValid(url); - if (!url.startsWith('https://')) return apiJSON(0, { t: errorUnsupported(lang) }); - - let overrides = hostOverrides(host, url); - host = overrides.host; - url = overrides.url; - - if (!(host && host.length < 20 && host in patterns && patterns[host]["enabled"])) return apiJSON(0, { t: errorUnsupported(lang) }); - - let pathToMatch = cleanURL(url, host).split(`.${patterns[host]['tld'] ? patterns[host]['tld'] : "com"}/`)[1].replace('.', ''); - for (let i in patterns[host]["patterns"]) { - patternMatch = new UrlPattern(patterns[host]["patterns"][i]).match(pathToMatch); - if (patternMatch) break + if (!host || !services[host].enabled) { + return apiJSON(0, { t: errorUnsupported(lang) }); + } + + let patternMatch; + for (const pattern of services[host].patterns) { + patternMatch = pattern.match( + url.pathname.substring(1) + url.search + ); + if (patternMatch) break; + } + + if (!patternMatch) { + return apiJSON(0, { t: errorUnsupported(lang) }); } - if (!patternMatch) return apiJSON(0, { t: errorUnsupported(lang) }); return await match(host, patternMatch, url, lang, obj) } catch (e) { diff --git a/src/modules/config.js b/src/modules/config.js index 6fbe9d43..5e079536 100644 --- a/src/modules/config.js +++ b/src/modules/config.js @@ -1,8 +1,17 @@ +import UrlPattern from "url-pattern"; import { loadJSON } from "./sub/loadFromFs.js"; const config = loadJSON("./src/config.json"); const packageJson = loadJSON("./package.json"); const servicesConfigJson = loadJSON("./src/modules/processing/servicesConfig.json"); +Object.values(servicesConfigJson.config).forEach(service => { + service.patterns = service.patterns.map( + pattern => new UrlPattern(pattern, { + segmentValueCharset: UrlPattern.defaultOptions.segmentValueCharset + '@\\.' + }) + ) +}) + export const services = servicesConfigJson.config, audioIgnore = servicesConfigJson.audioIgnore, diff --git a/src/modules/processing/hostOverrides.js b/src/modules/processing/hostOverrides.js deleted file mode 100644 index 88553e35..00000000 --- a/src/modules/processing/hostOverrides.js +++ /dev/null @@ -1,48 +0,0 @@ -export default function (inHost, inURL) { - let host = String(inHost); - let url = String(inURL); - - switch(host) { - case "youtube": - if (url.startsWith("https://youtube.com/live/") || url.startsWith("https://www.youtube.com/live/")) { - url = url.split("?")[0].replace("www.", ""); - url = `https://youtube.com/watch?v=${url.replace("https://youtube.com/live/", "")}` - } - if (url.includes('youtube.com/shorts/')) { - url = url.split('?')[0].replace('shorts/', 'watch?v='); - } - break; - case "youtu": - if (url.startsWith("https://youtu.be/")) { - host = "youtube"; - url = `https://youtube.com/watch?v=${url.replace("https://youtu.be/", "")}` - } - break; - case "vxtwitter": - case "x": - if (url.startsWith("https://x.com/")) { - host = "twitter"; - url = url.replace("https://x.com/", "https://twitter.com/") - } - if (url.startsWith("https://vxtwitter.com/")) { - host = "twitter"; - url = url.replace("https://vxtwitter.com/", "https://twitter.com/") - } - break; - case "tumblr": - if (!url.includes("blog/view")) { - if (url.slice(-1) === '/') url = url.slice(0, -1); - url = url.replace(url.split('/')[5], '') - } - break; - case "twitch": - if (url.includes('clips.twitch.tv')) { - url = url.split('?')[0].replace('clips.twitch.tv/', 'twitch.tv/_/clip/'); - } - break; - } - return { - host: host, - url: url - } -} diff --git a/src/modules/processing/match.js b/src/modules/processing/match.js index 86b6af82..fd7e6ec9 100644 --- a/src/modules/processing/match.js +++ b/src/modules/processing/match.js @@ -1,3 +1,5 @@ +import { strict as assert } from "node:assert"; + import { apiJSON } from "../sub/utils.js"; import { errorUnsupported, genericError, brokenLink } from "../sub/errors.js"; @@ -23,6 +25,8 @@ import twitch from "./services/twitch.js"; import rutube from "./services/rutube.js"; export default async function(host, patternMatch, url, lang, obj) { + assert(url instanceof URL); + try { let r, isAudioOnly = !!obj.isAudioOnly, disableMetadata = !!obj.disableMetadata; @@ -37,7 +41,6 @@ export default async function(host, patternMatch, url, lang, obj) { break; case "vk": r = await vk({ - url: url, userId: patternMatch["userId"], videoId: patternMatch["videoId"], quality: obj.vQuality @@ -57,11 +60,13 @@ export default async function(host, patternMatch, url, lang, obj) { isAudioMuted: obj.isAudioMuted, dubLang: obj.dubLang } - if (url.match('music.youtube.com') || isAudioOnly === true) { + + if (url.hostname === 'music.youtube.com' || isAudioOnly === true) { fetchInfo.quality = "max"; fetchInfo.format = "vp9"; fetchInfo.isAudioOnly = true } + r = await youtube(fetchInfo); break; case "reddit": @@ -83,9 +88,9 @@ export default async function(host, patternMatch, url, lang, obj) { break; case "tumblr": r = await tumblr({ - id: patternMatch["id"], - url: url, - user: patternMatch["user"] || false + id: patternMatch.id, + user: patternMatch.user, + url }); break; case "vimeo": @@ -99,7 +104,7 @@ export default async function(host, patternMatch, url, lang, obj) { case "soundcloud": isAudioOnly = true; r = await soundcloud({ - url: url, + url, author: patternMatch["author"], song: patternMatch["song"], shortLink: patternMatch["shortLink"] || false, diff --git a/src/modules/processing/services/soundcloud.js b/src/modules/processing/services/soundcloud.js index fcc6de02..b13c0440 100644 --- a/src/modules/processing/services/soundcloud.js +++ b/src/modules/processing/services/soundcloud.js @@ -39,17 +39,18 @@ export default async function(obj) { if (!clientId) return { error: 'ErrorSoundCloudNoClientId' }; let link; - if (obj.shortLink && !obj.author && !obj.song) { + if (obj.url.hostname === 'on.soundcloud.com' && obj.shortLink) { link = await fetch(`https://on.soundcloud.com/${obj.shortLink}/`, { redirect: "manual" }).then((r) => { if (r.status === 302 && r.headers.get("location").startsWith("https://soundcloud.com/")) { return r.headers.get("location").split('?', 1)[0] } - return false - }).catch(() => { return false }); + }).catch(() => {}); } + if (!link && obj.author && obj.song) { link = `https://soundcloud.com/${obj.author}/${obj.song}${obj.accessKey ? `/s-${obj.accessKey}` : ''}` } + if (!link) return { error: 'ErrorCouldntFetch' }; let json = await fetch(`https://api-v2.soundcloud.com/resolve?url=${link}&client_id=${clientId}`).then((r) => { diff --git a/src/modules/processing/services/tumblr.js b/src/modules/processing/services/tumblr.js index 7ae7336c..08b3a3e2 100644 --- a/src/modules/processing/services/tumblr.js +++ b/src/modules/processing/services/tumblr.js @@ -1,9 +1,14 @@ +import psl from "psl"; import { genericUserAgent } from "../../config.js"; export default async function(obj) { - let html = await fetch(`https://${ - obj.user ? obj.user : obj.url.split('.')[0].replace('https://', '') - }.tumblr.com/post/${obj.id}`, { + let { subdomain } = psl.parse(obj.url.hostname); + if (subdomain?.includes('.')) + return { error: ['ErrorBrokenLink', 'tumblr'] } + else if (subdomain === 'www') + subdomain = undefined; + + let html = await fetch(`https://${subdomain ?? obj.user}.tumblr.com/post/${obj.id}`, { headers: { "user-agent": genericUserAgent } }).then((r) => { return r.text() }).catch(() => { return false }); diff --git a/src/modules/processing/servicesConfig.json b/src/modules/processing/servicesConfig.json index 6425f0f7..46519cf2 100644 --- a/src/modules/processing/servicesConfig.json +++ b/src/modules/processing/servicesConfig.json @@ -13,6 +13,8 @@ }, "twitter": { "alias": "twitter videos & voice", + "altDomains": ["x.com", "vxtwitter.com", "fixvx.com"], + "subdomains": ["mobile"], "patterns": [":user/status/:id", ":user/status/:id/video/:v"], "enabled": true }, @@ -24,22 +26,26 @@ "youtube": { "alias": "youtube videos, shorts & music", "patterns": ["watch?v=:id", "embed/:id", "watch/:id"], + "subdomains": ["music", "m"], "bestAudio": "opus", "enabled": true }, "tumblr": { "patterns": ["post/:id", "blog/view/:user/:id", ":user/:id", ":user/:id/:trackingId"], + "subdomains": "*", "enabled": true }, "tiktok": { "alias": "tiktok videos, photos & audio", "patterns": [":user/video/:postId", ":id", "t/:id"], + "subdomains": ["vt", "vm"], "audioFormats": ["best", "m4a", "mp3"], "enabled": true }, "douyin": { "alias": "douyin videos & audio", "patterns": ["video/:postId", ":id"], + "subdomains": ["v"], "enabled": false }, "vimeo": { @@ -49,6 +55,7 @@ }, "soundcloud": { "patterns": [":author/:song/s-:accessKey", ":author/:song", ":shortLink"], + "subdomains": ["on"], "bestAudio": "opus", "enabled": true }, diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js new file mode 100644 index 00000000..2f1ac87f --- /dev/null +++ b/src/modules/processing/url.js @@ -0,0 +1,96 @@ +import { services } from "../config.js"; +import { strict as assert } from "node:assert"; +import psl from "psl"; + +export function aliasURL(url) { + assert(url instanceof URL); + + const host = psl.parse(url.hostname); + const parts = url.pathname.split('/'); + + switch (host.sld) { + case "youtube": + if (url.pathname.startsWith('/live/') || url.pathname.startsWith('/shorts/')) { + url.pathname = '/watch'; + // parts := ['', 'live' || 'shorts', id, ...rest] + url.search = `?v=${encodeURIComponent(parts[2])}` + } + break; + case "youtu": + if (url.hostname === 'youtu.be' && parts.length >= 2) { + /* youtu.be urls can be weird, e.g. https://youtu.be///asdasd// still works + ** but we only care about the 1st segment of the path */ + url = new URL(`https://youtube.com/watch?v=${ + encodeURIComponent(parts[1]) + }`) + } + break; + + case "vxtwitter": + case "fixvx": + case "x": + if (services.twitter.altDomains.includes(url.hostname)) { + url.hostname = 'twitter.com' + } + break; + + case "twitch": + if (url.hostname === 'clips.twitch.tv' && parts.length >= 2) { + url = new URL(`https://twitch.tv/_/clip/${parts[1]}`); + } + break; + } + + return url +} + +export function cleanURL(url) { + assert(url instanceof URL); + const host = psl.parse(url.hostname).sld; + let stripQuery = true; + + if (host === 'pinterest') { + url.hostname = 'pinterest.com' + } else if (host === 'vk' && url.pathname.includes('/clip')) { + if (url.searchParams.get('z')) + url.search = '?z=' + encodeURIComponent(url.searchParams.get('z')); + stripQuery = false; + } else if (host === 'youtube' && url.searchParams.get('v')) { + url.search = '?v=' + encodeURIComponent(url.searchParams.get('v')); + stripQuery = false; + } + + if (stripQuery) { + url.search = '' + } + + url.username = url.password = url.port = url.hash = '' + + if (url.pathname.endsWith('/')) + url.pathname = url.pathname.slice(0, -1); + + return url +} + +export function normalizeURL(url) { + return cleanURL( + aliasURL( + new URL(url.replace(/^https\/\//, 'https://')) + ) + ); +} + +export function getHostIfValid(url) { + const host = psl.parse(url.hostname); + if (host.error) return; + + const service = services[host.sld]; + if (!service) return; + if ((service.tld ?? 'com') !== host.tld) return; + + const anySubdomainAllowed = service.subdomains === '*'; + const validSubdomain = [null, 'www', ...(service.subdomains ?? [])].includes(host.subdomain); + if (!validSubdomain && !anySubdomainAllowed) return; + + return host.sld; +} diff --git a/src/modules/sub/utils.js b/src/modules/sub/utils.js index e165a68a..28d37c6c 100644 --- a/src/modules/sub/utils.js +++ b/src/modules/sub/utils.js @@ -1,3 +1,4 @@ +import { normalizeURL } from "../processing/url.js"; import { createStream } from "../stream/manage.js"; const apiVar = { @@ -52,29 +53,7 @@ export function metadataManager(obj) { for (let i in keys) { if (tags.includes(keys[i])) commands.push('-metadata', `${keys[i]}=${obj[keys[i]]}`) } return commands; } -export function cleanURL(url, host) { - switch (host) { - case "vk": - url = url.includes('clip') ? url.split('&')[0] : url.split('?')[0]; - break; - case "youtube": - url = url.split('&')[0]; - break; - case "tiktok": - url = url.replace(/@([a-zA-Z]+(\.[a-zA-Z]+)+)/, "@a") - case "pinterest": - url = url.replace(/:\/\/(?:www.)pinterest(?:\.[a-z.]+)/, "://pinterest.com") - default: - url = url.split('?')[0]; - if (url.substring(url.length - 1) === "/") url = url.substring(0, url.length - 1); - break; - } - for (let i in forbiddenChars) { - url = url.replaceAll(forbiddenChars[i], '') - } - url = url.replace('https//', 'https://') - return url.slice(0, 128) -} + export function cleanString(string) { for (let i in forbiddenCharsString) { string = string.replaceAll("/", "_").replaceAll(forbiddenCharsString[i], '') @@ -94,6 +73,7 @@ export function unicodeDecode(str) { } export function checkJSONPost(obj) { let def = { + url: normalizeURL(decodeURIComponent(obj.url)), vCodec: "h264", vQuality: "720", aFormat: "mp3", @@ -121,12 +101,8 @@ export function checkJSONPost(obj) { } } - if (def.dubLang) def.dubLang = verifyLanguageCode(obj.dubLang); - - obj["url"] = decodeURIComponent(String(obj["url"])); - let hostname = obj["url"].replace("https://", "").replace(' ', '').split('&')[0].split("/")[0].split("."), - host = hostname[hostname.length - 2]; - def["url"] = encodeURIComponent(cleanURL(obj["url"], host)); + if (def.dubLang) + def.dubLang = verifyLanguageCode(obj.dubLang); return def } catch (e) {