Merge pull request #11648 from liamwhite/unicode-nonsense

gdbserver: use numeric character references for unicode
This commit is contained in:
liamwhite 2023-10-07 12:49:27 -04:00 committed by GitHub
commit 0e9b839b6f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 2 deletions

View file

@ -135,6 +135,11 @@ std::u16string UTF8ToUTF16(std::string_view input) {
return convert.from_bytes(input.data(), input.data() + input.size());
}
std::u32string UTF8ToUTF32(std::string_view input) {
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert;
return convert.from_bytes(input.data(), input.data() + input.size());
}
#ifdef _WIN32
static std::wstring CPToUTF16(u32 code_page, std::string_view input) {
const auto size =

View file

@ -38,6 +38,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
[[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input);
[[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input);
[[nodiscard]] std::u32string UTF8ToUTF32(std::string_view input);
#ifdef _WIN32
[[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input);

View file

@ -2,6 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
#include <codecvt>
#include <locale>
#include <numeric>
#include <optional>
#include <thread>
@ -12,6 +14,7 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/string_util.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/debugger/gdbstub.h"
@ -68,10 +71,16 @@ static std::string EscapeGDB(std::string_view data) {
}
static std::string EscapeXML(std::string_view data) {
std::u32string converted = U"[Encoding error]";
try {
converted = Common::UTF8ToUTF32(data);
} catch (std::range_error&) {
}
std::string escaped;
escaped.reserve(data.size());
for (char c : data) {
for (char32_t c : converted) {
switch (c) {
case '&':
escaped += "&amp;";
@ -86,7 +95,11 @@ static std::string EscapeXML(std::string_view data) {
escaped += "&gt;";
break;
default:
escaped += c;
if (c > 0x7f) {
escaped += fmt::format("&#{};", static_cast<u32>(c));
} else {
escaped += static_cast<char>(c);
}
break;
}
}