diff --git a/.gitmodules b/.gitmodules index bf3b80d59..2ec9dda62 100644 --- a/.gitmodules +++ b/.gitmodules @@ -28,3 +28,6 @@ [submodule "libzip"] path = externals/libzip/libzip url = https://github.com/nih-at/libzip.git +[submodule "xbyak"] + path = externals/xbyak + url = https://github.com/herumi/xbyak.git diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 0b40cd1b0..df7a5e0a9 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -75,3 +75,11 @@ if (ENABLE_WEB_SERVICE) target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) endif() + +if (NOT TARGET xbyak) + if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) + add_library(xbyak INTERFACE) + target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak) + target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES) + endif() +endif() diff --git a/externals/xbyak b/externals/xbyak new file mode 160000 index 000000000..82b70e665 --- /dev/null +++ b/externals/xbyak @@ -0,0 +1 @@ +Subproject commit 82b70e665918efc2ee348091742fd0237b3b68c5 diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 264dff546..24b7a083c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -171,10 +171,12 @@ if(ARCHITECTURE_x86_64) PRIVATE x64/cpu_detect.cpp x64/cpu_detect.h + x64/xbyak_abi.h + x64/xbyak_util.h ) endif() create_target_directory_groups(common) target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) -target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) +target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h new file mode 100644 index 000000000..794da8a52 --- /dev/null +++ b/src/common/x64/xbyak_abi.h @@ -0,0 +1,266 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/assert.h" + +namespace Common::X64 { + +inline int RegToIndex(const Xbyak::Reg& reg) { + using Kind = Xbyak::Reg::Kind; + ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, + "RegSet only support GPRs and XMM registers."); + ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15."); + return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); +} + +inline Xbyak::Reg64 IndexToReg64(int reg_index) { + ASSERT(reg_index < 16); + return Xbyak::Reg64(reg_index); +} + +inline Xbyak::Xmm IndexToXmm(int reg_index) { + ASSERT(reg_index >= 16 && reg_index < 32); + return Xbyak::Xmm(reg_index - 16); +} + +inline Xbyak::Reg IndexToReg(int reg_index) { + if (reg_index < 16) { + return IndexToReg64(reg_index); + } else { + return IndexToXmm(reg_index); + } +} + +inline std::bitset<32> BuildRegSet(std::initializer_list regs) { + std::bitset<32> bits; + for (const Xbyak::Reg& reg : regs) { + bits[RegToIndex(reg)] = true; + } + return bits; +} + +const std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); +const std::bitset<32> ABI_ALL_XMMS(0xFFFF0000); + +#ifdef _WIN32 + +// Microsoft x64 ABI +const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; +const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; +const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; +const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; + +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rcx, + Xbyak::util::rdx, + Xbyak::util::r8, + Xbyak::util::r9, + Xbyak::util::r10, + Xbyak::util::r11, + // XMMs + Xbyak::util::xmm0, + Xbyak::util::xmm1, + Xbyak::util::xmm2, + Xbyak::util::xmm3, + Xbyak::util::xmm4, + Xbyak::util::xmm5, +}); + +const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rbx, + Xbyak::util::rsi, + Xbyak::util::rdi, + Xbyak::util::rbp, + Xbyak::util::r12, + Xbyak::util::r13, + Xbyak::util::r14, + Xbyak::util::r15, + // XMMs + Xbyak::util::xmm6, + Xbyak::util::xmm7, + Xbyak::util::xmm8, + Xbyak::util::xmm9, + Xbyak::util::xmm10, + Xbyak::util::xmm11, + Xbyak::util::xmm12, + Xbyak::util::xmm13, + Xbyak::util::xmm14, + Xbyak::util::xmm15, +}); + +constexpr size_t ABI_SHADOW_SPACE = 0x20; + +#else + +// System V x86-64 ABI +const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; +const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; +const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; +const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; + +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rcx, + Xbyak::util::rdx, + Xbyak::util::rdi, + Xbyak::util::rsi, + Xbyak::util::r8, + Xbyak::util::r9, + Xbyak::util::r10, + Xbyak::util::r11, + // XMMs + Xbyak::util::xmm0, + Xbyak::util::xmm1, + Xbyak::util::xmm2, + Xbyak::util::xmm3, + Xbyak::util::xmm4, + Xbyak::util::xmm5, + Xbyak::util::xmm6, + Xbyak::util::xmm7, + Xbyak::util::xmm8, + Xbyak::util::xmm9, + Xbyak::util::xmm10, + Xbyak::util::xmm11, + Xbyak::util::xmm12, + Xbyak::util::xmm13, + Xbyak::util::xmm14, + Xbyak::util::xmm15, +}); + +const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rbx, + Xbyak::util::rbp, + Xbyak::util::r12, + Xbyak::util::r13, + Xbyak::util::r14, + Xbyak::util::r15, +}); + +constexpr size_t ABI_SHADOW_SPACE = 0; + +#endif + +inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, + size_t needed_frame_size, s32* out_subtraction, + s32* out_xmm_offset) { + const auto count = (regs & ABI_ALL_GPRS).count(); + rsp_alignment -= count * 8; + size_t subtraction = 0; + const auto xmm_count = (regs & ABI_ALL_XMMS).count(); + if (xmm_count) { + // If we have any XMMs to save, we must align the stack here. + subtraction = rsp_alignment & 0xF; + } + subtraction += 0x10 * xmm_count; + size_t xmm_base_subtraction = subtraction; + subtraction += needed_frame_size; + subtraction += ABI_SHADOW_SPACE; + // Final alignment. + rsp_alignment -= subtraction; + subtraction += rsp_alignment & 0xF; + + *out_subtraction = (s32)subtraction; + *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); +} + +inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_GPRS[i]) { + code.push(IndexToReg64(static_cast(i))); + } + } + if (subtraction != 0) { + code.sub(code.rsp, subtraction); + } + + for (int i = 0; i < regs.count(); i++) { + if (regs.test(i) & ABI_ALL_GPRS.test(i)) { + code.push(IndexToReg64(i)); + } + } + + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast(i))); + xmm_offset += 0x10; + } + } + + return ABI_SHADOW_SPACE; +} + +inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(IndexToXmm(static_cast(i)), code.xword[code.rsp + xmm_offset]); + xmm_offset += 0x10; + } + } + + if (subtraction != 0) { + code.add(code.rsp, subtraction); + } + + // GPRs need to be popped in reverse order + for (int i = 15; i >= 0; i--) { + if (regs[i]) { + code.pop(IndexToReg64(i)); + } + } +} + +inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, + size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_GPRS[i]) { + code.push(IndexToReg64(static_cast(i))); + } + } + + if (subtraction != 0) { + code.sub(code.rsp, subtraction); + } + + return ABI_SHADOW_SPACE; +} + +inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + + if (subtraction != 0) { + code.add(code.rsp, subtraction); + } + + // GPRs need to be popped in reverse order + for (int i = 15; i >= 0; i--) { + if (regs[i]) { + code.pop(IndexToReg64(i)); + } + } +} + +} // namespace Common::X64 diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h new file mode 100644 index 000000000..df17f8cbe --- /dev/null +++ b/src/common/x64/xbyak_util.h @@ -0,0 +1,47 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/x64/xbyak_abi.h" + +namespace Common::X64 { + +// Constants for use with cmpps/cmpss +enum { + CMP_EQ = 0, + CMP_LT = 1, + CMP_LE = 2, + CMP_UNORD = 3, + CMP_NEQ = 4, + CMP_NLT = 5, + CMP_NLE = 6, + CMP_ORD = 7, +}; + +constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) { + const u64 distance = target - (ref + 5); + return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL); +} + +inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) { + return IsWithin2G(reinterpret_cast(code.getCurr()), target); +} + +template +inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) { + static_assert(std::is_pointer_v, "Argument must be a (function) pointer."); + size_t addr = reinterpret_cast(f); + if (IsWithin2G(code, addr)) { + code.call(f); + } else { + // ABI_RETURN is a safe temp register to use before a call + code.mov(ABI_RETURN, addr); + code.call(ABI_RETURN); + } +} + +} // namespace Common::X64 diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f00c71dae..d6ee82836 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -229,7 +229,7 @@ endif() create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad) +target_link_libraries(video_core PRIVATE glad xbyak) if (ENABLE_VULKAN) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)