From a6a43a5ae047404ca0b03aa647ed5b17400ca7b6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:28:30 +0100 Subject: [PATCH 1/7] macro_jit_x64: Remove RESULT_64 This Reg64 codepath has the exact same behaviour as the Reg32 one. --- src/video_core/macro/macro_jit_x64.cpp | 18 +++--------------- src/video_core/macro/macro_jit_x64.h | 1 - 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 11c1cc3be..9a9d50866 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -19,7 +19,6 @@ static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; -static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; @@ -64,15 +63,15 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { const bool is_move_operation = !is_a_zero && is_b_zero; const bool has_zero_register = is_a_zero || is_b_zero; - Xbyak::Reg64 src_a; + Xbyak::Reg32 src_a; Xbyak::Reg32 src_b; if (!optimizer.zero_reg_skip) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_a = Compile_GetRegister(opcode.src_a, RESULT); src_b = Compile_GetRegister(opcode.src_b, ebx); } else { if (!is_a_zero) { - src_a = Compile_GetRegister(opcode.src_a, RESULT_64); + src_a = Compile_GetRegister(opcode.src_a, RESULT); } if (!is_b_zero) { src_b = Compile_GetRegister(opcode.src_b, ebx); @@ -553,17 +552,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { return dst; } -Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { - if (index == 0) { - // Register 0 is 
always zero - xor_(dst, dst); - } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); - } - - return dst; -} - void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { Xbyak::Label zero{}, end{}; xor_(ecx, ecx); diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 21ee157cf..377368086 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -55,7 +55,6 @@ private: Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst); void Compile_WriteCarry(Xbyak::Reg64 dst); void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); From 389549b80d7cd7054ec622f4038ff599386e1c04 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:51:33 +0100 Subject: [PATCH 2/7] macro_jit_x64: Remove METHOD_ADDRESS_64 Unnecessary variable. --- src/video_core/macro/macro_jit_x64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 9a9d50866..1dcf9957c 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -20,7 +20,6 @@ static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ @@ -328,7 +327,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { and_(METHOD_ADDRESS, 0xfff); shr(ecx, 12); and_(ecx, 0x3f); - lea(eax, ptr[rcx + METHOD_ADDRESS_64]); + lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); sal(ecx, 12); or_(eax, ecx); From 
35db6e1c68f18f401bcae8bd8e8937648c7c67c6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 20:55:02 +0100 Subject: [PATCH 3/7] macro_jit_x64: Remove JITState::parameters This can be passed in as an argument instead. --- src/video_core/macro/macro_jit_x64.cpp | 6 ++---- src/video_core/macro/macro_jit_x64.h | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1dcf9957c..f1d123f51 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -51,8 +51,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { JITState state{}; state.maxwell3d = &maxwell3d; state.registers = {}; - state.parameters = parameters.data(); - program(&state); + program(&state, parameters.data()); } void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { @@ -422,8 +421,7 @@ void MacroJITx64Impl::Compile() { Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); // JIT state mov(STATE, Common::X64::ABI_PARAM1); - mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + - static_cast(offsetof(JITState, parameters))]); + mov(PARAMETERS, Common::X64::ABI_PARAM2); mov(REGISTERS, Common::X64::ABI_PARAM1); add(REGISTERS, static_cast(offsetof(JITState, registers))); xor_(RESULT, RESULT); diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 377368086..9167b2a93 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -66,11 +66,10 @@ private: struct JITState { Engines::Maxwell3D* maxwell3d{}; std::array registers{}; - const u32* parameters{}; u32 carry_flag{}; }; static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*); + using ProgramType = void (*)(JITState*, const u32*); struct OptimizerState { bool can_skip_carry{}; From 
79aa7b3aceeecadfb5b15bc25431db7768434f23 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:00:59 +0100 Subject: [PATCH 4/7] macro_jit_x64: Remove REGISTERS Unnecessary since this is just an offset from STATE. --- src/video_core/macro/macro_jit_x64.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index f1d123f51..da3b86d3d 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -15,7 +15,6 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255 namespace Tegra { static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; -static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; static const Xbyak::Reg64 STATE = Xbyak::util::r11; static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; @@ -24,7 +23,6 @@ static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ PARAMETERS, - REGISTERS, STATE, NEXT_PARAMETER, RESULT, @@ -422,14 +420,12 @@ void MacroJITx64Impl::Compile() { // JIT state mov(STATE, Common::X64::ABI_PARAM1); mov(PARAMETERS, Common::X64::ABI_PARAM2); - mov(REGISTERS, Common::X64::ABI_PARAM1); - add(REGISTERS, static_cast(offsetof(JITState, registers))); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); xor_(NEXT_PARAMETER, NEXT_PARAMETER); xor_(BRANCH_HOLDER, BRANCH_HOLDER); - mov(dword[REGISTERS + 4], Compile_FetchParameter()); + mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); // Track get register for zero registers and mark it as no-op optimizer.zero_reg_skip = true; @@ -543,7 +539,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { // Register 0 is always zero xor_(dst, dst); } else { - mov(dst, dword[REGISTERS + index * sizeof(u32)]); + mov(dst, 
dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); } return dst; @@ -564,7 +560,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 if (reg == 0) { return; } - mov(dword[REGISTERS + reg * sizeof(u32)], result); + mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); }; auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; From c09a9e5cc7f53280218cdfbfd7d7ff056f1c2ff5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:12:53 +0100 Subject: [PATCH 5/7] macro_jit_x64: Select better registers All registers are now callee-save registers. RBX and RBP selected for STATE and RESULT because these are most commonly accessed; this is to avoid the REX prefix. RBP not used for STATE because there are some SIB restrictions, RBX emits smaller code. --- src/video_core/macro/macro_jit_x64.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index da3b86d3d..1e7b05ac9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,18 +14,18 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; -static const Xbyak::Reg64 STATE = Xbyak::util::r11; -static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; -static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; +static const Xbyak::Reg64 STATE = Xbyak::util::rbx; +static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; +static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> 
PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ - PARAMETERS, STATE, - NEXT_PARAMETER, RESULT, + PARAMETERS, + NEXT_PARAMETER, METHOD_ADDRESS, BRANCH_HOLDER, }); @@ -64,13 +64,13 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { if (!optimizer.zero_reg_skip) { src_a = Compile_GetRegister(opcode.src_a, RESULT); - src_b = Compile_GetRegister(opcode.src_b, ebx); + src_b = Compile_GetRegister(opcode.src_b, eax); } else { if (!is_a_zero) { src_a = Compile_GetRegister(opcode.src_a, RESULT); } if (!is_b_zero) { - src_b = Compile_GetRegister(opcode.src_b, ebx); + src_b = Compile_GetRegister(opcode.src_b, eax); } } Xbyak::Label skip_carry{}; From 1799f4e7743557c8e41c15201c42431f8d6d6dde Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:14:10 +0100 Subject: [PATCH 6/7] macro_jit_x64: Remove unused function Compile_WriteCarry --- src/video_core/macro/macro_jit_x64.cpp | 8 -------- src/video_core/macro/macro_jit_x64.h | 1 - 2 files changed, 9 deletions(-) diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 1e7b05ac9..b703daad9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -545,14 +545,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { return dst; } -void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { - Xbyak::Label zero{}, end{}; - xor_(ecx, ecx); - shr(dst, 32); - setne(cl); - mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); -} - void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { // Register 0 is supposed to always return 0. 
NOP is implemented as a store to the zero diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 9167b2a93..a05d8df15 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -55,7 +55,6 @@ private: Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - void Compile_WriteCarry(Xbyak::Reg64 dst); void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); void Compile_Send(Xbyak::Reg32 value); From cf0aad7d6a22024362c7adf04b605108141453f6 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 21:16:47 +0100 Subject: [PATCH 7/7] macro_jit_x64: Remove NEXT_PARAMETER Not required, as PARAMETERS can just be incremented directly. --- src/video_core/macro/macro_jit_x64.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index b703daad9..2eb98173d 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -17,7 +17,6 @@ namespace Tegra { static const Xbyak::Reg64 STATE = Xbyak::util::rbx; static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; -static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13; static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; @@ -25,7 +24,6 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, PARAMETERS, - NEXT_PARAMETER, METHOD_ADDRESS, BRANCH_HOLDER, }); @@ -422,7 +420,6 @@ void MacroJITx64Impl::Compile() { mov(PARAMETERS, Common::X64::ABI_PARAM2); xor_(RESULT, RESULT); xor_(METHOD_ADDRESS, METHOD_ADDRESS); - xor_(NEXT_PARAMETER, NEXT_PARAMETER); xor_(BRANCH_HOLDER, BRANCH_HOLDER); mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); @@ -529,8 +526,8 @@ bool 
MacroJITx64Impl::Compile_NextInstruction() { } Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { - mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); - inc(NEXT_PARAMETER); + mov(eax, dword[PARAMETERS]); + add(PARAMETERS, sizeof(u32)); return eax; }