diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 841f27d7f..a710c4bc5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -11,6 +11,8 @@ add_library(video_core STATIC engines/maxwell_compute.h gpu.cpp gpu.h + macro_interpreter.cpp + macro_interpreter.h memory_manager.cpp memory_manager.h rasterizer_interface.h diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5359d21a2..85255d0a5 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -386,5 +386,10 @@ std::vector Maxwell3D::GetStageTextures(Regs::ShaderSt return textures; } +u32 Maxwell3D::GetRegisterValue(u32 method) const { + ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); + return regs.reg_array[method]; +} + } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 3066bc606..086ffeb6a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -514,6 +514,9 @@ public: State state{}; + /// Reads a register value located at the input method address + u32 GetRegisterValue(u32 method) const; + /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value, u32 remaining_params); diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp new file mode 100644 index 000000000..993a67746 --- /dev/null +++ b/src/video_core/macro_interpreter.cpp @@ -0,0 +1,257 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro_interpreter.h" + +namespace Tegra { + +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + +void MacroInterpreter::Execute(const std::vector& code, std::vector parameters) { + Reset(); + registers[1] = parameters[0]; + this->parameters = std::move(parameters); + + // Execute the code until we hit an exit condition. + bool keep_executing = true; + while (keep_executing) { + keep_executing = Step(code, false); + } + + // Assert the the macro used all the input parameters + ASSERT(next_parameter_index == this->parameters.size()); +} + +void MacroInterpreter::Reset() { + registers = {}; + pc = 0; + delayed_pc = boost::none; + method_address.raw = 0; + parameters.clear(); + // The next parameter index starts at 1, because $r1 already has the value of the first + // parameter. + next_parameter_index = 1; +} + +bool MacroInterpreter::Step(const std::vector& code, bool is_delay_slot) { + u32 base_address = pc; + + Opcode opcode = GetOpcode(code); + pc += 4; + + // Update the program counter if we were delayed + if (delayed_pc != boost::none) { + ASSERT(is_delay_slot); + pc = *delayed_pc; + delayed_pc = boost::none; + } + + switch (opcode.operation) { + case Operation::ALU: { + u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), + GetRegister(opcode.src_b)); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::AddImmediate: { + ProcessResult(opcode.result_operation, opcode.dst, + GetRegister(opcode.src_a) + opcode.immediate); + break; + } + case Operation::ExtractInsert: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); + dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + dst |= src << opcode.bf_dst_bit; + ProcessResult(opcode.result_operation, opcode.dst, dst); + break; + } + case Operation::ExtractShiftLeftImmediate: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::ExtractShiftLeftRegister: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::Read: { + u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::Branch: { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + u32 value = GetRegister(opcode.src_a); + bool taken = EvaluateBranchCondition(opcode.branch_condition, value); + if (taken) { + // Ignore the delay slot if the branch has the annul bit. + if (opcode.branch_annul) { + pc = base_address + (opcode.immediate << 2); + return true; + } + + delayed_pc = base_address + (opcode.immediate << 2); + // Execute one more instruction due to the delay slot. + return Step(code, true); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation %u", + static_cast(opcode.operation.Value())); + } + + if (opcode.is_exit) { + // Exit has a delay slot, execute the next instruction + // Note: Executing an exit during a branch delay slot will cause the instruction at the + // branch target to be executed before exiting. + Step(code, true); + return false; + } + + return true; +} + +MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector& code) const { + ASSERT((pc % sizeof(u32)) == 0); + ASSERT(pc < code.size() * sizeof(u32)); + return {code[pc / sizeof(u32)]}; +} + +u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { + switch (operation) { + case ALUOperation::Add: + return src_a + src_b; + // TODO(Subv): Implement AddWithCarry + case ALUOperation::Subtract: + return src_a - src_b; + // TODO(Subv): Implement SubtractWithBorrow + case ALUOperation::Xor: + return src_a ^ src_b; + case ALUOperation::Or: + return src_a | src_b; + case ALUOperation::And: + return src_a & src_b; + case ALUOperation::AndNot: + return src_a & ~src_b; + case ALUOperation::Nand: + return ~(src_a & src_b); + + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation %u", static_cast(operation)); + } +} + +void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) { + switch (operation) { + case ResultOperation::IgnoreAndFetch: + // Fetch parameter and ignore result. + SetRegister(reg, FetchParameter()); + break; + case ResultOperation::Move: + // Move result. + SetRegister(reg, result); + break; + case ResultOperation::MoveAndSetMethod: + // Move result and use as Method Address. + SetRegister(reg, result); + SetMethodAddress(result); + break; + case ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, FetchParameter()); + Send(result); + break; + case ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, result); + Send(result); + break; + case ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, FetchParameter()); + SetMethodAddress(result); + break; + case ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, result); + SetMethodAddress(result); + Send(FetchParameter()); + break; + case ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. + SetRegister(reg, result); + SetMethodAddress(result); + Send((result >> 12) & 0b111111); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented result operation %u", static_cast(operation)); + } +} + +u32 MacroInterpreter::FetchParameter() { + ASSERT(next_parameter_index < parameters.size()); + return parameters[next_parameter_index++]; +} + +u32 MacroInterpreter::GetRegister(u32 register_id) const { + // Register 0 is supposed to always return 0. + if (register_id == 0) + return 0; + + ASSERT(register_id < registers.size()); + return registers[register_id]; +} + +void MacroInterpreter::SetRegister(u32 register_id, u32 value) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. + if (register_id == 0) + return; + + ASSERT(register_id < registers.size()); + registers[register_id] = value; +} + +void MacroInterpreter::SetMethodAddress(u32 address) { + method_address.raw = address; +} + +void MacroInterpreter::Send(u32 value) { + maxwell3d.WriteReg(method_address.address, value, 0); + // Increment the method address by the method increment. + method_address.address.Assign(method_address.address.Value() + + method_address.increment.Value()); +} + +u32 MacroInterpreter::Read(u32 method) const { + return maxwell3d.GetRegisterValue(method); +} + +bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const { + switch (cond) { + case BranchCondition::Zero: + return value == 0; + case BranchCondition::NotZero: + return value != 0; + } + UNREACHABLE(); +} + +} // namespace Tegra diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h new file mode 100644 index 000000000..a71e359d8 --- /dev/null +++ b/src/video_core/macro_interpreter.h @@ -0,0 +1,164 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Tegra { +namespace Engines { +class Maxwell3D; +} + +class MacroInterpreter final { +public: + explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); + + /** + * Executes the macro code with the specified input parameters. + * @param code The macro byte code to execute + * @param parameters The parameters of the macro + */ + void Execute(const std::vector& code, std::vector parameters); + +private: + enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, + }; + + enum class ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. + Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 + }; + + enum class ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 + }; + + enum class BranchCondition : u32 { + Zero = 0, + NotZero = 1, + }; + + union Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + BitField<5, 1, u32> + branch_annul; // If set on a branch, then the branch doesn't have a delay slot. + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + + u32 GetBitfieldMask() const { + return (1 << bf_size) - 1; + } + }; + + union MethodAddress { + u32 raw; + BitField<0, 12, u32> address; + BitField<12, 6, u32> increment; + }; + + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * @param code The macro code to execute. + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(const std::vector& code, bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Opcode GetOpcode(const std::vector& code) const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); + + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. + u32 FetchParameter(); + + Engines::Maxwell3D& maxwell3d; + + u32 pc; ///< Current program counter + boost::optional + delayed_pc; ///< Program counter to execute at after the delay slot is executed. + + static constexpr size_t NumMacroRegisters = 8; + + /// General purpose macro registers. + std::array registers = {}; + + /// Method address to use for the next Send instruction. + MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::vector parameters; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; +}; +} // namespace Tegra