maxwell_3d: Restructure macro upload to use a single macro code memory.

- Fixes an issue where macros could be skipped.
- Fixes rendering of distant objects in Super Mario Odyssey.
This commit is contained in:
bunnei 2018-10-29 23:36:03 -04:00
parent d08457f879
commit de0ab806df
4 changed files with 55 additions and 27 deletions

View file

@ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
// Reset the current macro. // Reset the current macro.
executing_macro = 0; executing_macro = 0;
// The requested macro must have been uploaded already. // Lookup the macro offset
auto macro_code = uploaded_macros.find(method); const u32 entry{(method - MacroRegistersStart) >> 1};
if (macro_code == uploaded_macros.end()) { const auto& search{macro_offsets.find(entry)};
LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); if (search == macro_offsets.end()) {
LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
UNREACHABLE();
return; return;
} }
// Execute the current macro. // Execute the current macro.
macro_interpreter.Execute(macro_code->second, std::move(parameters)); macro_interpreter.Execute(search->second, std::move(parameters));
} }
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
@ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
ProcessMacroUpload(value); ProcessMacroUpload(value);
break; break;
} }
case MAXWELL3D_REG_INDEX(macros.bind): {
ProcessMacroBind(value);
break;
}
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
} }
void Maxwell3D::ProcessMacroUpload(u32 data) { void Maxwell3D::ProcessMacroUpload(u32 data) {
// Store the uploaded macro code to interpret them when they're called. ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; "upload_address exceeded macro_memory size!");
macro.push_back(data); macro_memory[regs.macros.upload_address++] = data;
}
void Maxwell3D::ProcessMacroBind(u32 data) {
macro_offsets[regs.macros.entry] = data;
} }
void Maxwell3D::ProcessQueryGet() { void Maxwell3D::ProcessQueryGet() {

View file

@ -475,12 +475,13 @@ public:
INSERT_PADDING_WORDS(0x45); INSERT_PADDING_WORDS(0x45);
struct { struct {
INSERT_PADDING_WORDS(1); u32 upload_address;
u32 data; u32 data;
u32 entry; u32 entry;
u32 bind;
} macros; } macros;
INSERT_PADDING_WORDS(0x189); INSERT_PADDING_WORDS(0x188);
u32 tfb_enabled; u32 tfb_enabled;
@ -994,12 +995,25 @@ public:
/// Returns the texture information for a specific texture in a specific shader stage. /// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
using MacroMemory = std::array<u32, 0x40000>;
/// Gets a reference to macro memory.
const MacroMemory& GetMacroMemory() const {
return macro_memory;
}
private: private:
void InitializeRegisterDefaults(); void InitializeRegisterDefaults();
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
std::unordered_map<u32, std::vector<u32>> uploaded_macros; /// Start offsets of each macro in macro_memory
std::unordered_map<u32, u32> macro_offsets;
/// Memory for macro code
MacroMemory macro_memory;
/// Macro method that is currently being executed / being fed parameters. /// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0; u32 executing_macro = 0;
@ -1022,9 +1036,12 @@ private:
*/ */
void CallMacroMethod(u32 method, std::vector<u32> parameters); void CallMacroMethod(u32 method, std::vector<u32> parameters);
/// Handles writes to the macro uploading registers. /// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data); void ProcessMacroUpload(u32 data);
/// Handles writes to the macro bind register.
void ProcessMacroBind(u32 data);
/// Handles a write to the CLEAR_BUFFERS register. /// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers(); void ProcessClearBuffers();

View file

@ -11,7 +11,7 @@ namespace Tegra {
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) { void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
Reset(); Reset();
registers[1] = parameters[0]; registers[1] = parameters[0];
this->parameters = std::move(parameters); this->parameters = std::move(parameters);
@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
// Execute the code until we hit an exit condition. // Execute the code until we hit an exit condition.
bool keep_executing = true; bool keep_executing = true;
while (keep_executing) { while (keep_executing) {
keep_executing = Step(code, false); keep_executing = Step(offset, false);
} }
// Assert that the macro used all the input parameters // Assert that the macro used all the input parameters
@ -37,10 +37,10 @@ void MacroInterpreter::Reset() {
next_parameter_index = 1; next_parameter_index = 1;
} }
bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
u32 base_address = pc; u32 base_address = pc;
Opcode opcode = GetOpcode(code); Opcode opcode = GetOpcode(offset);
pc += 4; pc += 4;
// Update the program counter if we were delayed // Update the program counter if we were delayed
@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
delayed_pc = base_address + opcode.GetBranchTarget(); delayed_pc = base_address + opcode.GetBranchTarget();
// Execute one more instruction due to the delay slot. // Execute one more instruction due to the delay slot.
return Step(code, true); return Step(offset, true);
} }
break; break;
} }
@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
// Exit has a delay slot, execute the next instruction // Exit has a delay slot, execute the next instruction
// Note: Executing an exit during a branch delay slot will cause the instruction at the // Note: Executing an exit during a branch delay slot will cause the instruction at the
// branch target to be executed before exiting. // branch target to be executed before exiting.
Step(code, true); Step(offset, true);
return false; return false;
} }
return true; return true;
} }
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const { MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
const auto& macro_memory{maxwell3d.GetMacroMemory()};
ASSERT((pc % sizeof(u32)) == 0); ASSERT((pc % sizeof(u32)) == 0);
ASSERT(pc < code.size() * sizeof(u32)); ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
return {code[pc / sizeof(u32)]}; return {macro_memory[offset + pc / sizeof(u32)]};
} }
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {

View file

@ -22,10 +22,10 @@ public:
/** /**
* Executes the macro code with the specified input parameters. * Executes the macro code with the specified input parameters.
* @param code The macro byte code to execute * @param offset Offset to start execution at.
* @param parameters The parameters of the macro * @param parameters The parameters of the macro.
*/ */
void Execute(const std::vector<u32>& code, std::vector<u32> parameters); void Execute(u32 offset, std::vector<u32> parameters);
private: private:
enum class Operation : u32 { enum class Operation : u32 {
@ -110,11 +110,11 @@ private:
/** /**
* Executes a single macro instruction located at the current program counter. Returns whether * Executes a single macro instruction located at the current program counter. Returns whether
* the interpreter should keep running. * the interpreter should keep running.
* @param code The macro code to execute. * @param offset Offset to start execution at.
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
* previous instruction. * previous instruction.
*/ */
bool Step(const std::vector<u32>& code, bool is_delay_slot); bool Step(u32 offset, bool is_delay_slot);
/// Calculates the result of an ALU operation. src_a OP src_b; /// Calculates the result of an ALU operation. src_a OP src_b;
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
@ -127,7 +127,7 @@ private:
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
/// Reads an opcode at the current program counter location. /// Reads an opcode at the current program counter location.
Opcode GetOpcode(const std::vector<u32>& code) const; Opcode GetOpcode(u32 offset) const;
/// Returns the specified register's value. Register 0 is hardcoded to always return 0. /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
u32 GetRegister(u32 register_id) const; u32 GetRegister(u32 register_id) const;