diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 48fc1a9e1..d1f63a5eb 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -260,6 +260,9 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 // Execute the current macro. macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); + if (mme_draw.current_mode != MMMEDrawMode::Undefined) { + FlushMMEInlineDraw(); + } } void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { @@ -426,25 +429,37 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { regs.reg_array[method] = method_call.argument; if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || method == MAXWELL3D_REG_INDEX(index_array.count)) { - MMMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) - ? MMMEDrawMode::Array - : MMMEDrawMode::Indexed; - u32 count = method_call.argument; + const MMMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) + ? MMMEDrawMode::Array + : MMMEDrawMode::Indexed; + const u32 count = method_call.argument; while (true) { if (mme_draw.current_mode == MMMEDrawMode::Undefined) { - mme_draw.current_mode = expected_mode; - mme_draw.current_count = count; - mme_draw.instance_count = 1; + if (mme_draw.gl_begin_consume) { + mme_draw.current_mode = expected_mode; + mme_draw.current_count = count; + mme_draw.instance_count = 1; + mme_draw.gl_begin_consume = false; + mme_draw.gl_end_count = 0; + } break; } else { - if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count) { + if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count && + mme_draw.instance_mode && mme_draw.gl_begin_consume) { mme_draw.instance_count++; + mme_draw.gl_begin_consume = false; break; } else { FlushMMEInlineDraw(); } } } + } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { + mme_draw.instance_mode = + (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); + mme_draw.gl_begin_consume = true; + } else { + mme_draw.gl_end_count++; } } else { if (mme_draw.current_mode != MMMEDrawMode::Undefined) { @@ -458,6 +473,7 @@ void Maxwell3D::FlushMMEInlineDraw() { LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast(regs.draw.topology.Value()), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); + ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); auto debug_context = system.GetGPUDebugContext(); @@ -488,6 +504,9 @@ void Maxwell3D::FlushMMEInlineDraw() { mme_draw.current_mode = MMMEDrawMode::Undefined; mme_draw.current_count = 0; mme_draw.instance_count = 0; + mme_draw.instance_mode = false; + mme_draw.gl_begin_consume = false; + mme_draw.gl_end_count = 0; } void Maxwell3D::ProcessMacroUpload(u32 data) { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1547d930e..8fd3ec85c 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1277,8 +1277,11 @@ public: struct MMEDrawState { MMMEDrawMode current_mode{MMMEDrawMode::Undefined}; - u32 current_count; - u32 instance_count; + u32 current_count{}; + u32 instance_count{}; + bool instance_mode{}; + bool gl_begin_consume{}; + u32 gl_end_count{}; } mme_draw; private: diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5df7f3f56..f71a22738 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -788,13 +788,13 @@ void RasterizerOpenGL::DrawArrays() { DrawPrelude(); auto& maxwell3d = system.GPU().Maxwell3D(); - auto& regs = maxwell3d.regs; - auto current_instance = maxwell3d.state.current_instance; - auto primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); + const auto& regs = maxwell3d.regs; + const auto current_instance = maxwell3d.state.current_instance; + const auto primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); if (accelerate_draw == AccelDraw::Indexed) { - auto index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - auto count = regs.index_array.count; - auto base_vertex = static_cast(regs.vb_element_base); + const auto index_format = MaxwellToGL::IndexFormat(regs.index_array.format); + const auto count = regs.index_array.count; + const auto base_vertex = static_cast(regs.vb_element_base); const auto index_buffer_ptr = reinterpret_cast(index_buffer_offset); if (current_instance > 0) { glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, @@ -805,8 +805,8 @@ void RasterizerOpenGL::DrawArrays() { base_vertex); } } else { - auto count = regs.vertex_buffer.count; - auto vertex_first = regs.vertex_buffer.first; + const auto count = regs.vertex_buffer.count; + const auto vertex_first = regs.vertex_buffer.first; if (current_instance > 0) { glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1, current_instance); @@ -819,21 +819,19 @@ void RasterizerOpenGL::DrawArrays() { maxwell3d.dirty.memory_general = false; } -#pragma optimize("", off) - void RasterizerOpenGL::DrawMultiArrays() { DrawPrelude(); auto& maxwell3d = system.GPU().Maxwell3D(); - auto& regs = maxwell3d.regs; - auto& draw_setup = maxwell3d.mme_draw; - auto num_instances = draw_setup.instance_count; - auto base_instance = static_cast(regs.vb_base_instance); - auto primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); + const auto& regs = maxwell3d.regs; + const auto& draw_setup = maxwell3d.mme_draw; + const auto num_instances = draw_setup.instance_count; + const auto base_instance = static_cast(regs.vb_base_instance); + const auto primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); if (draw_setup.current_mode == Tegra::Engines::Maxwell3D::MMMEDrawMode::Indexed) { - auto index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - auto count = regs.index_array.count; - auto base_vertex = static_cast(regs.vb_element_base); + const auto index_format = MaxwellToGL::IndexFormat(regs.index_array.format); + const auto count = regs.index_array.count; + const auto base_vertex = static_cast(regs.vb_element_base); const auto index_buffer_ptr = reinterpret_cast(index_buffer_offset); if (num_instances > 1) { glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, @@ -844,8 +842,8 @@ void RasterizerOpenGL::DrawMultiArrays() { base_vertex); } } else { - auto count = regs.vertex_buffer.count; - auto vertex_first = regs.vertex_buffer.first; + const auto count = regs.vertex_buffer.count; + const auto vertex_first = regs.vertex_buffer.first; if (num_instances > 1) { glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, num_instances, base_instance); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 62e32697e..f7e86ab26 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -462,6 +462,14 @@ private: code.AddLine("float gl_PointSize;"); } + if (ir.UsesInstanceId()) { + code.AddLine("int gl_InstanceID;"); + } + + if (ir.UsesVertexId()) { + code.AddLine("int gl_VertexID;"); + } + --code.scope; code.AddLine("}};"); code.AddNewLine(); @@ -964,7 +972,7 @@ private: switch (element) { case 2: // Config pack's first value is instance_id. - return {"gl_InstanceID", Type::Uint}; + return {"gl_InstanceID", Type::Int}; case 3: return {"gl_VertexID", Type::Int}; } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index bbbab0bca..2c357f310 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -114,6 +114,18 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff break; } } + if (index == Attribute::Index::TessCoordInstanceIDVertexID) { + switch (element) { + case 2: + uses_instance_id = true; + break; + case 3: + uses_vertex_id = true; + break; + default: + break; + } + } if (index == Attribute::Index::ClipDistances0123 || index == Attribute::Index::ClipDistances4567) { const auto clip_index = diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6aed9bb84..2f03d83ba 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -124,6 +124,14 @@ public: return uses_point_size; } + bool UsesInstanceId() const { + return uses_instance_id; + } + + bool UsesVertexId() const { + return uses_vertex_id; + } + bool HasPhysicalAttributes() const { return uses_physical_attributes; } @@ -373,6 +381,8 @@ private: bool uses_viewport_index{}; bool uses_point_size{}; bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes + bool uses_instance_id{}; + bool uses_vertex_id{}; Tegra::Shader::Header header; };