From 7784ce18543384b552563035faf32e268bad5750 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 18 Aug 2018 16:59:44 -0500 Subject: [PATCH] Shaders: Write all the enabled color outputs when a fragment shader exits. We were only writing to the first render target before. Note that this is only the GLSL side of the implementation, supporting multiple render targets requires more changes in the OpenGL renderer. Dual Source blending is not implemented and stuff that uses it might not work at all. --- .../renderer_opengl/gl_shader_decompiler.cpp | 49 +++++++++++++++++-- .../renderer_opengl/gl_shader_gen.cpp | 2 +- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bb01b3c27..6cc223328 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -26,6 +26,7 @@ using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; +constexpr u32 PROGRAM_HEADER_SIZE = 0x50; class DecompileFail : public std::runtime_error { public: @@ -616,6 +617,23 @@ public: } private: + // Shader program header for a Fragment Shader. + struct FragmentHeader { + INSERT_PADDING_WORDS(5); + INSERT_PADDING_WORDS(13); + u32 enabled_color_outputs; + union { + BitField<0, 1, u32> writes_samplemask; + BitField<1, 1, u32> writes_depth; + }; + + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { + u32 bit = render_target * 4 + component; + return enabled_color_outputs & (1 << bit); + } + }; + static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); + /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { auto iter = subroutines.find(Subroutine{begin, end, suffix}); @@ -815,6 +833,31 @@ private: shader.AddLine('}'); } + /// Writes the output values from a fragment shader to the corresponding GLSL output variables. + void EmitFragmentOutputsWrite() { + ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); + FragmentHeader header; + std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); + + ASSERT_MSG(header.writes_depth == 0, "Depth write is unimplemented"); + ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); + + // Write the color outputs using the data in the shader registers, disabled + // rendertargets/components are skipped in the register assignment. + u32 current_reg = 0; + for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; + ++render_target) { + // TODO(Subv): Figure out how dual-source blending is configured in the Switch. + for (u32 component = 0; component < 4; ++component) { + if (header.IsColorComponentOutputEnabled(render_target, component)) { + shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component, + regs.GetRegisterAsFloat(current_reg))); + ++current_reg; + } + } + } + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -1779,12 +1822,8 @@ private: default: { switch (opcode->GetId()) { case OpCode::Id::EXIT: { - // Final color output is currently hardcoded to GPR0-3 for fragment shaders if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine("color.r = " + regs.GetRegisterAsFloat(0) + ';'); - shader.AddLine("color.g = " + regs.GetRegisterAsFloat(1) + ';'); - shader.AddLine("color.b = " + regs.GetRegisterAsFloat(2) + ';'); - shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';'); + EmitFragmentOutputsWrite(); } switch (instr.flow.cond) { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 57e0e1726..01c7b9720 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -87,7 +87,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo .get_value_or({}); out += R"( in vec4 position; -out vec4 color; +layout(location = 0) out vec4 color[8]; layout (std140) uniform fs_config { vec4 viewport_flip;