From 0b65e9335eaec6bef6423f6aa3be8d6b930657b9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:17:19 -0400 Subject: [PATCH 1/6] Shader_Ir: Downgrade precision and rounding asserts to debug asserts. This commit reduces the sevirity of asserts for FP precision and rounding as this are well known and have little to no consequences in gpu's accuracy. --- src/video_core/shader/decode/arithmetic.cpp | 8 ++++---- .../shader/decode/arithmetic_half_immediate.cpp | 2 +- src/video_core/shader/decode/ffma.cpp | 4 ++-- src/video_core/shader/decode/half_set_predicate.cpp | 2 +- src/video_core/shader/decode/hfma2.cpp | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 87d8fecaa..05a5f19d2 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -42,10 +42,10 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - UNIMPLEMENTED_IF_MSG( - instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", + DEBUG_ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + DEBUG_ASSERT_MSG( + instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented", instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7bcf38f23..60381b482 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } } else { - UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); + DEBUG_ASSERT(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None); } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 29be25ca3..a39283a9c 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -18,9 +18,9 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", + DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO - UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", + DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index d59d15bd8..4587dbd00 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); + DEBUG_ASSERT(instr.hsetp2.ftz == 0); Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index c3bcf1ae9..5b44cb79c 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); } else { - UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); + DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); } constexpr auto identity = HalfType::H0_H1; From d3b71ff80d73ce83afb5e409b88cbb49f87b54bd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:20:31 -0400 Subject: [PATCH 2/6] Gl_Texture_Cache: Remove assert on component type in GetFormatTuple Textures can have different components types in different orders. This assert was completely inprecise and the effectiveness of such is better handled by case and within the texture cache. --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b1f6bc7c2..6ecb02c45 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -137,7 +137,6 @@ constexpr std::array tex_format const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); const auto& format{tex_format_tuples[static_cast(pixel_format)]}; - ASSERT(component_type == format.component_type); return format; } From 3a3fee5abff816758ea5070d54ed8f6252dc451e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:31:38 -0400 Subject: [PATCH 3/6] MaxwellDMA/KeplerCopy: Downgrade DMA log message to Trace. This log was just to know which games used DMA. It's no longer important. --- src/video_core/engines/maxwell_dma.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..758c154cb 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -38,7 +38,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { } void MaxwellDMA::HandleCopy() { - LOG_WARNING(HW_GPU, "Requested a DMA copy"); + LOG_TRACE(HW_GPU, "Requested a DMA copy"); const GPUVAddr source = regs.src_address.Address(); const GPUVAddr dest = regs.dst_address.Address(); From 43f57d668c04c7dde05b076919ae5755db0ff0ac Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 08:54:42 -0400 Subject: [PATCH 4/6] GPU: Add missing puller methods. This adds some missing puller methods. We don't assert them as these are nop operations for us. --- src/video_core/gpu.cpp | 20 +++++++------------- src/video_core/gpu.h | 9 ++++++++- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 1b4975498..6cb5fd4e1 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -143,12 +143,12 @@ enum class BufferMethods { NotifyIntr = 0x8, WrcacheFlush = 0x9, Unk28 = 0xA, - Unk2c = 0xB, + UnkCacheFlush = 0xB, RefCnt = 0x14, SemaphoreAcquire = 0x1A, SemaphoreRelease = 0x1B, - Unk70 = 0x1C, - Unk74 = 0x1D, + FenceValue = 0x1C, + FenceAction = 0x1D, Unk78 = 0x1E, Unk7c = 0x1F, Yield = 0x20, @@ -194,6 +194,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::SemaphoreAddressLow: case BufferMethods::SemaphoreSequence: case BufferMethods::RefCnt: + case BufferMethods::UnkCacheFlush: + case BufferMethods::WrcacheFlush: + case BufferMethods::FenceValue: + case BufferMethods::FenceAction: break; case BufferMethods::SemaphoreTrigger: { ProcessSemaphoreTriggerMethod(); @@ -204,21 +208,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); break; } - case BufferMethods::WrcacheFlush: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); - break; - } case BufferMethods::Unk28: { // TODO(Kmather73): Research and implement this method. LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); break; } - case BufferMethods::Unk2c: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); - break; - } case BufferMethods::SemaphoreAcquire: { ProcessSemaphoreAcquire(); break; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe6628923..5a8b1c74a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -194,7 +194,12 @@ public: u32 semaphore_acquire; u32 semaphore_release; - INSERT_PADDING_WORDS(0xE4); + u32 fence_value; + union { + BitField<4, 4, u32> operation; + BitField<8, 8, u32> id; + } fence_action; + INSERT_PADDING_WORDS(0xE2); // Puller state u32 acquire_mode; @@ -274,6 +279,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7); ASSERT_REG_POSITION(reference_count, 0x14); ASSERT_REG_POSITION(semaphore_acquire, 0x1A); ASSERT_REG_POSITION(semaphore_release, 0x1B); +ASSERT_REG_POSITION(fence_value, 0x1C); +ASSERT_REG_POSITION(fence_action, 0x1D); ASSERT_REG_POSITION(acquire_mode, 0x100); ASSERT_REG_POSITION(acquire_source, 0x101); From 5a06e338598d3893bc587de303d3d25526180d14 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 10:09:26 -0400 Subject: [PATCH 5/6] Shader_Ir: correct clang format --- src/video_core/shader/decode/ffma.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index a39283a9c..cb3a9cfc1 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -19,9 +19,9 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented", - instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO + instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", - instr.ffma.tab5980_1.Value()); + instr.ffma.tab5980_1.Value()); const Node op_a = GetRegister(instr.gpr8); From 1158777737716536c0ac4c2eedb5e9b46d2dcfd8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Jul 2019 22:15:34 -0400 Subject: [PATCH 6/6] Shader_Ir: Change Debug Asserts for Log Warnings --- src/video_core/shader/decode/arithmetic.cpp | 13 ++++++++----- .../shader/decode/arithmetic_half_immediate.cpp | 4 +++- src/video_core/shader/decode/ffma.cpp | 10 ++++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 05a5f19d2..1473c282a 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - DEBUG_ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - DEBUG_ASSERT_MSG( - instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default + if (instr.fmul.tab5cb8_2 != 0) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); + } + if (instr.fmul.tab5c68_0 != 1) { + LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); + } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 60381b482..6466fc011 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); } } else { - DEBUG_ASSERT(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { + LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + } } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index cb3a9cfc1..ca2f39e8d 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - DEBUG_ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented", - instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO - DEBUG_ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", - instr.ffma.tab5980_1.Value()); + if (instr.ffma.tab5980_0 != 1) { + LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + } + if (instr.ffma.tab5980_1 != 0) { + LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + } const Node op_a = GetRegister(instr.gpr8);