From 5d31bab69a9cdc720347dfd69a9f5011b361e17a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 9 Jul 2019 18:02:03 -0400 Subject: [PATCH 1/7] Texture_Cache: Correct Linear Structural Match. --- src/video_core/texture_cache/surface_base.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7a0fdb19b..6af9044ca 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) // Linear Surface check if (!params.is_tiled) { - if (std::tie(params.width, params.height, params.pitch) == - std::tie(rhs.width, rhs.height, rhs.pitch)) { - return MatchStructureResult::FullMatch; + if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { + if (params.width == rhs.width) { + return MatchStructureResult::FullMatch; + } else { + return MatchStructureResult::SemiMatch; + } } return MatchStructureResult::None; } From 5c1e1a148ee6be4f8c33264d210467da92702c6a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 9 Jul 2019 19:49:53 -0400 Subject: [PATCH 2/7] Gl_Texture_Cache: Measure Buffer Copy Times --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 08ae1a429..c6c76de08 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -31,6 +31,7 @@ using VideoCore::Surface::SurfaceType; MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer 
Copy", MP_RGB(128, 192, 128)); namespace { @@ -535,6 +536,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, } void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { + MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); const auto& src_params = src_surface->GetSurfaceParams(); const auto& dst_params = dst_surface->GetSurfaceParams(); UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); From a9943222f2afce6255b635091099925b3e451c8b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 9 Jul 2019 20:58:30 -0400 Subject: [PATCH 3/7] GL_State: Add a microprofile timer to OpenGL state. --- src/video_core/renderer_opengl/gl_state.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index d86e137ac..0eae98afe 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -6,8 +6,11 @@ #include #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "video_core/renderer_opengl/gl_state.h" +MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); + namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const { } void OpenGLState::Apply() const { + MICROPROFILE_SCOPE(OpenGL_State); ApplyFramebufferState(); ApplyVertexArrayState(); ApplyShaderProgram(); From 913b7a6872a67a0dd689bb19bc4ecfef7fb9cdcd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 9 Jul 2019 21:27:27 -0400 Subject: [PATCH 4/7] GPU: Add a microprofile for macro interpreter --- src/video_core/macro_interpreter.cpp | 4 ++++ src/video_core/renderer_opengl/gl_texture_cache.cpp | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index c766ed692..9f59a2dc1 100644 --- 
a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -4,14 +4,18 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro_interpreter.h" +MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); + namespace Tegra { MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} void MacroInterpreter::Execute(u32 offset, std::vector parameters) { + MICROPROFILE_SCOPE(MacroInterp); Reset(); registers[1] = parameters[0]; this->parameters = std::move(parameters); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c6c76de08..b1f6bc7c2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -31,7 +31,8 @@ using VideoCore::Surface::SurfaceType; MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", + MP_RGB(128, 192, 128)); namespace { From 5818959e543041fdff8965e71e52d55a05ee22de Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 11 Jul 2019 15:15:21 -0400 Subject: [PATCH 5/7] Texture_Cache: Force Framebuffer reset if an active render target is unregistered. 
--- src/video_core/texture_cache/surface_base.h | 8 +++++- .../texture_cache/surface_params.cpp | 13 +++++++--- src/video_core/texture_cache/texture_cache.h | 25 ++++++++++++++----- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 8ba386a8a..fb6378bc7 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -200,8 +200,9 @@ public: modification_tick = tick; } - void MarkAsRenderTarget(const bool is_target) { + void MarkAsRenderTarget(const bool is_target, const u32 index) { this->is_target = is_target; + this->index = index; } void MarkAsPicked(const bool is_picked) { @@ -221,6 +222,10 @@ public: return is_target; } + u32 GetRenderTarget() const { + return index; + } + bool IsRegistered() const { return is_registered; } @@ -311,6 +316,7 @@ private: bool is_target{}; bool is_registered{}; bool is_picked{}; + u32 index{0xFFFFFFFF}; u64 modification_tick{}; }; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9c56e2b4f..df9260859 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { - const bool tiled{as_host_size ? false : is_tiled}; const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; const u32 depth{is_layered ? 
1U : GetMipDepth(level)}; - return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, - GetMipBlockHeight(level), GetMipBlockDepth(level)); + if (is_tiled) { + return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, depth, + GetMipBlockHeight(level), GetMipBlockDepth(level)); + } else { + if (as_host_size || IsBuffer()) { + return GetBytesPerPixel()*width*height*depth; + } else { + return pitch*height*depth; + } + } } bool SurfaceParams::operator==(const SurfaceParams& rhs) const { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9e72531a..5e9812bb9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,11 +133,11 @@ public: regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(false); + depth_buffer.target->MarkAsRenderTarget(false, -1); depth_buffer.target = surface_view.first; depth_buffer.view = surface_view.second; if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(true); + depth_buffer.target->MarkAsRenderTarget(true, 8); return surface_view.second; } @@ -167,11 +167,11 @@ public: auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents, true); if (render_targets[index].target) - render_targets[index].target->MarkAsRenderTarget(false); + render_targets[index].target->MarkAsRenderTarget(false, -1); render_targets[index].target = surface_view.first; render_targets[index].view = surface_view.second; if (render_targets[index].target) - render_targets[index].target->MarkAsRenderTarget(true); + render_targets[index].target->MarkAsRenderTarget(true, static_cast(index)); return surface_view.second; } @@ -191,7 +191,7 @@ public: if (depth_buffer.target == 
nullptr) { return; } - depth_buffer.target->MarkAsRenderTarget(false); + depth_buffer.target->MarkAsRenderTarget(false, -1); depth_buffer.target = nullptr; depth_buffer.view = nullptr; } @@ -200,7 +200,7 @@ public: if (render_targets[index].target == nullptr) { return; } - render_targets[index].target->MarkAsRenderTarget(false); + render_targets[index].target->MarkAsRenderTarget(false, -1); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -270,6 +270,16 @@ protected: // and reading it from a sepparate buffer. virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; + void ManageRenderTargetUnregister(TSurface& surface) { + auto& maxwell3d = system.GPU().Maxwell3D(); + u32 index = surface->GetRenderTarget(); + if (index == 8) { + maxwell3d.dirty_flags.zeta_buffer = true; + } else { + maxwell3d.dirty_flags.color_buffer.set(index, true); + } + } + void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); @@ -294,6 +304,9 @@ protected: if (guard_render_targets && surface->IsProtected()) { return; } + if (!guard_render_targets && surface->IsRenderTarget()) { + ManageRenderTargetUnregister(surface); + } const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); From 0f54b541f4877eda87ad968708fa38ce604c3a80 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 14 Jul 2019 08:41:06 -0400 Subject: [PATCH 6/7] Texture_Cache: Remove some imprecise fallback case and clang format --- src/video_core/texture_cache/surface_params.cpp | 9 +++++---- src/video_core/texture_cache/texture_cache.h | 9 --------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index df9260859..33c94daa8 100644 --- 
a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -294,13 +294,14 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; const u32 depth{is_layered ? 1U : GetMipDepth(level)}; if (is_tiled) { - return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, depth, - GetMipBlockHeight(level), GetMipBlockDepth(level)); + return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, + depth, GetMipBlockHeight(level), + GetMipBlockDepth(level)); } else { if (as_host_size || IsBuffer()) { - return GetBytesPerPixel()*width*height*depth; + return GetBytesPerPixel() * width * height * depth; } else { - return pitch*height*depth; + return pitch * height * depth; } } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5e9812bb9..6d3d2da7d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -662,15 +662,6 @@ private: } return {current_surface, *view}; } - // The next case is unsafe, so if we r in accurate GPU, just skip it - if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - // This is the case the texture is a part of the parent. - if (current_surface->MatchesSubTexture(params, gpu_addr)) { - return RebuildSurface(current_surface, params, is_render); - } } else { // If there are many overlaps, odds are they are subtextures of the candidate // surface. 
We try to construct a new surface based on the candidate parameters, From 2ac7472d3f94f1adb33c0a1d7748e922c515f6a8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 14 Jul 2019 17:42:39 -0400 Subject: [PATCH 7/7] Texture_Cache: Address Feedback --- src/video_core/texture_cache/surface_base.h | 4 +++- src/video_core/texture_cache/surface_params.cpp | 9 ++++----- src/video_core/texture_cache/texture_cache.h | 17 ++++++++++------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index fb6378bc7..bcce8d863 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -312,11 +312,13 @@ private: return view; } + static constexpr u32 NO_RT = 0xFFFFFFFF; + bool is_modified{}; bool is_target{}; bool is_registered{}; bool is_picked{}; - u32 index{0xFFFFFFFF}; + u32 index{NO_RT}; u64 modification_tick{}; }; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 33c94daa8..fd5472451 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -297,12 +297,11 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); + } else if (as_host_size || IsBuffer()) { + return GetBytesPerPixel() * width * height * depth; } else { - if (as_host_size || IsBuffer()) { - return GetBytesPerPixel() * width * height * depth; - } else { - return pitch * height * depth; - } + // Linear Texture Case + return pitch * height * depth; } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6d3d2da7d..7f9623c62 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ 
b/src/video_core/texture_cache/texture_cache.h @@ -133,11 +133,11 @@ public: regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(false, -1); + depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target = surface_view.first; depth_buffer.view = surface_view.second; if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(true, 8); + depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); return surface_view.second; } @@ -167,7 +167,7 @@ public: auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents, true); if (render_targets[index].target) - render_targets[index].target->MarkAsRenderTarget(false, -1); + render_targets[index].target->MarkAsRenderTarget(false, NO_RT); render_targets[index].target = surface_view.first; render_targets[index].view = surface_view.second; if (render_targets[index].target) @@ -191,7 +191,7 @@ public: if (depth_buffer.target == nullptr) { return; } - depth_buffer.target->MarkAsRenderTarget(false, -1); + depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target = nullptr; depth_buffer.view = nullptr; } @@ -200,7 +200,7 @@ public: if (render_targets[index].target == nullptr) { return; } - render_targets[index].target->MarkAsRenderTarget(false, -1); + render_targets[index].target->MarkAsRenderTarget(false, NO_RT); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -272,8 +272,8 @@ protected: void ManageRenderTargetUnregister(TSurface& surface) { auto& maxwell3d = system.GPU().Maxwell3D(); - u32 index = surface->GetRenderTarget(); - if (index == 8) { + const u32 index = surface->GetRenderTarget(); + if (index == DEPTH_RT) { maxwell3d.dirty_flags.zeta_buffer = true; } else { maxwell3d.dirty_flags.color_buffer.set(index, true); @@ -797,6 +797,9 @@ private: 
static constexpr u64 registry_page_size{1 << registry_page_bits}; std::unordered_map> registry; + static constexpr u32 DEPTH_RT = 8; + static constexpr u32 NO_RT = 0xFFFFFFFF; + // The L1 Cache is used for fast texture lookup before checking the overlaps // This avoids calculating size and other stuffs. std::unordered_map l1_cache;