diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index dbd106c53..3c32f1067 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -487,7 +487,12 @@ public: }; } rt_control; - INSERT_PADDING_WORDS(0x2B); + INSERT_PADDING_WORDS(0x2); + + u32 zeta_width; + u32 zeta_height; + + INSERT_PADDING_WORDS(0x27); u32 depth_test_enable; @@ -540,7 +545,11 @@ public: u32 vb_element_base; - INSERT_PADDING_WORDS(0x49); + INSERT_PADDING_WORDS(0x40); + + u32 zeta_enable; + + INSERT_PADDING_WORDS(0x8); struct { u32 tsc_address_high; @@ -865,6 +874,8 @@ ASSERT_REG_POSITION(clear_depth, 0x364); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458); ASSERT_REG_POSITION(rt_control, 0x487); +ASSERT_REG_POSITION(zeta_width, 0x48a); +ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -874,6 +885,7 @@ ASSERT_REG_POSITION(blend, 0x4CF); ASSERT_REG_POSITION(stencil, 0x4E0); ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); +ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(tsc, 0x557); ASSERT_REG_POSITION(tic, 0x55D); ASSERT_REG_POSITION(stencil_two_side, 0x565); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 65a2fd5e8..56d9c575b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -387,7 +387,7 @@ void RasterizerOpenGL::Clear() { } if (regs.clear_buffers.Z) { clear_mask |= GL_DEPTH_BUFFER_BIT; - use_depth_fb = true; + use_depth_fb = regs.zeta_enable != 0; // Always enable the depth write when clearing the depth buffer. The depth write mask is // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. @@ -413,11 +413,13 @@ void RasterizerOpenGL::Clear() { glClear(clear_mask); // Mark framebuffer surfaces as dirty - if (dirty_color_surface != nullptr) { - res_cache.MarkSurfaceAsDirty(dirty_color_surface); - } - if (dirty_depth_surface != nullptr) { - res_cache.MarkSurfaceAsDirty(dirty_depth_surface); + if (Settings::values.use_accurate_framebuffers) { + if (dirty_color_surface != nullptr) { + res_cache.FlushSurface(dirty_color_surface); + } + if (dirty_depth_surface != nullptr) { + res_cache.FlushSurface(dirty_depth_surface); + } } } @@ -431,7 +433,7 @@ void RasterizerOpenGL::DrawArrays() { ScopeAcquireGLContext acquire_context; auto [dirty_color_surface, dirty_depth_surface] = - ConfigureFramebuffers(true, regs.zeta.Address() != 0); + ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0); SyncDepthTestState(); SyncBlendState(); @@ -520,11 +522,13 @@ void RasterizerOpenGL::DrawArrays() { state.Apply(); // Mark framebuffer surfaces as dirty - if (dirty_color_surface != nullptr) { - res_cache.MarkSurfaceAsDirty(dirty_color_surface); - } - if (dirty_depth_surface != nullptr) { - res_cache.MarkSurfaceAsDirty(dirty_depth_surface); + if (Settings::values.use_accurate_framebuffers) { + if (dirty_color_surface != nullptr) { + res_cache.FlushSurface(dirty_color_surface); + } + if (dirty_depth_surface != nullptr) { + res_cache.FlushSurface(dirty_depth_surface); + } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2c43982b0..28f0bc379 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -65,9 +65,9 @@ struct FormatTuple { return params; } -/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( - const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address, - Tegra::DepthFormat format) { +/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, + Tegra::GPUVAddr zeta_address, + Tegra::DepthFormat format) { SurfaceParams params{}; params.addr = zeta_address; @@ -77,9 +77,9 @@ struct FormatTuple { params.component_type = ComponentTypeFromDepthFormat(format); params.type = GetFormatType(params.pixel_format); params.size_in_bytes = params.SizeInBytes(); - params.width = config.width; - params.height = config.height; - params.unaligned_height = config.height; + params.width = zeta_width; + params.height = zeta_height; + params.unaligned_height = zeta_height; params.size_in_bytes = params.SizeInBytes(); return params; } @@ -254,6 +254,60 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rect, GLuint dst_tex, + const MathUtil::Rectangle& dst_rect, SurfaceType type, + GLuint read_fb_handle, GLuint draw_fb_handle) { + OpenGLState prev_state{OpenGLState::GetCurState()}; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = read_fb_handle; + state.draw.draw_framebuffer = draw_fb_handle; + state.Apply(); + + u32 buffers{}; + + if (type == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, + 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + buffers = GL_COLOR_BUFFER_BIT; + } else if (type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_tex, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_tex, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, + dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, + buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + + return true; +} + CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { texture.Create(); const auto& rect{params.GetRect()}; @@ -519,8 +573,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( } if (using_depth_fb) { - depth_params = - SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format); + depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height, + regs.zeta.Address(), regs.zeta.format); } MathUtil::Rectangle color_rect{}; @@ -565,17 +619,9 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); } -void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { - if (Settings::values.use_accurate_framebuffers) { - // If enabled, always flush dirty surfaces - surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); - surface->FlushGLBuffer(); - } else { - // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads - // and flushes are very slow and do not seem to improve accuracy - const auto& params{surface->GetSurfaceParams()}; - Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false); - } +void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { + surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->FlushGLBuffer(); } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { @@ -588,25 +634,53 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none) return {}; - // Check for an exact match in existing surfaces - const auto& surface_key{SurfaceKey::Create(params)}; - const auto& search{surface_cache.find(surface_key)}; + // Look up surface in the cache based on address + const auto& search{surface_cache.find(params.addr)}; Surface surface; if (search != surface_cache.end()) { surface = search->second; if (Settings::values.use_accurate_framebuffers) { - // Reload the surface from Switch memory - LoadSurface(surface); + // If use_accurate_framebuffers is enabled, always load from memory + FlushSurface(surface); + UnregisterSurface(surface); + } else if (surface->GetSurfaceParams() != params) { + // If surface parameters changed, recreate the surface from the old one + return RecreateSurface(surface, params); + } else { + // Use the cached surface as-is + return surface; } - } else { - surface = std::make_shared(params); - RegisterSurface(surface); - LoadSurface(surface); } + // No surface found - create a new one + surface = std::make_shared(params); + RegisterSurface(surface); + LoadSurface(surface); + return surface; } +Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, + const SurfaceParams& new_params) { + // Verify surface is compatible for blitting + const auto& params{surface->GetSurfaceParams()}; + ASSERT(params.type == new_params.type); + ASSERT(params.pixel_format == new_params.pixel_format); + ASSERT(params.component_type == new_params.component_type); + + // Create a new surface with the new parameters, and blit the previous surface to it + Surface new_surface{std::make_shared(new_params)}; + BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, + new_surface->GetSurfaceParams().GetRect(), params.type, read_framebuffer.handle, + draw_framebuffer.handle); + + // Update cache accordingly + UnregisterSurface(surface); + RegisterSurface(new_surface); + + return new_surface; +} + Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { // Tries to find the GPU address of a framebuffer based on the CPU address. This is because // final output framebuffers are specified by CPU address, but internally our GPU cache uses @@ -652,22 +726,20 @@ void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { const auto& params{surface->GetSurfaceParams()}; - const auto& surface_key{SurfaceKey::Create(params)}; - const auto& search{surface_cache.find(surface_key)}; + const auto& search{surface_cache.find(params.addr)}; if (search != surface_cache.end()) { // Registered already return; } - surface_cache[surface_key] = surface; + surface_cache[params.addr] = surface; UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); } void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { const auto& params{surface->GetSurfaceParams()}; - const auto& surface_key{SurfaceKey::Create(params)}; - const auto& search{surface_cache.find(surface_key)}; + const auto& search{surface_cache.find(params.addr)}; if (search == surface_cache.end()) { // Unregistered already diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 718c45ce1..b084c4db4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -10,7 +10,6 @@ #include #include #include "common/common_types.h" -#include "common/hash.h" #include "common/math_util.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -137,6 +136,7 @@ struct SurfaceParams { ASSERT(static_cast(format) < bpp_table.size()); return bpp_table[static_cast(format)]; } + u32 GetFormatBpp() const { return GetFormatBpp(pixel_format); } @@ -365,9 +365,21 @@ struct SurfaceParams { const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); /// Creates SurfaceParams for a depth buffer configuration - static SurfaceParams CreateForDepthBuffer( - const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, - Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format); + static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, + Tegra::GPUVAddr zeta_address, + Tegra::DepthFormat format); + + bool operator==(const SurfaceParams& other) const { + return std::tie(addr, is_tiled, block_height, pixel_format, component_type, type, width, + height, unaligned_height, size_in_bytes) == + std::tie(other.addr, other.is_tiled, other.block_height, other.pixel_format, + other.component_type, other.type, other.width, other.height, + other.unaligned_height, other.size_in_bytes); + } + + bool operator!=(const SurfaceParams& other) const { + return !operator==(other); + } Tegra::GPUVAddr addr; bool is_tiled; @@ -381,24 +393,6 @@ struct SurfaceParams { size_t size_in_bytes; }; -/// Hashable variation of SurfaceParams, used for a key in the surface cache -struct SurfaceKey : Common::HashableStruct { - static SurfaceKey Create(const SurfaceParams& params) { - SurfaceKey res; - res.state = params; - return res; - } -}; - -namespace std { -template <> -struct hash { - size_t operator()(const SurfaceKey& k) const { - return k.Hash(); - } -}; -} // namespace std - class CachedSurface final { public: CachedSurface(const SurfaceParams& params); @@ -444,8 +438,8 @@ public: SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport); - /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory - void MarkSurfaceAsDirty(const Surface& surface); + /// Flushes the surface to Switch memory + void FlushSurface(const Surface& surface); /// Tries to find a framebuffer GPU address based on the provided CPU address Surface TryFindFramebufferSurface(VAddr cpu_addr) const; @@ -460,6 +454,9 @@ private: void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params); + /// Recreates a surface with new parameters + Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); + /// Register surface into the cache void RegisterSurface(const Surface& surface); @@ -469,7 +466,7 @@ private: /// Increase/decrease the number of surface in pages touching the specified region void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); - std::unordered_map surface_cache; + std::unordered_map surface_cache; PageMap cached_pages; OGLFramebuffer read_framebuffer;