diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index bcf0c15a4..a7bcf26fb 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -129,6 +129,15 @@ protected: return ++modified_ticks; } + /// Flushes the specified object, updating appropriate cache state as needed + void FlushObject(const T& object) { + if (!object->IsDirty()) { + return; + } + object->Flush(); + object->MarkAsModified(false, *this); + } + private: /// Returns a list of cached objects from the specified memory region, ordered by access time std::vector GetSortedObjectsFromRegion(VAddr addr, u64 size) { @@ -154,15 +163,6 @@ private: return objects; } - /// Flushes the specified object, updating appropriate cache state as needed - void FlushObject(const T& object) { - if (!object->IsDirty()) { - return; - } - object->Flush(); - object->MarkAsModified(false, *this); - } - using ObjectSet = std::set; using ObjectCache = std::unordered_map; using IntervalCache = boost::icl::interval_map; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 523ed10fa..c8c1d6911 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -738,9 +738,13 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); + res_cache.SignalPreDrawCall(); + // Execute draw call params.DispatchDraw(); + res_cache.SignalPostDrawCall(); + // Disable scissor test state.viewports[0].scissor.enabled = false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 81b6099f9..5fdf1164d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include "common/alignment.h" @@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) // alternatives. This signals a bug on those functions. const auto width = static_cast(params.MipWidth(0)); const auto height = static_cast(params.MipHeight(0)); + memory_size = params.MemorySize(); + reinterpreted = false; const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); gl_internal_format = format_tuple.internal_format; @@ -970,22 +973,23 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); if (index >= regs.rt_control.count) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - return last_color_buffers[index] = GetSurface(color_params, preserve_contents); + return current_color_buffers[index] = GetSurface(color_params, preserve_contents); } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->LoadGLBuffer(); surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); surface->MarkAsModified(false, *this); + surface->MarkForReload(false); } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { @@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres Surface surface{TryGet(params.addr)}; if (surface) { if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { - // Use the cached surface as-is + // Use the cached surface as-is unless it's not synced with memory + if (surface->MustReload()) + LoadSurface(surface); return surface; } else if (preserve_contents) { // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one Surface new_surface{RecreateSurface(surface, params)}; - Unregister(surface); + UnregisterSurface(surface); Register(new_surface); + if (new_surface->IsUploaded()) { + RegisterReinterpretSurface(new_surface); + } return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. - Unregister(surface); + UnregisterSurface(surface); } } @@ -1290,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params return {}; } +static std::optional TryFindBestMipMap(std::size_t memory, const SurfaceParams params, + u32 height) { + for (u32 i = 0; i < params.max_mip_level; i++) { + if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { + return {i}; + } + } + return {}; +} + +static std::optional TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { + const std::size_t size = params.LayerMemorySize(); + VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); + for (u32 i = 0; i < params.depth; i++) { + if (start == addr) { + return {i}; + } + start += size; + } + return {}; +} + +static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + const std::size_t src_memory_size = src_params.size_in_bytes; + const std::optional level = + TryFindBestMipMap(src_memory_size, dst_params, src_params.height); + if (level.has_value()) { + if (src_params.width == dst_params.MipWidthGobAligned(*level) && + src_params.height == dst_params.MipHeight(*level) && + src_params.block_height >= dst_params.MipBlockHeight(*level)) { + const std::optional slot = + TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); + if (slot.has_value()) { + glCopyImageSubData(render_surface->Texture().handle, + SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, + blitted_surface->Texture().handle, + SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, + dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); + blitted_surface->MarkAsModified(true, cache); + return true; + } + } + } + return false; +} + +static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { + const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); + const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); + if (bound2 > bound1) + return true; + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.component_type != src_params.component_type); +} + +static bool IsReinterpretInvalidSecond(const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.height > src_params.height && dst_params.width > src_params.width); +} + +bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, + Surface intersect) { + if (IsReinterpretInvalid(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { + if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + FlushObject(intersect); + FlushObject(triggering_surface); + intersect->MarkForReload(true); + } + return true; +} + +void RasterizerCacheOpenGL::SignalPreDrawCall() { + if (texception && GLAD_GL_ARB_texture_barrier) { + glTextureBarrier(); + } + texception = false; +} + +void RasterizerCacheOpenGL::SignalPostDrawCall() { + for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { + if (current_color_buffers[i] != nullptr) { + Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); + if (intersect != nullptr) { + PartialReinterpretSurface(current_color_buffers[i], intersect); + texception = true; + } + } + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 838554c35..797bbdc9c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -34,6 +34,7 @@ using SurfaceTarget = VideoCore::Surface::SurfaceTarget; using SurfaceType = VideoCore::Surface::SurfaceType; using PixelFormat = VideoCore::Surface::PixelFormat; using ComponentType = VideoCore::Surface::ComponentType; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct SurfaceParams { enum class SurfaceClass { @@ -140,10 +141,18 @@ struct SurfaceParams { return offset; } + std::size_t GetMipmapSingleSize(u32 mip_level) const { + return InnerMipmapMemorySize(mip_level, false, is_layered); + } + u32 MipWidth(u32 mip_level) const { return std::max(1U, width >> mip_level); } + u32 MipWidthGobAligned(u32 mip_level) const { + return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); + } + u32 MipHeight(u32 mip_level) const { return std::max(1U, height >> mip_level); } @@ -346,6 +355,10 @@ public: return cached_size_in_bytes; } + std::size_t GetMemorySize() const { + return memory_size; + } + void Flush() override { FlushGLBuffer(); } @@ -395,6 +408,26 @@ public: Tegra::Texture::SwizzleSource swizzle_z, Tegra::Texture::SwizzleSource swizzle_w); + void MarkReinterpreted() { + reinterpreted = true; + } + + bool IsReinterpreted() const { + return reinterpreted; + } + + void MarkForReload(bool reload) { + must_reload = reload; + } + + bool MustReload() const { + return must_reload; + } + + bool IsUploaded() const { + return params.identity == SurfaceParams::SurfaceClass::Uploaded; + } + private: void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); @@ -408,6 +441,9 @@ private: GLenum gl_internal_format{}; std::size_t cached_size_in_bytes{}; std::array swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; + std::size_t memory_size; + bool reinterpreted = false; + bool must_reload = false; }; class RasterizerCacheOpenGL final : public RasterizerCache { @@ -433,6 +469,9 @@ public: const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect); + void SignalPreDrawCall(); + void SignalPostDrawCall(); + private: void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); @@ -449,6 +488,10 @@ private: /// Tries to get a reserved surface for the specified parameters Surface TryGetReservedSurface(const SurfaceParams& params); + // Partialy reinterpret a surface based on a triggering_surface that collides with it. + // returns true if the reinterpret was successful, false in case it was not. + bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); + /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); @@ -465,12 +508,50 @@ private: OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; + bool texception = false; + /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one /// using the new format. OGLBuffer copy_pbo; - std::array last_color_buffers; + std::array last_color_buffers; + std::array current_color_buffers; Surface last_depth_buffer; + + using SurfaceIntervalCache = boost::icl::interval_map; + using SurfaceInterval = typename SurfaceIntervalCache::interval_type; + + static auto GetReinterpretInterval(const Surface& object) { + return SurfaceInterval::right_open(object->GetAddr() + 1, + object->GetAddr() + object->GetMemorySize() - 1); + } + + // Reinterpreted surfaces are very fragil as the game may keep rendering into them. + SurfaceIntervalCache reinterpreted_surfaces; + + void RegisterReinterpretSurface(Surface reinterpret_surface) { + auto interval = GetReinterpretInterval(reinterpret_surface); + reinterpreted_surfaces.insert({interval, reinterpret_surface}); + reinterpret_surface->MarkReinterpreted(); + } + + Surface CollideOnReinterpretedSurface(VAddr addr) const { + const SurfaceInterval interval{addr}; + for (auto& pair : + boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { + return pair.second; + } + return nullptr; + } + + /// Unregisters an object from the cache + void UnregisterSurface(const Surface& object) { + if (object->IsReinterpreted()) { + auto interval = GetReinterpretInterval(object); + reinterpreted_surfaces.erase(interval); + } + Unregister(object); + } }; } // namespace OpenGL