Merge pull request #2705 from FernandoS27/tex-cache-fixes

GPU: Fixes to Texture Cache and Include Microprofiles for GL State/BufferCopy/Macro Interpreter
This commit is contained in:
bunnei 2019-07-14 22:44:36 -04:00 committed by GitHub
commit b77a1ed67a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 58 additions and 22 deletions

View file

@ -4,14 +4,18 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/macro_interpreter.h" #include "video_core/macro_interpreter.h"
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra { namespace Tegra {
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
MICROPROFILE_SCOPE(MacroInterp);
Reset(); Reset();
registers[1] = parameters[0]; registers[1] = parameters[0];
this->parameters = std::move(parameters); this->parameters = std::move(parameters);

View file

@ -6,8 +6,11 @@
#include <glad/glad.h> #include <glad/glad.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_state.h"
MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
namespace OpenGL { namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const {
} }
void OpenGLState::Apply() const { void OpenGLState::Apply() const {
MICROPROFILE_SCOPE(OpenGL_State);
ApplyFramebufferState(); ApplyFramebufferState();
ApplyVertexArrayState(); ApplyVertexArrayState();
ApplyShaderProgram(); ApplyShaderProgram();

View file

@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
MP_RGB(128, 192, 128));
namespace { namespace {
@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
} }
void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
const auto& src_params = src_surface->GetSurfaceParams(); const auto& src_params = src_surface->GetSurfaceParams();
const auto& dst_params = dst_surface->GetSurfaceParams(); const auto& dst_params = dst_surface->GetSurfaceParams();
UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);

View file

@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
// Linear Surface check // Linear Surface check
if (!params.is_tiled) { if (!params.is_tiled) {
if (std::tie(params.width, params.height, params.pitch) == if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
std::tie(rhs.width, rhs.height, rhs.pitch)) { if (params.width == rhs.width) {
return MatchStructureResult::FullMatch; return MatchStructureResult::FullMatch;
} else {
return MatchStructureResult::SemiMatch;
}
} }
return MatchStructureResult::None; return MatchStructureResult::None;
} }

View file

@ -200,8 +200,9 @@ public:
modification_tick = tick; modification_tick = tick;
} }
void MarkAsRenderTarget(const bool is_target) { void MarkAsRenderTarget(const bool is_target, const u32 index) {
this->is_target = is_target; this->is_target = is_target;
this->index = index;
} }
void MarkAsPicked(const bool is_picked) { void MarkAsPicked(const bool is_picked) {
@ -221,6 +222,10 @@ public:
return is_target; return is_target;
} }
/// Returns the render-target slot stored by the last MarkAsRenderTarget() call.
/// The stored value starts out as NO_RT until a caller marks the surface.
u32 GetRenderTarget() const { return this->index; }
bool IsRegistered() const { bool IsRegistered() const {
return is_registered; return is_registered;
} }
@ -307,10 +312,13 @@ private:
return view; return view;
} }
static constexpr u32 NO_RT = 0xFFFFFFFF;
bool is_modified{}; bool is_modified{};
bool is_target{}; bool is_target{};
bool is_registered{}; bool is_registered{};
bool is_picked{}; bool is_picked{};
u32 index{NO_RT};
u64 modification_tick{}; u64 modification_tick{};
}; };

View file

@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
bool uncompressed) const { bool uncompressed) const {
const bool tiled{as_host_size ? false : is_tiled};
const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
const u32 depth{is_layered ? 1U : GetMipDepth(level)}; const u32 depth{is_layered ? 1U : GetMipDepth(level)};
return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, if (is_tiled) {
GetMipBlockHeight(level), GetMipBlockDepth(level)); return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
depth, GetMipBlockHeight(level),
GetMipBlockDepth(level));
} else if (as_host_size || IsBuffer()) {
return GetBytesPerPixel() * width * height * depth;
} else {
// Linear Texture Case
return pitch * height * depth;
}
} }
bool SurfaceParams::operator==(const SurfaceParams& rhs) const { bool SurfaceParams::operator==(const SurfaceParams& rhs) const {

View file

@ -133,11 +133,11 @@ public:
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target) if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first; depth_buffer.target = surface_view.first;
depth_buffer.view = surface_view.second; depth_buffer.view = surface_view.second;
if (depth_buffer.target) if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(true); depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
return surface_view.second; return surface_view.second;
} }
@ -167,11 +167,11 @@ public:
auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true); preserve_contents, true);
if (render_targets[index].target) if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
render_targets[index].target = surface_view.first; render_targets[index].target = surface_view.first;
render_targets[index].view = surface_view.second; render_targets[index].view = surface_view.second;
if (render_targets[index].target) if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(true); render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
return surface_view.second; return surface_view.second;
} }
@ -191,7 +191,7 @@ public:
if (depth_buffer.target == nullptr) { if (depth_buffer.target == nullptr) {
return; return;
} }
depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = nullptr; depth_buffer.target = nullptr;
depth_buffer.view = nullptr; depth_buffer.view = nullptr;
} }
@ -200,7 +200,7 @@ public:
if (render_targets[index].target == nullptr) { if (render_targets[index].target == nullptr) {
return; return;
} }
render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
render_targets[index].target = nullptr; render_targets[index].target = nullptr;
render_targets[index].view = nullptr; render_targets[index].view = nullptr;
} }
@ -270,6 +270,16 @@ protected:
// and reading it from a separate buffer. // and reading it from a separate buffer.
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
/// Flags the render-target slot occupied by `surface` as dirty on the Maxwell3D engine.
/// The slot is read from the surface itself: DEPTH_RT selects the zeta (depth) buffer flag,
/// any other value is used as a position in the color-buffer dirty set.
/// NOTE(review): presumably this forces the bindings to be re-resolved after the surface
/// leaves the cache -- confirm against the consumer of dirty_flags.
void ManageRenderTargetUnregister(TSurface& surface) {
    auto& dirty_flags = system.GPU().Maxwell3D().dirty_flags;
    const u32 rt_slot = surface->GetRenderTarget();
    if (rt_slot == DEPTH_RT) {
        // The surface was bound as the depth buffer.
        dirty_flags.zeta_buffer = true;
        return;
    }
    // Otherwise the slot is treated as a color attachment position.
    dirty_flags.color_buffer.set(rt_slot, true);
}
void Register(TSurface surface) { void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr(); const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@ -294,6 +304,9 @@ protected:
if (guard_render_targets && surface->IsProtected()) { if (guard_render_targets && surface->IsProtected()) {
return; return;
} }
if (!guard_render_targets && surface->IsRenderTarget()) {
ManageRenderTargetUnregister(surface);
}
const GPUVAddr gpu_addr = surface->GetGpuAddr(); const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = surface->GetCacheAddr(); const CacheAddr cache_ptr = surface->GetCacheAddr();
const std::size_t size = surface->GetSizeInBytes(); const std::size_t size = surface->GetSizeInBytes();
@ -649,15 +662,6 @@ private:
} }
return {current_surface, *view}; return {current_surface, *view};
} }
// The next case is unsafe, so if we are in accurate GPU emulation mode, just skip it
if (Settings::values.use_accurate_gpu_emulation) {
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
// This is the case the texture is a part of the parent.
if (current_surface->MatchesSubTexture(params, gpu_addr)) {
return RebuildSurface(current_surface, params, is_render);
}
} else { } else {
// If there are many overlaps, odds are they are subtextures of the candidate // If there are many overlaps, odds are they are subtextures of the candidate
// surface. We try to construct a new surface based on the candidate parameters, // surface. We try to construct a new surface based on the candidate parameters,
@ -793,6 +797,9 @@ private:
static constexpr u64 registry_page_size{1 << registry_page_bits}; static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry; std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
static constexpr u32 DEPTH_RT = 8;
static constexpr u32 NO_RT = 0xFFFFFFFF;
// The L1 Cache is used for fast texture lookup before checking the overlaps // The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other properties. // This avoids calculating size and other properties.
std::unordered_map<CacheAddr, TSurface> l1_cache; std::unordered_map<CacheAddr, TSurface> l1_cache;