Merge pull request #1723 from degasus/dirty_flags
gl_rasterizer: Skip VB upload if the state is clean.
This commit is contained in:
commit
67a154e23d
9 changed files with 60 additions and 6 deletions
|
@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
|
||||||
void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
|
void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
|
||||||
MICROPROFILE_SCOPE(ProcessCommandLists);
|
MICROPROFILE_SCOPE(ProcessCommandLists);
|
||||||
|
|
||||||
|
// On entering GPU code, assume all memory may be touched by the ARM core.
|
||||||
|
maxwell_3d->dirty_flags.OnMemoryWrite();
|
||||||
|
|
||||||
auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
|
auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
|
||||||
LOG_TRACE(HW_GPU,
|
LOG_TRACE(HW_GPU,
|
||||||
"Processing method {:08X} on subchannel {} value "
|
"Processing method {:08X} on subchannel {} value "
|
||||||
|
|
|
@ -2,8 +2,10 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
|
@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
|
||||||
u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
|
u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
|
||||||
|
|
||||||
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
|
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
|
||||||
|
// All copies here update the main memory, so mark all rasterizer states as invalid.
|
||||||
|
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
|
||||||
rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
|
rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
|
||||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||||
// cache. We do this before actually writing the new data because the destination address
|
// cache. We do this before actually writing the new data because the destination address
|
||||||
|
|
|
@ -3,8 +3,10 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/engines/kepler_memory.h"
|
#include "video_core/engines/kepler_memory.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
|
||||||
rasterizer.InvalidateRegion(dest_address, sizeof(u32));
|
rasterizer.InvalidateRegion(dest_address, sizeof(u32));
|
||||||
|
|
||||||
Memory::Write32(dest_address, data);
|
Memory::Write32(dest_address, data);
|
||||||
|
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
|
||||||
state.write_offset++;
|
state.write_offset++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
||||||
|
|
||||||
if (regs.reg_array[method] != value) {
|
if (regs.reg_array[method] != value) {
|
||||||
regs.reg_array[method] = value;
|
regs.reg_array[method] = value;
|
||||||
|
// Vertex format
|
||||||
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
|
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
|
||||||
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
|
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
|
||||||
dirty_flags.vertex_attrib_format = true;
|
dirty_flags.vertex_attrib_format = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Vertex buffer
|
||||||
|
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
|
||||||
|
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
|
||||||
|
dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
|
||||||
|
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
|
||||||
|
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
|
||||||
|
dirty_flags.vertex_array |=
|
||||||
|
1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
|
||||||
|
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
|
||||||
|
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
|
||||||
|
dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (method) {
|
switch (method) {
|
||||||
|
@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() {
|
||||||
query_result.timestamp = CoreTiming::GetTicks();
|
query_result.timestamp = CoreTiming::GetTicks();
|
||||||
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
|
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
|
||||||
}
|
}
|
||||||
|
dirty_flags.OnMemoryWrite();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
|
||||||
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
|
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
|
||||||
|
|
||||||
Memory::Write32(*address, value);
|
Memory::Write32(*address, value);
|
||||||
|
dirty_flags.OnMemoryWrite();
|
||||||
|
|
||||||
// Increment the current buffer position.
|
// Increment the current buffer position.
|
||||||
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
|
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
|
||||||
|
|
|
@ -1061,6 +1061,11 @@ public:
|
||||||
|
|
||||||
/// Tracks which parts of the Maxwell3D state changed since the rasterizer last consumed them.
struct DirtyFlags {
|
struct DirtyFlags {
|
||||||
/// Set when a register in the vertex_attrib_format range is written with a new value.
bool vertex_attrib_format = true;
|
bool vertex_attrib_format = true;
|
||||||
|
/// Bitmask with one bit per vertex array index; a set bit means that array has to be
/// set up again before drawing. Every bit starts out set.
u32 vertex_array = 0xFFFFFFFF;
|
||||||
|

||||||
|
/// Marks every vertex array as dirty. vertex_attrib_format is left untouched.
void OnMemoryWrite() {
|
||||||
|
vertex_array = 0xFFFFFFFF;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
DirtyFlags dirty_flags;
|
DirtyFlags dirty_flags;
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/maxwell_dma.h"
|
#include "video_core/engines/maxwell_dma.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// All copies here update the main memory, so mark all rasterizer states as invalid.
|
||||||
|
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
|
||||||
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
|
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
|
||||||
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
|
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
|
||||||
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
|
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
|
||||||
|
|
|
@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
|
||||||
return std::make_tuple(uploaded_ptr, uploaded_offset);
|
return std::make_tuple(uploaded_ptr, uploaded_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLBufferCache::Map(std::size_t max_size) {
|
bool OGLBufferCache::Map(std::size_t max_size) {
|
||||||
bool invalidate;
|
bool invalidate;
|
||||||
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
|
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
|
||||||
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
|
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
|
||||||
|
@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
|
||||||
if (invalidate) {
|
if (invalidate) {
|
||||||
InvalidateAll();
|
InvalidateAll();
|
||||||
}
|
}
|
||||||
|
return invalidate;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLBufferCache::Unmap() {
|
void OGLBufferCache::Unmap() {
|
||||||
|
|
|
@ -50,7 +50,7 @@ public:
|
||||||
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
|
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
|
||||||
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
|
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
|
||||||
|
|
||||||
void Map(std::size_t max_size);
|
bool Map(std::size_t max_size);
|
||||||
void Unmap();
|
void Unmap();
|
||||||
|
|
||||||
GLuint GetHandle() const;
|
GLuint GetHandle() const;
|
||||||
|
|
|
@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
|
||||||
}
|
}
|
||||||
state.draw.vertex_array = VAO.handle;
|
state.draw.vertex_array = VAO.handle;
|
||||||
state.ApplyVertexBufferState();
|
state.ApplyVertexBufferState();
|
||||||
|
|
||||||
|
// Rebinding the VAO invalidates the vertex buffer bindings.
|
||||||
|
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupVertexBuffer() {
|
void RasterizerOpenGL::SetupVertexBuffer() {
|
||||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||||
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
|
|
||||||
|
if (!gpu.dirty_flags.vertex_array)
|
||||||
|
return;
|
||||||
|
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||||
|
|
||||||
// Upload all guest vertex arrays sequentially to our buffer
|
// Upload all guest vertex arrays sequentially to our buffer
|
||||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||||
|
if (~gpu.dirty_flags.vertex_array & (1u << index))
|
||||||
|
continue;
|
||||||
|
|
||||||
const auto& vertex_array = regs.vertex_array[index];
|
const auto& vertex_array = regs.vertex_array[index];
|
||||||
if (!vertex_array.IsEnabled())
|
if (!vertex_array.IsEnabled())
|
||||||
continue;
|
continue;
|
||||||
|
@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
|
||||||
|
|
||||||
// Implicit set by glBindVertexBuffer. Stupid glstate handling...
|
// Implicit set by glBindVertexBuffer. Stupid glstate handling...
|
||||||
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
||||||
|
|
||||||
|
gpu.dirty_flags.vertex_array = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
DrawParameters RasterizerOpenGL::SetupDraw() {
|
DrawParameters RasterizerOpenGL::SetupDraw() {
|
||||||
|
@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() {
|
||||||
return;
|
return;
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||||
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
|
|
||||||
ScopeAcquireGLContext acquire_context{emu_window};
|
ScopeAcquireGLContext acquire_context{emu_window};
|
||||||
|
@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() {
|
||||||
// Add space for at least 18 constant buffers
|
// Add space for at least 18 constant buffers
|
||||||
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
|
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
|
||||||
|
|
||||||
buffer_cache.Map(buffer_size);
|
bool invalidate = buffer_cache.Map(buffer_size);
|
||||||
|
if (invalidate) {
|
||||||
|
// Map() invalidated every cached buffer, so all vertex arrays must be uploaded again.
|
||||||
|
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
SetupVertexFormat();
|
SetupVertexFormat();
|
||||||
SetupVertexBuffer();
|
SetupVertexBuffer();
|
||||||
|
|
Loading…
Reference in a new issue