Revamp Kepler Memory to use a subengine to manage uploads

This commit is contained in:
Fernando Sahmkow 2019-04-22 18:50:56 -04:00
parent b5889cbd6f
commit a91d3fc639
6 changed files with 134 additions and 93 deletions

View file

@ -3,6 +3,8 @@ add_library(video_core STATIC
dma_pusher.h dma_pusher.h
debug_utils/debug_utils.cpp debug_utils/debug_utils.cpp
debug_utils/debug_utils.h debug_utils/debug_utils.h
engines/engine_upload.cpp
engines/engine_upload.h
engines/fermi_2d.cpp engines/fermi_2d.cpp
engines/fermi_2d.h engines/fermi_2d.h
engines/kepler_compute.cpp engines/kepler_compute.cpp

View file

@ -0,0 +1,44 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines::Upload {
void State::ProcessExec(const bool is_linear) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
inner_buffer.resize(copy_size);
linear = is_linear;
}
/// Appends one 32-bit data word to the staging buffer and, on the last call of a submission,
/// writes the staged bytes to the destination address through the memory manager.
///
/// @param data         Word to append. Only as many of its bytes as still fit within
///                     copy_size are stored; surplus words are dropped.
/// @param is_last_call When true, the staging buffer is flushed to the destination.
void State::ProcessData(const u32 data, const bool is_last_call) {
    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
    // Once the buffer is full (or when it was sized to zero) write_offset equals the buffer
    // size, and inner_buffer[write_offset] would index past the end. Only touch the buffer
    // when there is something left to copy, and address it through data() + offset.
    if (sub_copy_size > 0) {
        std::memcpy(inner_buffer.data() + write_offset, &data, sub_copy_size);
        write_offset += sub_copy_size;
    }

    if (!is_last_call) {
        return;
    }

    const GPUVAddr address{regs.dest.Address()};
    if (linear) {
        // Linear destination: the staged bytes are written out unchanged.
        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
        return;
    }

    // Non-linear destination: only a single 2D layer with unit block width/depth is handled.
    UNIMPLEMENTED_IF(regs.dest.z != 0);
    UNIMPLEMENTED_IF(regs.dest.depth != 1);
    UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
    UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);

    // Read-modify-write of the whole destination surface: fetch its current contents, let
    // SwizzleKepler merge the staged data into them at (x, y), then write everything back.
    const std::size_t dst_size = Tegra::Texture::CalculateSize(
        true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
    std::vector<u8> tmp_buffer(dst_size);
    memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
    Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
                                  regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
                                  tmp_buffer.data());
    memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
} // namespace Tegra::Engines::Upload

View file

@ -0,0 +1,74 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines::Upload {
/// Upload register block: the size of the incoming data plus a description of the
/// destination surface. The member order is part of the register layout and must not change.
struct Data {
    u32 line_length_in;
    u32 line_count;

    struct {
        u32 address_high;
        u32 address_low;
        u32 pitch;
        // One register word packing the three block dimensions, each stored as a shift amount.
        union {
            BitField<0, 4, u32> block_width;
            BitField<4, 4, u32> block_height;
            BitField<8, 4, u32> block_depth;
        };
        u32 width;
        u32 height;
        u32 depth;
        u32 z;
        u32 x;
        u32 y;

        /// Joins address_high (upper 32 bits) and address_low into one GPU virtual address.
        GPUVAddr Address() const {
            const auto upper_bits = static_cast<GPUVAddr>(address_high) << 32;
            return static_cast<GPUVAddr>(upper_bits | address_low);
        }

        /// Block width, i.e. 1 shifted left by the block_width field.
        u32 BlockWidth() const {
            const u32 shift = block_width.Value();
            return 1U << shift;
        }

        /// Block height, i.e. 1 shifted left by the block_height field.
        u32 BlockHeight() const {
            const u32 shift = block_height.Value();
            return 1U << shift;
        }

        /// Block depth, i.e. 1 shifted left by the block_depth field.
        u32 BlockDepth() const {
            const u32 shift = block_depth.Value();
            return 1U << shift;
        }
    } dest;
};
/// Accumulates the data words of an upload into a staging buffer and writes them to GPU
/// memory once the submission completes. The register block and the memory manager are
/// borrowed by reference and must outlive this object.
class State {
public:
    /// @param memory_manager Memory manager used for the final read/write of the destination.
    /// @param regs           Upload registers read when an upload starts and when it is flushed.
    // Initializers are listed in member declaration order, which is the order they run in.
    State(MemoryManager& memory_manager, Data& regs) : regs(regs), memory_manager(memory_manager) {}
    ~State() = default;

    /// Starts a new upload; is_linear selects how the destination is written on flush.
    void ProcessExec(const bool is_linear);

    /// Appends one data word; flushes the staged bytes to the destination when is_last_call.
    void ProcessData(const u32 data, const bool is_last_call);

private:
    u32 write_offset = 0;         ///< Next write position inside inner_buffer.
    u32 copy_size = 0;            ///< Total number of bytes expected for the current upload.
    std::vector<u8> inner_buffer; ///< Staging buffer holding the bytes received so far.
    bool linear = false;          ///< Set by ProcessExec; defaulted so it is never read uninitialized.
    Data& regs;
    MemoryManager& memory_manager;
};
} // namespace Tegra::Engines::Upload

View file

@ -14,9 +14,8 @@
namespace Tegra::Engines { namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
MemoryManager& memory_manager) : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
KeplerMemory::~KeplerMemory() = default; KeplerMemory::~KeplerMemory() = default;
@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
switch (method_call.method) { switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): { case KEPLERMEMORY_REG_INDEX(exec): {
ProcessExec(); upload_state.ProcessExec(regs.exec.linear != 0);
break; break;
} }
case KEPLERMEMORY_REG_INDEX(data): { case KEPLERMEMORY_REG_INDEX(data): {
ProcessData(method_call.argument, method_call.IsLastCall()); bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
break; break;
} }
} }
} }
void KeplerMemory::ProcessExec() {
state.write_offset = 0;
state.copy_size = regs.line_length_in * regs.line_count;
state.inner_buffer.resize(state.copy_size);
}
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
state.write_offset += sub_copy_size;
if (is_last_call) {
const GPUVAddr address{regs.dest.Address()};
if (regs.exec.linear != 0) {
memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
std::vector<u8> tmp_buffer(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
state.inner_buffer.data(), tmp_buffer.data());
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
}
} // namespace Tegra::Engines } // namespace Tegra::Engines

View file

@ -10,6 +10,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
namespace Core { namespace Core {
@ -20,10 +21,6 @@ namespace Tegra {
class MemoryManager; class MemoryManager;
} }
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines { namespace Tegra::Engines {
#define KEPLERMEMORY_REG_INDEX(field_name) \ #define KEPLERMEMORY_REG_INDEX(field_name) \
@ -31,8 +28,7 @@ namespace Tegra::Engines {
class KeplerMemory final { class KeplerMemory final {
public: public:
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, KeplerMemory(Core::System& system, MemoryManager& memory_manager);
MemoryManager& memory_manager);
~KeplerMemory(); ~KeplerMemory();
/// Write the value to the register identified by method. /// Write the value to the register identified by method.
@ -45,42 +41,7 @@ public:
struct { struct {
INSERT_PADDING_WORDS(0x60); INSERT_PADDING_WORDS(0x60);
u32 line_length_in; Upload::Data upload;
u32 line_count;
struct {
u32 address_high;
u32 address_low;
u32 pitch;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
u32 BlockWidth() const {
return 1U << block_width.Value();
}
u32 BlockHeight() const {
return 1U << block_height.Value();
}
u32 BlockDepth() const {
return 1U << block_depth.Value();
}
} dest;
struct { struct {
union { union {
@ -96,28 +57,17 @@ public:
}; };
} regs{}; } regs{};
struct {
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
} state{};
private: private:
Core::System& system; Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager; MemoryManager& memory_manager;
Upload::State upload_state;
void ProcessExec();
void ProcessData(u32 data, bool is_last_call);
}; };
#define ASSERT_REG_POSITION(field_name, position) \ #define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position") "Field " #field_name " has invalid position")
ASSERT_REG_POSITION(line_length_in, 0x60); ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(line_count, 0x61);
ASSERT_REG_POSITION(dest, 0x62);
ASSERT_REG_POSITION(exec, 0x6C); ASSERT_REG_POSITION(exec, 0x6C);
ASSERT_REG_POSITION(data, 0x6D); ASSERT_REG_POSITION(data, 0x6D);
#undef ASSERT_REG_POSITION #undef ASSERT_REG_POSITION

View file

@ -37,7 +37,7 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
} }
GPU::~GPU() = default; GPU::~GPU() = default;