From 717394c980509ee68608e6378cf58162adb5edaa Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Tue, 19 Feb 2019 09:44:33 +0100 Subject: [PATCH 1/2] video_core/dma_pusher: The full list of headers at once. Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible. This reduces the Memory::* calls by the dma_pusher by a factor of 10. --- src/video_core/dma_pusher.cpp | 102 ++++++++++++++++++---------------- src/video_core/dma_pusher.h | 2 + 2 files changed, 57 insertions(+), 47 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index eb9bf1878..654e4d9aa 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -38,58 +38,66 @@ bool DmaPusher::Step() { const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); ASSERT_MSG(address, "Invalid GPU address"); - const CommandHeader command_header{Memory::Read32(*address)}; + GPUVAddr size = dma_put - dma_get; + ASSERT_MSG(size % sizeof(CommandHeader) == 0, "Invalid aligned GPU addresses"); + command_headers.resize(size / sizeof(CommandHeader)); - dma_get += sizeof(u32); + Memory::ReadBlock(*address, command_headers.data(), size); - if (!non_main) { - dma_mget = dma_get; + for (const CommandHeader& command_header : command_headers) { + + // now, see if we're in the middle of a command + if (dma_state.length_pending) { + // Second word of long non-inc methods command - method count + dma_state.length_pending = 0; + dma_state.method_count = command_header.method_count_; + } else if (dma_state.method_count) { + // Data word of methods command + CallMethod(command_header.argument); + + if (!dma_state.non_incrementing) { + dma_state.method++; + } + + if (dma_increment_once) { + dma_state.non_incrementing = true; + } + + dma_state.method_count--; + } else { + // No command active - this is the first word of a new one + switch (command_header.mode) { + case SubmissionMode::Increasing: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = false; + break; + case SubmissionMode::NonIncreasing: + SetState(command_header); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::Inline: + dma_state.method = command_header.method; + dma_state.subchannel = command_header.subchannel; + CallMethod(command_header.arg_count); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::IncreaseOnce: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = true; + break; + } + } } - // now, see if we're in the middle of a command - if (dma_state.length_pending) { - // Second word of long non-inc methods command - method count - dma_state.length_pending = 0; - dma_state.method_count = command_header.method_count_; - } else if (dma_state.method_count) { - // Data word of methods command - CallMethod(command_header.argument); + dma_get = dma_put; - if (!dma_state.non_incrementing) { - dma_state.method++; - } - - if (dma_increment_once) { - dma_state.non_incrementing = true; - } - - dma_state.method_count--; - } else { - // No command active - this is the first word of a new one - switch (command_header.mode) { - case SubmissionMode::Increasing: - SetState(command_header); - dma_state.non_incrementing = false; - dma_increment_once = false; - break; - case SubmissionMode::NonIncreasing: - SetState(command_header); - dma_state.non_incrementing = true; - dma_increment_once = false; - break; - case SubmissionMode::Inline: - dma_state.method = command_header.method; - dma_state.subchannel = command_header.subchannel; - CallMethod(command_header.arg_count); - dma_state.non_incrementing = true; - dma_increment_once = false; - break; - case SubmissionMode::IncreaseOnce: - SetState(command_header); - dma_state.non_incrementing = false; - dma_increment_once = true; - break; - } + if (!non_main) { + // TODO (degasus): This is dead code, as dma_mget is never read. + dma_mget = dma_get; } } else if (ib_enable && !dma_pushbuffer.empty()) { // Current pushbuffer empty, but we have more IB entries to read diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1097e5c49..14b23b1d7 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -75,6 +75,8 @@ private: GPU& gpu; + std::vector command_headers; ///< Buffer for list of commands fetched at once + std::queue dma_pushbuffer; ///< Queue of command lists to be processed std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer From 6dd40976d0eb39e9b4ac2cb7e3b78fe0a4bf0116 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Tue, 19 Feb 2019 10:26:58 +0100 Subject: [PATCH 2/2] video_core/dma_pusher: Simplyfy Step() logic. As fetching command list headers and and the list of command headers is a fixed 1:1 relation now, they can be implemented within a single call. This cleans up the Step() logic quite a bit. --- src/video_core/dma_pusher.cpp | 143 +++++++++++++++++----------------- src/video_core/dma_pusher.h | 3 - 2 files changed, 71 insertions(+), 75 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 654e4d9aa..669541b4b 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -33,88 +33,87 @@ void DmaPusher::DispatchCalls() { } bool DmaPusher::Step() { - if (dma_get != dma_put) { - // Push buffer non-empty, read a word - const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); - ASSERT_MSG(address, "Invalid GPU address"); + if (!ib_enable || dma_pushbuffer.empty()) { + // pushbuffer empty and IB empty or nonexistent - nothing to do + return false; + } - GPUVAddr size = dma_put - dma_get; - ASSERT_MSG(size % sizeof(CommandHeader) == 0, "Invalid aligned GPU addresses"); - command_headers.resize(size / sizeof(CommandHeader)); + const CommandList& command_list{dma_pushbuffer.front()}; + const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; + GPUVAddr dma_get = command_list_header.addr; + GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); + bool non_main = command_list_header.is_non_main; - Memory::ReadBlock(*address, command_headers.data(), size); + if (dma_pushbuffer_subindex >= command_list.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } - for (const CommandHeader& command_header : command_headers) { + if (command_list_header.size == 0) { + return true; + } - // now, see if we're in the middle of a command - if (dma_state.length_pending) { - // Second word of long non-inc methods command - method count - dma_state.length_pending = 0; - dma_state.method_count = command_header.method_count_; - } else if (dma_state.method_count) { - // Data word of methods command - CallMethod(command_header.argument); + // Push buffer non-empty, read a word + const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); + ASSERT_MSG(address, "Invalid GPU address"); - if (!dma_state.non_incrementing) { - dma_state.method++; - } + command_headers.resize(command_list_header.size); - if (dma_increment_once) { - dma_state.non_incrementing = true; - } + Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32)); - dma_state.method_count--; - } else { - // No command active - this is the first word of a new one - switch (command_header.mode) { - case SubmissionMode::Increasing: - SetState(command_header); - dma_state.non_incrementing = false; - dma_increment_once = false; - break; - case SubmissionMode::NonIncreasing: - SetState(command_header); - dma_state.non_incrementing = true; - dma_increment_once = false; - break; - case SubmissionMode::Inline: - dma_state.method = command_header.method; - dma_state.subchannel = command_header.subchannel; - CallMethod(command_header.arg_count); - dma_state.non_incrementing = true; - dma_increment_once = false; - break; - case SubmissionMode::IncreaseOnce: - SetState(command_header); - dma_state.non_incrementing = false; - dma_increment_once = true; - break; - } + for (const CommandHeader& command_header : command_headers) { + + // now, see if we're in the middle of a command + if (dma_state.length_pending) { + // Second word of long non-inc methods command - method count + dma_state.length_pending = 0; + dma_state.method_count = command_header.method_count_; + } else if (dma_state.method_count) { + // Data word of methods command + CallMethod(command_header.argument); + + if (!dma_state.non_incrementing) { + dma_state.method++; + } + + if (dma_increment_once) { + dma_state.non_incrementing = true; + } + + dma_state.method_count--; + } else { + // No command active - this is the first word of a new one + switch (command_header.mode) { + case SubmissionMode::Increasing: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = false; + break; + case SubmissionMode::NonIncreasing: + SetState(command_header); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::Inline: + dma_state.method = command_header.method; + dma_state.subchannel = command_header.subchannel; + CallMethod(command_header.arg_count); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::IncreaseOnce: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = true; + break; } } + } - dma_get = dma_put; - - if (!non_main) { - // TODO (degasus): This is dead code, as dma_mget is never read. - dma_mget = dma_get; - } - } else if (ib_enable && !dma_pushbuffer.empty()) { - // Current pushbuffer empty, but we have more IB entries to read - const CommandList& command_list{dma_pushbuffer.front()}; - const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; - dma_get = command_list_header.addr; - dma_put = dma_get + command_list_header.size * sizeof(u32); - non_main = command_list_header.is_non_main; - - if (dma_pushbuffer_subindex >= command_list.size()) { - // We've gone through the current list, remove it from the queue - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - } - } else { - // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do - return {}; + if (!non_main) { + // TODO (degasus): This is dead code, as dma_mget is never read. + dma_mget = dma_put; } return true; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 14b23b1d7..27a36348c 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -91,11 +91,8 @@ private: DmaState dma_state{}; bool dma_increment_once{}; - GPUVAddr dma_put{}; ///< pushbuffer current end address - GPUVAddr dma_get{}; ///< pushbuffer current read address GPUVAddr dma_mget{}; ///< main pushbuffer last read address bool ib_enable{true}; ///< IB mode enabled - bool non_main{}; ///< non-main pushbuffer active }; } // namespace Tegra