OpenGL: Implement Fencing backend.

This commit is contained in:
Fernando Sahmkow 2020-02-17 18:10:23 -04:00
parent ed7e965712
commit 487379c593
12 changed files with 94 additions and 19 deletions

View file

@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
} }
} }
/// Writes the payload of every delayed fence (truncated to 32 bits) to its GPU address, in
/// insertion order, and then empties the delayed-fence list.
void Maxwell3D::ReleaseFences() {
    for (const auto& [fence_addr, fence_payload] : delay_fences) {
        memory_manager.Write<u32>(fence_addr, static_cast<u32>(fence_payload));
    }
    delay_fences.clear();
}
void Maxwell3D::ProcessQueryGet() { void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units. // TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) { switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release: { case Regs::QueryOperation::Release: {
rasterizer.FlushCommands();
rasterizer.SyncGuestHost();
const u64 result = regs.query.query_sequence; const u64 result = regs.query.query_sequence;
delay_fences.emplace_back(regs.query.QueryAddress(), result); if (regs.query.query_get.fence == 1) {
rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result));
} else {
StampQueryResult(result, regs.query.query_get.short_query == 0);
}
break; break;
} }
case Regs::QueryOperation::Acquire: case Regs::QueryOperation::Acquire:

View file

@ -1427,8 +1427,6 @@ public:
Tables tables{}; Tables tables{};
} dirty; } dirty;
void ReleaseFences();
private: private:
void InitializeRegisterDefaults(); void InitializeRegisterDefaults();
@ -1469,8 +1467,6 @@ private:
std::array<u8, Regs::NUM_REGS> dirty_pointers{}; std::array<u8, Regs::NUM_REGS> dirty_pointers{};
std::vector<std::pair<GPUVAddr, u64>> delay_fences;
/// Retrieves information about a specific TIC entry from the TIC buffer. /// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const; Texture::TICEntry GetTICEntry(u32 tic_index) const;

View file

@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {
} }
void GPU::OnCommandListEnd() { void GPU::OnCommandListEnd() {
maxwell_3d->ReleaseFences(); renderer.Rasterizer().ReleaseFences();
} }
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.

View file

@ -157,7 +157,7 @@ public:
void FlushCommands(); void FlushCommands();
void SyncGuestHost(); void SyncGuestHost();
void OnCommandListEnd(); virtual void OnCommandListEnd();
/// Returns a reference to the Maxwell3D GPU engine. /// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D(); Engines::Maxwell3D& Maxwell3D();

View file

@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
gpu_thread.WaitIdle(); gpu_thread.WaitIdle();
} }
/// Forwards the end-of-command-list notification to gpu_thread instead of handling it inline.
void GPUAsynch::OnCommandListEnd() {
gpu_thread.OnCommandListEnd();
}
} // namespace VideoCommon } // namespace VideoCommon

View file

@ -32,6 +32,8 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override; void WaitIdle() const override;
void OnCommandListEnd() override;
protected: protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;

View file

@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
dma_pusher.DispatchCalls(); dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
renderer.Rasterizer().ReleaseFences();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size); renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const {
} }
} }
/// Queues an OnCommandListEndCommand for the GPU thread. The fence value returned by
/// PushCommand is intentionally ignored.
void ThreadManager::OnCommandListEnd() {
    PushCommand(OnCommandListEndCommand{});
}
u64 ThreadManager::PushCommand(CommandData&& command_data) { u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence}; const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence)); state.queue.Push(CommandDataContainer(std::move(command_data), fence));

View file

@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final {
u64 size; u64 size;
}; };
/// Command notifying the GPU thread that a command list has ended. It carries no payload; the
/// command type itself is the message.
struct OnCommandListEndCommand final {};
using CommandData = using CommandData =
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
struct CommandDataContainer { struct CommandDataContainer {
CommandDataContainer() = default; CommandDataContainer() = default;
@ -122,6 +125,8 @@ public:
// Wait until the gpu thread is idle. // Wait until the gpu thread is idle.
void WaitIdle() const; void WaitIdle() const;
void OnCommandListEnd();
private: private:
/// Pushes a command to be executed by the GPU thread /// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data); u64 PushCommand(CommandData&& command_data);

View file

@ -49,6 +49,14 @@ public:
/// Records a GPU query and caches it /// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
/// Signals a fence for GPU address `addr` with payload `value`.
/// The default implementation does nothing; backends that support fencing override it.
virtual void SignalFence(GPUVAddr addr, u32 value) {
}
/// Releases any fences that are still pending.
/// The default implementation does nothing; backends that support fencing override it.
virtual void ReleaseFences() {
}
/// Notify rasterizer that all caches should be flushed to Switch memory /// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0; virtual void FlushAll() = 0;

View file

@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() {
buffer_cache.SyncGuestHost(); buffer_cache.SyncGuestHost();
} }
/// Queues a fence that will write `value` to GPU address `addr`.
///
/// If a fence is already pending, the oldest one is retired first: the texture cache pops one
/// batch of committed flushes, the fence payload is written to guest memory, and the fence is
/// removed from the queue. The new fence is then appended, the currently uncommitted texture
/// flushes are committed, and commands are flushed and guest/host state is synchronized.
void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) {
    if (!fences.empty()) {
        // Copy the entry out before popping it so the values stay valid for the write.
        const auto [pending_addr, pending_value] = fences.front();
        texture_cache.PopAsyncFlushes();
        system.GPU().MemoryManager().Write<u32>(pending_addr, pending_value);
        fences.pop_front();
    }

    fences.emplace_back(addr, value);
    texture_cache.CommitAsyncFlushes();
    FlushCommands();
    SyncGuestHost();
}
/// Retires every pending fence in FIFO order.
///
/// For each fence, one batch of committed texture flushes is popped and then the fence payload
/// is written to its GPU address. The fence is removed from the queue after the write.
void RasterizerOpenGL::ReleaseFences() {
    for (; !fences.empty(); fences.pop_front()) {
        const auto [pending_addr, pending_value] = fences.front();
        texture_cache.PopAsyncFlushes();
        system.GPU().MemoryManager().Write<u32>(pending_addr, pending_value);
    }
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::IsGPULevelExtreme()) { if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size); FlushRegion(addr, size);

View file

@ -69,6 +69,8 @@ public:
void InvalidateRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override; void SyncGuestHost() override;
void SignalFence(GPUVAddr addr, u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override; void FlushCommands() override;
void TickFrame() override; void TickFrame() override;

View file

@ -238,7 +238,7 @@ public:
surface->MarkAsRenderTarget(false, NO_RT); surface->MarkAsRenderTarget(false, NO_RT);
const auto& cr_params = surface->GetSurfaceParams(); const auto& cr_params = surface->GetSurfaceParams();
if (!cr_params.is_tiled) { if (!cr_params.is_tiled) {
FlushSurface(surface); AsyncFlushSurface(surface);
} }
} }
render_targets[index].target = surface_view.first; render_targets[index].target = surface_view.first;
@ -317,6 +317,26 @@ public:
return ++ticks; return ++ticks;
} }
/// Appends the current batch of uncommitted flushes (which may be null when nothing was
/// queued) to the committed list and starts a new, empty uncommitted batch.
void CommitAsyncFlushes() {
    commited_flushes.emplace_back(uncommited_flushes);
    uncommited_flushes = nullptr;
}
void PopAsyncFlushes() {
if (commited_flushes.empty()) {
return;
}
auto& flush_list = commited_flushes.front();
if (!flush_list) {
commited_flushes.pop_front();
return;
}
for (TSurface& surface : *flush_list) {
FlushSurface(surface);
}
commited_flushes.pop_front();
}
protected: protected:
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
bool is_astc_supported) bool is_astc_supported)
@ -1152,6 +1172,13 @@ private:
TView view; TView view;
}; };
/// Queues `surface` in the uncommitted flush batch instead of flushing it immediately.
/// The batch is created lazily on the first surface queued after a commit.
void AsyncFlushSurface(TSurface& surface) {
    if (uncommited_flushes == nullptr) {
        uncommited_flushes = std::make_shared<std::list<TSurface>>();
    }
    uncommited_flushes->emplace_back(surface);
}
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
FormatLookupTable format_lookup_table; FormatLookupTable format_lookup_table;
@ -1198,6 +1225,9 @@ private:
std::list<TSurface> marked_for_unregister; std::list<TSurface> marked_for_unregister;
std::shared_ptr<std::list<TSurface>> uncommited_flushes{};
std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes;
StagingCache staging_cache; StagingCache staging_cache;
std::recursive_mutex mutex; std::recursive_mutex mutex;
}; };