diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fe91ff6a0..9add2bc94 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -556,23 +556,13 @@ void Maxwell3D::ProcessQueryGet() { // matches the current payload. UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; - case Regs::QueryOperation::Counter: { - u64 result; - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - result = 0; - break; - case Regs::QuerySelect::SamplesPassed: - result = rasterizer.Query(VideoCore::QueryType::SamplesPassed); - break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast(regs.query.query_get.select.Value())); + case Regs::QueryOperation::Counter: + if (const std::optional result = GetQueryResult()) { + // If the query returns an empty optional it means it's cached and deferred. + // In this case we have a non-empty result, so we stamp it immediately. 
+ StampQueryResult(*result, regs.query.query_get.short_query == 0); } - StampQueryResult(result, regs.query.query_get.short_query == 0); break; - } case Regs::QueryOperation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); break; @@ -595,20 +585,20 @@ void Maxwell3D::ProcessQueryCondition() { } case Regs::ConditionMode::ResNonZero: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; break; } case Regs::ConditionMode::Equal: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; break; } case Regs::ConditionMode::NotEqual: { Regs::QueryCompare cmp; - memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); + memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); execute_on = cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; break; @@ -674,6 +664,21 @@ void Maxwell3D::DrawArrays() { } } +std::optional Maxwell3D::GetQueryResult() { + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + return 0; + case Regs::QuerySelect::SamplesPassed: + // Deferred. + rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed); + return {}; + default: + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast(regs.query.query_get.select.Value())); + return 1; + } +} + void Maxwell3D::ProcessCBBind(std::size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 
auto& shader = state.shader_stages[stage_index]; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d21f678ed..26939be3f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -1462,6 +1463,9 @@ private: // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); + + /// Returns a query's value or an empty object if the value will be deferred through a cache. + std::optional GetQueryResult(); }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2fc627539..a394f2d3e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -20,6 +20,7 @@ namespace VideoCore { enum class QueryType { SamplesPassed, }; +constexpr std::size_t NumQueryTypes = 1; enum class LoadCallbackStage { Prepare, @@ -48,8 +49,8 @@ public: /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; - /// Returns the value of a GPU query - virtual u64 Query(QueryType type) = 0; + /// Records a GPU query and caches it + virtual void Query(GPUVAddr gpu_addr, QueryType type) = 0; /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 1c7dc999a..8f0e8241d 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -2,58 +2,203 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include +#include +#include +#include + #include +#include "common/assert.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_query_cache.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" namespace OpenGL { -HostCounter::HostCounter(GLenum target) { - query.Create(target); +using VideoCore::QueryType; + +namespace { + +constexpr std::array QueryTargets = {GL_SAMPLES_PASSED}; + +constexpr GLenum GetTarget(QueryType type) { + return QueryTargets[static_cast(type)]; } -HostCounter::~HostCounter() = default; +} // Anonymous namespace -void HostCounter::UpdateState(bool enabled) { +CounterStream::CounterStream(QueryCache& cache, QueryType type) + : cache{cache}, type{type}, target{GetTarget(type)} {} + +CounterStream::~CounterStream() = default; + +void CounterStream::Update(bool enabled, bool any_command_queued) { if (enabled) { - Enable(); - } else { - Disable(); + if (!current) { + current = cache.GetHostCounter(last, type); + } + return; } + + if (current) { + EndQuery(any_command_queued); + } + last = std::exchange(current, nullptr); } -void HostCounter::Reset() { - counter = 0; - Disable(); +void CounterStream::Reset(bool any_command_queued) { + if (current) { + EndQuery(any_command_queued); + } + current = nullptr; + last = nullptr; } -u64 HostCounter::Query() { - if (!is_beginned) { - return counter; +std::shared_ptr CounterStream::GetCurrent(bool any_command_queued) { + if (!current) { + return nullptr; } - Disable(); - u64 value; - glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); - Enable(); + EndQuery(any_command_queued); + last = std::move(current); + current = cache.GetHostCounter(last, type); + return last; +} - counter += value; +void CounterStream::EndQuery(bool any_command_queued) { + if (!any_command_queued) { + // A query may be waited on before any command (glDraw, glClear, glDispatch) is queued. 
Not + // having any of these causes a lock. glFlush is considered a command, so waiting is safe + // once a flush has been inserted into the OpenGL command stream. + glFlush(); + } + glEndQuery(target); +} + +QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) + : RasterizerCache{rasterizer}, system{system}, + rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {} + +QueryCache::~QueryCache() = default; + +void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) { + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + + auto query = TryGet(host_ptr); + if (!query) { + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + + query = std::make_shared(type, *cpu_addr, host_ptr); + Register(query); + } + + query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued())); + query->MarkAsModified(true, *this); +} + +void QueryCache::UpdateCounters() { + auto& samples_passed = GetStream(QueryType::SamplesPassed); + + const auto& regs = system.GPU().Maxwell3D().regs; + samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued()); +} + +void QueryCache::ResetCounter(QueryType type) { + GetStream(type).Reset(rasterizer.AnyCommandQueued()); +} + +void QueryCache::Reserve(QueryType type, OGLQuery&& query) { + reserved_queries[static_cast(type)].push_back(std::move(query)); +} + +std::shared_ptr QueryCache::GetHostCounter(std::shared_ptr dependency, + QueryType type) { + const auto type_index = static_cast(type); + auto& reserve = reserved_queries[type_index]; + + if (reserve.empty()) { + return std::make_shared(*this, std::move(dependency), type); + } + + auto counter = std::make_shared(*this, std::move(dependency), type, + std::move(reserve.back())); + reserve.pop_back(); return counter; } -void HostCounter::Enable() { - if (is_beginned) { - return; +void QueryCache::FlushObjectInner(const 
std::shared_ptr& counter_) { + auto& counter = *counter_; + auto& stream = GetStream(counter.GetType()); + + // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. + // To avoid this disable and re-enable keeping the dependency stream. + const bool is_enabled = stream.IsEnabled(); + if (is_enabled) { + stream.Update(false, false); + } + + counter.Flush(); + + if (is_enabled) { + stream.Update(true, false); } - is_beginned = true; - glBeginQuery(GL_SAMPLES_PASSED, query.handle); } -void HostCounter::Disable() { - if (!is_beginned) { - return; +CounterStream& QueryCache::GetStream(QueryType type) { + return streams[static_cast(type)]; +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, QueryType type) + : cache{cache}, type{type}, dependency{std::move(dependency)} { + const GLenum target = GetTarget(type); + query.Create(target); + glBeginQuery(target, query.handle); +} + +HostCounter::HostCounter(QueryCache& cache, std::shared_ptr dependency, QueryType type, + OGLQuery&& query_) + : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { + glBeginQuery(GetTarget(type), query.handle); +} + +HostCounter::~HostCounter() { + cache.Reserve(type, std::move(query)); +} + +u64 HostCounter::Query() { + if (query.handle == 0) { + return result; } - glEndQuery(GL_SAMPLES_PASSED); - is_beginned = false; + + glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result); + + if (dependency) { + result += dependency->Query(); + } + + return result; +} + +CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + +CachedQuery::~CachedQuery() = default; + +void CachedQuery::Flush() { + const u64 value = counter->Query(); + std::memcpy(host_ptr, &value, sizeof(value)); +} + +void CachedQuery::SetCounter(std::shared_ptr counter_) { + counter = std::move(counter_); +} + +QueryType 
CachedQuery::GetType() const { + return type; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 52c6546bf..91594b120 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -4,38 +4,131 @@ #pragma once +#include +#include +#include +#include + #include #include "common/common_types.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +namespace Core { +class System; +} + namespace OpenGL { +class CachedQuery; +class HostCounter; +class RasterizerOpenGL; +class QueryCache; + +class CounterStream final { +public: + explicit CounterStream(QueryCache& cache, VideoCore::QueryType type); + ~CounterStream(); + + void Update(bool enabled, bool any_command_queued); + + void Reset(bool any_command_queued); + + std::shared_ptr GetCurrent(bool any_command_queued); + + bool IsEnabled() const { + return current != nullptr; + } + +private: + void EndQuery(bool any_command_queued); + + QueryCache& cache; + + std::shared_ptr current; + std::shared_ptr last; + VideoCore::QueryType type; + GLenum target; +}; + +class QueryCache final : public RasterizerCache> { +public: + explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + ~QueryCache(); + + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type); + + void UpdateCounters(); + + void ResetCounter(VideoCore::QueryType type); + + void Reserve(VideoCore::QueryType type, OGLQuery&& query); + + std::shared_ptr GetHostCounter(std::shared_ptr dependency, + VideoCore::QueryType type); + +protected: + void FlushObjectInner(const std::shared_ptr& counter) override; + +private: + CounterStream& GetStream(VideoCore::QueryType type); + + Core::System& system; + RasterizerOpenGL& rasterizer; + + std::array streams; + std::array, VideoCore::NumQueryTypes> reserved_queries; +}; + 
class HostCounter final { public: - explicit HostCounter(GLenum target); + explicit HostCounter(QueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type); + explicit HostCounter(QueryCache& cache, std::shared_ptr dependency, + VideoCore::QueryType type, OGLQuery&& query); ~HostCounter(); - /// Enables or disables the counter as required. - void UpdateState(bool enabled); - - /// Resets the counter disabling it if needed. - void Reset(); - /// Returns the current value of the query. - /// @note It may harm precision of future queries if the counter is not disabled. u64 Query(); private: - /// Enables the counter when disabled. - void Enable(); + QueryCache& cache; + VideoCore::QueryType type; - /// Disables the counter when enabled. - void Disable(); + std::shared_ptr dependency; ///< Counter queued before this one. + OGLQuery query; ///< OpenGL query. + u64 result; ///< Added values of the counter. +}; - OGLQuery query; ///< OpenGL query. - u64 counter{}; ///< Added values of the counter. - bool is_beginned{}; ///< True when the OpenGL query is beginned. +class CachedQuery final : public RasterizerCacheObject { +public: + explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); + ~CachedQuery(); + + /// Writes the counter value to host memory. + void Flush(); + + /// Updates the counter this cached query registered in guest memory will write when requested. + void SetCounter(std::shared_ptr counter); + + /// Returns the query type. + VideoCore::QueryType GetType() const; + + VAddr GetCpuAddr() const override { + return cpu_addr; + } + + std::size_t GetSizeInBytes() const override { + return sizeof(u64); + } + +private: + VideoCore::QueryType type; + VAddr cpu_addr; ///< Guest CPU address. + u8* host_ptr; ///< Writable host pointer. + std::shared_ptr counter; ///< Host counter to query, owns the dependency tree. 
}; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 652db705b..827f85884 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,6 +25,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { + shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, + screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique(); state.draw.shader_program = 0; state.Apply(); @@ -548,9 +549,9 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; - samples_passed.UpdateState(regs.samplecnt_enable); + + query_cache.UpdateCounters(); SyncRasterizeEnable(state); SyncColorMask(); @@ -718,24 +719,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - samples_passed.Reset(); - break; - default: - UNIMPLEMENTED_MSG("type={}", 
static_cast(type)); - break; - } + query_cache.ResetCounter(type); } -u64 RasterizerOpenGL::Query(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - return samples_passed.Query(); - default: - UNIMPLEMENTED_MSG("type={}", static_cast(type)); - return 1; - } +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) { + query_cache.Query(gpu_addr, type); } void RasterizerOpenGL::FlushAll() {} @@ -747,6 +735,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -757,6 +746,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 857a6c073..4fb6811a7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -63,7 +63,7 @@ public: void Clear() override; void DispatchCompute(GPUVAddr code_addr) override; void ResetCounter(VideoCore::QueryType type) override; - u64 Query(VideoCore::QueryType type) override; + void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override; void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; @@ -78,6 +78,11 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + /// Returns true when there are commands queued to the OpenGL server. 
+ bool AnyCommandQueued() const { + return num_queued_commands > 0; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -207,6 +212,7 @@ private: ShaderCacheOpenGL shader_cache; SamplerCacheOpenGL sampler_cache; FramebufferCacheOpenGL framebuffer_cache; + QueryCache query_cache; Core::System& system; ScreenInfo& screen_info; @@ -224,8 +230,6 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; - HostCounter samples_passed{GL_SAMPLES_PASSED}; - /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; };