diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@ #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" @@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() { regs.reg_array[0xd00] = 1; } -void Maxwell3D::ProcessQueryGet() { - const GPUVAddr sequence_address{regs.query.QueryAddress()}; - // Since the sequence address is given as a GPU VAddr, we have to convert it to an application - // VAddr before writing. - - // TODO(Subv): Support the other query units. - ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, - "Units other than CROP are unimplemented"); - - u64 result = 0; - - // TODO(Subv): Support the other query variables - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - // This seems to actually write the query sequence to the query address. - result = regs.query.query_sequence; - break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast(regs.query.query_get.select.Value())); - } - - // TODO(Subv): Research and implement how query sync conditions work. - +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { struct LongQueryResult { u64_le value; u64_le timestamp; }; static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); + const GPUVAddr sequence_address{regs.query.QueryAddress()}; + if (long_query) { + // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. + LongQueryResult query_result{payload, system.GPU().GetTicks()}; + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + } else { + memory_manager.Write(sequence_address, static_cast(payload)); + } +} - switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: - case Regs::QueryMode::Write2: { - u32 sequence = regs.query.query_sequence; - if (regs.query.query_get.short_query) { - // Write the current query sequence to the sequence address. - // TODO(Subv): Find out what happens if you use a long query type but mark it as a short - // query. - memory_manager.Write(sequence_address, sequence); - } else { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. - LongQueryResult query_result{}; - query_result.value = result; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); - } +void Maxwell3D::ProcessQueryGet() { + // TODO(Subv): Support the other query units. + ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, + "Units other than CROP are unimplemented"); + + switch (regs.query.query_get.operation) { + case Regs::QueryOperation::Release: { + const u64 result = regs.query.query_sequence; + StampQueryResult(result, regs.query.query_get.short_query == 0); + break; + } + case Regs::QueryOperation::Acquire: { + // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU + // to write a value that matches the current payload. + UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); + break; + } + case Regs::QueryOperation::Counter: { + u64 result{}; + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + result = 0; + break; + default: + result = 1; + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast(regs.query.query_get.select.Value())); + } + StampQueryResult(result, regs.query.query_get.short_query == 0); + break; + } + case Regs::QueryOperation::Trap: { + UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); + break; + } + default: { + UNIMPLEMENTED_MSG("Unknown query operation"); break; } - default: - UNIMPLEMENTED_MSG("Query mode {} not implemented", - static_cast(regs.query.query_get.mode.Value())); } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7b1912a66..0a2af54e5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -71,12 +71,11 @@ public: static constexpr std::size_t MaxConstBuffers = 18; static constexpr std::size_t MaxConstBufferSize = 0x10000; - enum class QueryMode : u32 { - Write = 0, - Sync = 1, - // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 - // is. - Write2 = 2, + enum class QueryOperation : u32 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3, }; enum class QueryUnit : u32 { @@ -1081,7 +1080,7 @@ public: u32 query_sequence; union { u32 raw; - BitField<0, 2, QueryMode> mode; + BitField<0, 2, QueryOperation> operation; BitField<4, 1, u32> fence; BitField<12, 4, QueryUnit> unit; BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1413,6 +1412,9 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); + // Writes the query result accordingly + void StampQueryResult(u64 payload, bool long_query); + // Handles Conditional Rendering void ProcessQueryCondition(); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +u64 GPU::GetTicks() const { + // This values were reversed engineered by fincs from NVN + // The gpu clock is reported in units of 385/625 nanoseconds + constexpr u64 gpu_ticks_num = 384; + constexpr u64 gpu_ticks_den = 625; + + const u64 cpu_ticks = system.CoreTiming().GetTicks(); + const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; + return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; +} + void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = system.CoreTiming().GetTicks(); + block.timestamp = GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public: bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + u64 GetTicks() const; + std::unique_lock LockSync() { return std::unique_lock{sync_mutex}; }