maxwell_3d: Slow implementation of passed samples (query 21)

Implements GL_SAMPLES_PASSED by waiting immediately for queries.
This commit is contained in:
ReinUsesLisp 2019-07-27 19:40:10 -03:00
parent 3217400dd1
commit 2b58652f08
8 changed files with 201 additions and 17 deletions

View file

@ -74,6 +74,8 @@ add_library(video_core STATIC
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_query_cache.cpp
renderer_opengl/gl_query_cache.h
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h

View file

@ -400,6 +400,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessQueryCondition();
break;
}
case MAXWELL3D_REG_INDEX(counter_reset): {
ProcessCounterReset();
break;
}
case MAXWELL3D_REG_INDEX(sync_info): {
ProcessSyncPoint();
break;
@ -544,23 +548,23 @@ void Maxwell3D::ProcessQueryGet() {
"Units other than CROP are unimplemented");
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release: {
const u64 result = regs.query.query_sequence;
StampQueryResult(result, regs.query.query_get.short_query == 0);
case Regs::QueryOperation::Release:
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
break;
}
case Regs::QueryOperation::Acquire: {
// Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
// to write a value that matches the current payload.
case Regs::QueryOperation::Acquire:
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
// matches the current payload.
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
break;
}
case Regs::QueryOperation::Counter: {
u64 result{};
u64 result;
switch (regs.query.query_get.select) {
case Regs::QuerySelect::Zero:
result = 0;
break;
case Regs::QuerySelect::SamplesPassed:
result = rasterizer.Query(VideoCore::QueryType::SamplesPassed);
break;
default:
result = 1;
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
@ -569,15 +573,13 @@ void Maxwell3D::ProcessQueryGet() {
StampQueryResult(result, regs.query.query_get.short_query == 0);
break;
}
case Regs::QueryOperation::Trap: {
case Regs::QueryOperation::Trap:
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
break;
}
default: {
default:
UNIMPLEMENTED_MSG("Unknown query operation");
break;
}
}
}
void Maxwell3D::ProcessQueryCondition() {
@ -619,6 +621,17 @@ void Maxwell3D::ProcessQueryCondition() {
}
}
/// Handles a write to the counter_reset register by resetting the counter it selects.
/// Only the samples counter is forwarded to the rasterizer; every other value is reported
/// through UNIMPLEMENTED_MSG.
void Maxwell3D::ProcessCounterReset() {
    if (regs.counter_reset == Regs::CounterReset::SampleCnt) {
        rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed);
        return;
    }
    UNIMPLEMENTED_MSG("counter_reset={}", static_cast<u32>(regs.counter_reset));
}
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();

View file

@ -409,6 +409,27 @@ public:
Linear = 1,
};
/// Values of the counter_reset register, selecting which counter a reset applies to.
/// Maxwell3D::ProcessCounterReset currently handles SampleCnt only; all other values are
/// reported there as unimplemented.
/// NOTE(review): the UnkXX entries presumably name values whose meaning has not been
/// identified yet - confirm before relying on them.
enum class CounterReset : u32 {
SampleCnt = 0x01, // Resets the samples passed counter
Unk02 = 0x02,
Unk03 = 0x03,
Unk04 = 0x04,
EmittedPrimitives = 0x10, // Not tested
Unk11 = 0x11,
Unk12 = 0x12,
Unk13 = 0x13,
Unk15 = 0x15,
Unk16 = 0x16,
Unk17 = 0x17,
Unk18 = 0x18,
Unk1A = 0x1A,
Unk1B = 0x1B,
Unk1C = 0x1C,
Unk1D = 0x1D,
Unk1E = 0x1E,
GeneratedPrimitives = 0x1F,
};
struct Cull {
enum class FrontFace : u32 {
ClockWise = 0x0900,
@ -857,7 +878,7 @@ public:
BitField<7, 1, u32> c7;
} clip_distance_enabled;
INSERT_UNION_PADDING_WORDS(0x1);
u32 samplecnt_enable;
float point_size;
@ -865,7 +886,11 @@ public:
u32 point_sprite_enable;
INSERT_UNION_PADDING_WORDS(0x5);
INSERT_UNION_PADDING_WORDS(0x3);
CounterReset counter_reset;
INSERT_UNION_PADDING_WORDS(0x1);
u32 zeta_enable;
@ -1412,12 +1437,15 @@ private:
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
// Writes the query result accordingly
/// Writes the query result accordingly.
void StampQueryResult(u64 payload, bool long_query);
// Handles Conditional Rendering
/// Handles conditional rendering.
void ProcessQueryCondition();
/// Handles counter resets.
void ProcessCounterReset();
/// Handles writes to syncing register.
void ProcessSyncPoint();
@ -1499,8 +1527,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
ASSERT_REG_POSITION(point_size, 0x546);
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
ASSERT_REG_POSITION(counter_reset, 0x54C);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(multisample_control, 0x54F);
ASSERT_REG_POSITION(condition, 0x554);

View file

@ -17,6 +17,10 @@ class MemoryManager;
namespace VideoCore {
/// Kinds of GPU query that can be passed to RasterizerInterface::ResetCounter and
/// RasterizerInterface::Query.
enum class QueryType {
SamplesPassed, ///< Samples passed counter (the commit maps it to GL_SAMPLES_PASSED on OpenGL).
};
enum class LoadCallbackStage {
Prepare,
Decompile,
@ -41,6 +45,12 @@ public:
/// Dispatches a compute shader invocation
virtual void DispatchCompute(GPUVAddr code_addr) = 0;
/// Resets the counter of a query
virtual void ResetCounter(QueryType type) = 0;
/// Returns the value of a GPU query
virtual u64 Query(QueryType type) = 0;
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;

View file

@ -0,0 +1,59 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_query_cache.h"
namespace OpenGL {
/// Creates the OpenGL query object that backs this counter.
/// @param target Query target forwarded to OGLQuery::Create.
/// NOTE(review): Enable() and Disable() hard-code GL_SAMPLES_PASSED instead of using this
/// target, so a counter built with any other target looks unsupported - confirm before adding
/// new counter types.
HostCounter::HostCounter(GLenum target) {
query.Create(target);
}
/// Defaulted destructor.
/// NOTE(review): presumably the OGLQuery member releases the GL object itself - confirm in
/// gl_resource_manager.h.
HostCounter::~HostCounter() = default;
/// Puts the counter in the requested state: the OpenGL query is begun when `enabled` is true and
/// ended otherwise. Enable() and Disable() return early when the counter is already in the
/// requested state, so calling this repeatedly with the same value is harmless.
void HostCounter::UpdateState(bool enabled) {
    if (!enabled) {
        Disable();
        return;
    }
    Enable();
}
void HostCounter::Reset() {
counter = 0;
Disable();
}
u64 HostCounter::Query() {
if (!is_beginned) {
return counter;
}
Disable();
u64 value;
glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
Enable();
counter += value;
return counter;
}
void HostCounter::Enable() {
if (is_beginned) {
return;
}
is_beginned = true;
glBeginQuery(GL_SAMPLES_PASSED, query.handle);
}
void HostCounter::Disable() {
if (!is_beginned) {
return;
}
glEndQuery(GL_SAMPLES_PASSED);
is_beginned = false;
}
} // namespace OpenGL

View file

@ -0,0 +1,41 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
/// Accumulating counter backed by a single OpenGL query object.
/// The OpenGL query is begun and ended as the counter is enabled and disabled; values read back
/// from it are summed into a running total that Reset() clears.
class HostCounter final {
public:
/// Creates the backing OpenGL query object for the given query target.
explicit HostCounter(GLenum target);
~HostCounter();
/// Enables the counter when `enabled` is true and disables it otherwise.
void UpdateState(bool enabled);
/// Clears the accumulated value and leaves the counter disabled.
void Reset();
/// Returns the accumulated value of the counter.
/// If the counter is enabled, the OpenGL query is ended, read back and begun again.
/// @note It may harm precision of future queries if the counter is not disabled.
u64 Query();
private:
/// Begins the OpenGL query if it is not already active.
void Enable();
/// Ends the OpenGL query if it is active.
void Disable();
OGLQuery query; ///< OpenGL query object backing the counter.
u64 counter{}; ///< Sum of the query results read back since the last reset.
bool is_beginned{}; ///< True while the OpenGL query is active (begun and not yet ended).
};
} // namespace OpenGL

View file

@ -547,6 +547,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
samples_passed.UpdateState(regs.samplecnt_enable);
SyncRasterizeEnable(state);
SyncColorMask();
SyncFragmentColorClampState();
@ -709,6 +712,27 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
}
/// Resets the host counter backing the given query type.
/// Only SamplesPassed has a counter; any other type is reported through UNIMPLEMENTED_MSG.
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
    if (type == VideoCore::QueryType::SamplesPassed) {
        samples_passed.Reset();
        return;
    }
    UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
}
/// Returns the current value of the host counter backing the given query type.
/// Types without a counter are reported through UNIMPLEMENTED_MSG and yield 1.
u64 RasterizerOpenGL::Query(VideoCore::QueryType type) {
    if (type != VideoCore::QueryType::SamplesPassed) {
        UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
        return 1;
    }
    return samples_passed.Query();
}
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {

View file

@ -24,6 +24,7 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
@ -61,6 +62,8 @@ public:
bool DrawMultiBatch(bool is_indexed) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
u64 Query(VideoCore::QueryType type) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
@ -221,6 +224,8 @@ private:
GLintptr SetupIndexBuffer();
void SetupShaders(GLenum primitive_mode);
HostCounter samples_passed{GL_SAMPLES_PASSED};
};
} // namespace OpenGL