Shader_Cache: set up connection of ConstBufferLocker

This commit is contained in:
Fernando Sahmkow 2019-09-23 15:40:58 -04:00 committed by FernandoS27
parent 1a58f45d76
commit acd6441134
10 changed files with 82 additions and 43 deletions

View file

@ -1006,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
} }
const auto cbuf = entry.GetBindlessCBuf(); const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle; Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset()); return compute.GetTextureInfo(tex_handle, entry.GetOffset());
}(); }();
@ -1051,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
} }
const auto cbuf = entry.GetBindlessCBuf(); const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle; Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second); tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
}(); }();
SetupImage(bindpoint, tic, entry); SetupImage(bindpoint, tic, entry);

View file

@ -10,6 +10,7 @@
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "core/core.h" #include "core/core.h"
#include "core/frontend/emu_window.h" #include "core/frontend/emu_window.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
@ -173,8 +174,9 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
} }
/// Creates an unspecialized program from code streams /// Creates an unspecialized program from code streams
GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device,
ProgramCode program_code, ProgramCode program_code_b) { ProgramType program_type, ProgramCode program_code,
ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code); GLShader::ShaderSetup setup(program_code);
setup.program.size_a = CalculateProgramSize(program_code); setup.program.size_a = CalculateProgramSize(program_code);
setup.program.size_b = 0; setup.program.size_b = 0;
@ -190,14 +192,25 @@ GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_
switch (program_type) { switch (program_type) {
case ProgramType::VertexA: case ProgramType::VertexA:
case ProgramType::VertexB: case ProgramType::VertexB: {
return GLShader::GenerateVertexShader(device, setup); VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex,
case ProgramType::Geometry: &(system.GPU().Maxwell3D())};
return GLShader::GenerateGeometryShader(device, setup); return GLShader::GenerateVertexShader(locker, device, setup);
case ProgramType::Fragment: }
return GLShader::GenerateFragmentShader(device, setup); case ProgramType::Geometry: {
case ProgramType::Compute: VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry,
return GLShader::GenerateComputeShader(device, setup); &(system.GPU().Maxwell3D())};
return GLShader::GenerateGeometryShader(locker, device, setup);
}
case ProgramType::Fragment: {
VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment,
&(system.GPU().Maxwell3D())};
return GLShader::GenerateFragmentShader(locker, device, setup);
}
case ProgramType::Compute: {
VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute, &(system.GPU().KeplerCompute())};
return GLShader::GenerateComputeShader(locker, device, setup);
}
default: default:
UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {}; return {};
@ -307,8 +320,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
ProgramCode&& program_code_b) { ProgramCode&& program_code_b) {
const auto code_size{CalculateProgramSize(program_code)}; const auto code_size{CalculateProgramSize(program_code)};
const auto code_size_b{CalculateProgramSize(program_code_b)}; const auto code_size_b{CalculateProgramSize(program_code_b)};
auto result{ auto result{CreateProgram(params.system, params.device, GetProgramType(program_type),
CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; program_code, program_code_b)};
if (result.first.empty()) { if (result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return {}; return {};
@ -331,7 +344,7 @@ Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
} }
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})};
const auto code_size{CalculateProgramSize(code)}; const auto code_size{CalculateProgramSize(code)};
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
@ -566,7 +579,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
result = {stored_decompiled.code, stored_decompiled.entries}; result = {stored_decompiled.code, stored_decompiled.entries};
} else { } else {
// Otherwise decompile the shader at boot and save the result to the decompiled file // Otherwise decompile the shader at boot and save the result to the decompiled file
result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(),
raw.GetProgramCodeB()); raw.GetProgramCodeB());
disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
} }
@ -612,7 +625,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = const auto unique_identifier =
GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
host_ptr, unique_identifier}; host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier); const auto found = precompiled_shaders.find(unique_identifier);
@ -639,7 +652,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
host_ptr, unique_identifier}; host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier); const auto found = precompiled_shaders.find(unique_identifier);

View file

@ -45,6 +45,7 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
struct ShaderParameters { struct ShaderParameters {
ShaderDiskCacheOpenGL& disk_cache; ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs; const PrecompiledPrograms& precompiled_programs;
Core::System& system;
const Device& device; const Device& device;
VAddr cpu_addr; VAddr cpu_addr;
u8* host_ptr; u8* host_ptr;

View file

@ -21,7 +21,8 @@ static constexpr u32 COMPUTE_OFFSET = 0;
static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: VS" + id + "\n\n"; std::string out = "// Shader Unique Id: VS" + id + "\n\n";
@ -35,14 +36,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
)"; )";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
locker);
const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
ProgramResult program = Decompile(device, program_ir, stage, "vertex"); ProgramResult program = Decompile(device, program_ir, stage, "vertex");
out += program.first; out += program.first;
if (setup.IsDualProgram()) { if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
settings); settings, locker);
ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
out += program_b.first; out += program_b.first;
} }
@ -71,7 +73,8 @@ void main() {
return {std::move(out), std::move(program.second)}; return {std::move(out), std::move(program.second)};
} }
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: GS" + id + "\n\n"; std::string out = "// Shader Unique Id: GS" + id + "\n\n";
@ -85,7 +88,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
)"; )";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
out += program.first; out += program.first;
@ -97,7 +101,8 @@ void main() {
return {std::move(out), std::move(program.second)}; return {std::move(out), std::move(program.second)};
} }
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: FS" + id + "\n\n"; std::string out = "// Shader Unique Id: FS" + id + "\n\n";
@ -120,7 +125,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
)"; )";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
out += program.first; out += program.first;
@ -133,13 +139,15 @@ void main() {
return {std::move(out), std::move(program.second)}; return {std::move(out), std::move(program.second)};
} }
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: CS" + id + "\n\n"; std::string out = "// Shader Unique Id: CS" + id + "\n\n";
out += GetCommonDeclarations(); out += GetCommonDeclarations();
const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings); const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings,
locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
out += program.first; out += program.first;

View file

@ -16,6 +16,7 @@ class Device;
namespace OpenGL::GLShader { namespace OpenGL::GLShader {
using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::ProgramCode;
struct ShaderSetup { struct ShaderSetup {
@ -46,15 +47,19 @@ private:
}; };
/// Generates the GLSL vertex shader program source code for the given VS program /// Generates the GLSL vertex shader program source code for the given VS program
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
/// Generates the GLSL geometry shader program source code for the given GS program /// Generates the GLSL geometry shader program source code for the given GS program
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
/// Generates the GLSL fragment shader program source code for the given FS program /// Generates the GLSL fragment shader program source code for the given FS program
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
/// Generates the GLSL compute shader program source code for the given CS program /// Generates the GLSL compute shader program source code for the given CS program
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
} // namespace OpenGL::GLShader } // namespace OpenGL::GLShader

View file

@ -57,8 +57,8 @@ struct BlockInfo {
struct CFGRebuildState { struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
const u32 start) const u32 start, ConstBufferLocker& locker)
: start{start}, program_code{program_code}, program_size{program_size} {} : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {}
u32 start{}; u32 start{};
std::vector<BlockInfo> block_info{}; std::vector<BlockInfo> block_info{};
@ -72,6 +72,7 @@ struct CFGRebuildState {
const ProgramCode& program_code; const ProgramCode& program_code;
const std::size_t program_size; const std::size_t program_size;
ASTManager* manager; ASTManager* manager;
ConstBufferLocker& locker;
}; };
enum class BlockCollision : u32 { None, Found, Inside }; enum class BlockCollision : u32 { None, Found, Inside };
@ -214,7 +215,7 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState&
if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
if (instr.gpr0.Value() == track_register) { if (instr.gpr0.Value() == track_register) {
track_register = instr.gpr8.Value(); track_register = instr.gpr8.Value();
result.entries = instr.alu.GetSignedImm20_20(); result.entries = instr.alu.GetSignedImm20_20() + 1;
pos--; pos--;
found_track = true; found_track = true;
break; break;
@ -406,8 +407,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
auto tmp = TrackBranchIndirectInfo(state, address, offset); auto tmp = TrackBranchIndirectInfo(state, address, offset);
if (tmp) { if (tmp) {
auto result = *tmp; auto result = *tmp;
LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}", std::string entries{};
result.buffer, result.offset, result.entries); for (u32 i = 0; i < result.entries; i++) {
auto k = locker.ObtainKey(result.buffer, result.offset + i * 4);
entries = entries + std::to_string(*k) + '\n';
}
LOG_CRITICAL(HW_GPU,
"Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}",
result.buffer, result.offset, result.entries, entries);
} else { } else {
LOG_CRITICAL(HW_GPU, "Track Unsuccesful"); LOG_CRITICAL(HW_GPU, "Track Unsuccesful");
} }
@ -588,14 +595,15 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address, std::size_t program_size, u32 start_address,
const CompilerSettings& settings) { const CompilerSettings& settings,
ConstBufferLocker& locker) {
auto result_out = std::make_unique<ShaderCharacteristics>(); auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) { if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce; result_out->settings.depth = CompileDepth::BruteForce;
return result_out; return result_out;
} }
CFGRebuildState state{program_code, program_size, start_address}; CFGRebuildState state{program_code, program_size, start_address, locker};
// Inspect Code and generate blocks // Inspect Code and generate blocks
state.labels.clear(); state.labels.clear();
state.labels.emplace(start_address); state.labels.emplace(start_address);

View file

@ -78,6 +78,7 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address, std::size_t program_size, u32 start_address,
const CompilerSettings& settings); const CompilerSettings& settings,
ConstBufferLocker& locker);
} // namespace VideoCommon::Shader } // namespace VideoCommon::Shader

View file

@ -102,7 +102,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false; decompiled = false;
auto info = ScanFlow(program_code, program_size, main_offset, settings); auto info = ScanFlow(program_code, program_size, main_offset, settings, locker);
auto& shader_info = *info; auto& shader_info = *info;
coverage_begin = shader_info.start; coverage_begin = shader_info.start;
coverage_end = shader_info.end; coverage_end = shader_info.end;

View file

@ -23,9 +23,9 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register; using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size,
CompilerSettings settings) CompilerSettings settings, ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{},
program_manager{true, true}, settings{settings} { program_manager{true, true}, settings{settings}, locker{locker} {
Decode(); Decode();
} }

View file

@ -68,7 +68,7 @@ struct GlobalMemoryUsage {
class ShaderIR final { class ShaderIR final {
public: public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size,
CompilerSettings settings); CompilerSettings settings, ConstBufferLocker& locker);
~ShaderIR(); ~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const { const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@ -389,6 +389,7 @@ private:
NodeBlock global_code; NodeBlock global_code;
ASTManager program_manager; ASTManager program_manager;
CompilerSettings settings{}; CompilerSettings settings{};
ConstBufferLocker& locker;
std::set<u32> used_registers; std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates; std::set<Tegra::Shader::Pred> used_predicates;