From ddd82ef42b7bb7bea4c80edeb96bca8512580df3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 24 Apr 2020 01:44:14 -0300 Subject: [PATCH] shader/memory_util: Deduplicate code Deduplicate code shared between vk_pipeline_cache and gl_shader_cache as well as shader decoder code. While we are at it, fix a bug in gl_shader_cache where compute shaders had a start offset of a stage shader. --- src/video_core/CMakeLists.txt | 2 + .../renderer_opengl/gl_shader_cache.cpp | 82 +++---------------- .../renderer_vulkan/vk_pipeline_cache.cpp | 69 +++------------- .../renderer_vulkan/vk_pipeline_cache.h | 8 +- src/video_core/shader/control_flow.cpp | 12 +-- src/video_core/shader/decode.cpp | 12 +-- src/video_core/shader/memory_util.cpp | 77 +++++++++++++++++ src/video_core/shader/memory_util.h | 47 +++++++++++ src/video_core/shader/shader_ir.h | 3 +- 9 files changed, 153 insertions(+), 159 deletions(-) create mode 100644 src/video_core/shader/memory_util.cpp create mode 100644 src/video_core/shader/memory_util.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8ede4ba9b..ff53282c9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -124,6 +124,8 @@ add_library(video_core STATIC shader/decode.cpp shader/expr.cpp shader/expr.h + shader/memory_util.cpp + shader/memory_util.h shader/node_helper.cpp shader/node_helper.h shader/node.h diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f63156b8d..9759a7078 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -10,8 +10,6 @@ #include #include -#include - #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" @@ -28,76 +26,26 @@ #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/utils.h" 
+#include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace OpenGL { using Tegra::Engines::ShaderType; +using VideoCommon::Shader::GetShaderAddress; +using VideoCommon::Shader::GetShaderCode; +using VideoCommon::Shader::GetUniqueIdentifier; +using VideoCommon::Shader::KERNEL_MAIN_OFFSET; using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::Registry; using VideoCommon::Shader::ShaderIR; +using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { - const auto& gpu{system.GPU().Maxwell3D()}; - const auto& shader_config{gpu.regs.shader_config[static_cast(program)]}; - return gpu.regs.code_address.CodeAddress() + shader_config.offset; -} - -/// Gets if the current instruction offset is a scheduler instruction -constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program) { - constexpr std::size_t start_offset = 10; - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. 
- constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; - constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == self_jumping_branch) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - offset++; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, - const u8* host_ptr) { - ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { - std::fill(code.begin(), code.end(), 0); - return code; - }); - memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); - code.resize(CalculateProgramSize(code)); - return code; -} - /// Gets the shader type from a Maxwell program type constexpr GLenum GetGLShaderType(ShaderType shader_type) { switch (shader_type) { @@ -114,17 +62,6 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { } } -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}) { - u64 unique_identifier = boost::hash_value(code); - if (is_a) { - // VertexA programs include two programs - boost::hash_combine(unique_identifier, boost::hash_value(code_b)); - } - return unique_identifier; -} - constexpr const char* GetShaderTypeName(ShaderType shader_type) { switch (shader_type) { case ShaderType::Vertex: @@ -456,11 +393,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const auto host_ptr{memory_manager.GetPointer(address)}; // No shader found - create a new one - ProgramCode code{GetShaderCode(memory_manager, 
address, host_ptr)}; + ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)}; ProgramCode code_b; if (program == Maxwell::ShaderProgram::VertexA) { const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; - code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); + const u8* host_ptr_b = memory_manager.GetPointer(address_b); + code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); } const auto unique_identifier = GetUniqueIdentifier( @@ -498,7 +436,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one - auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; + auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const ShaderParameters params{system, disk_cache, device, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 91b1b16a5..8cedeaeba 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -27,12 +27,18 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/memory_util.h" namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; +using VideoCommon::Shader::GetShaderAddress; +using VideoCommon::Shader::GetShaderCode; +using VideoCommon::Shader::KERNEL_MAIN_OFFSET; +using VideoCommon::Shader::ProgramCode; +using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { @@ -45,60 +51,6 @@ constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 
VideoCommon::Shader::CompileDepth::FullDecompile}; -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { - const auto& gpu{system.GPU().Maxwell3D()}; - const auto& shader_config{gpu.regs.shader_config[static_cast(program)]}; - return gpu.regs.code_address.CodeAddress() + shader_config.offset; -} - -/// Gets if the current instruction offset is a scheduler instruction -constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { - const std::size_t start_offset = is_compute ? 0 : 10; - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. 
- constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; - constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == self_jumping_branch) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - ++offset; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute) { - ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { - std::fill(program_code.begin(), program_code.end(), 0); - return program_code; - }); - memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), - program_code.size() * sizeof(u64)); - program_code.resize(CalculateProgramSize(program_code, is_compute)); - return program_code; -} - constexpr std::size_t GetStageFromProgram(std::size_t program) { return program == 0 ? 0 : program - 1; } @@ -212,9 +164,9 @@ std::array VKPipelineCache::GetShaders() { const auto host_ptr{memory_manager.GetPointer(program_addr)}; // No shader found - create a new one - constexpr u32 stage_offset = 10; + constexpr u32 stage_offset = STAGE_MAIN_OFFSET; const auto stage = static_cast(index == 0 ? 
0 : index - 1); - auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); + ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); shader = std::make_shared(system, stage, program_addr, *cpu_addr, std::move(code), stage_offset); @@ -270,11 +222,10 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach // No shader found - create a new one const auto host_ptr = memory_manager.GetPointer(program_addr); - auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); - constexpr u32 kernel_main_offset = 0; + ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); shader = std::make_shared(system, Tegra::Engines::ShaderType::Compute, program_addr, *cpu_addr, std::move(code), - kernel_main_offset); + KERNEL_MAIN_OFFSET); if (cpu_addr) { Register(shader); } else { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 602a0a340..2cce53162 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -25,6 +25,7 @@ #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/surface.h" @@ -47,8 +48,6 @@ class CachedShader; using Shader = std::shared_ptr; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using ProgramCode = std::vector; - struct GraphicsPipelineCacheKey { FixedPipelineState fixed_state; std::array shaders; @@ -113,7 +112,8 @@ namespace Vulkan { class CachedShader final : public RasterizerCacheObject { public: explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VAddr cpu_addr, ProgramCode program_code, u32 main_offset); + VAddr 
cpu_addr, VideoCommon::Shader::ProgramCode program_code, + u32 main_offset); ~CachedShader(); GPUVAddr GetGpuAddr() const { @@ -145,7 +145,7 @@ private: Tegra::Engines::ShaderType stage); GPUVAddr gpu_addr{}; - ProgramCode program_code; + VideoCommon::Shader::ProgramCode program_code; VideoCommon::Shader::Registry registry; VideoCommon::Shader::ShaderIR shader_ir; ShaderEntries entries; diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index e00a3fb70..8d86020f6 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/shader/ast.h" #include "video_core/shader/control_flow.h" +#include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -115,17 +116,6 @@ Pred GetPredicate(u32 index, bool negated) { return static_cast(static_cast(index) + (negated ? 8ULL : 0ULL)); } -/** - * Returns whether the instruction at the specified offset is a 'sched' instruction. - * Sched instructions always appear before a sequence of 3 instructions. 
- */ -constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { - constexpr u32 SchedPeriod = 4; - u32 absolute_offset = offset - main_offset; - - return (absolute_offset % SchedPeriod) == 0; -} - enum class ParseResult : u32 { ControlCaught, BlockEnd, diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 87ac9ac6c..1167ff4ec 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -13,6 +13,7 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" #include "video_core/shader/control_flow.h" +#include "video_core/shader/memory_util.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" @@ -23,17 +24,6 @@ using Tegra::Shader::OpCode; namespace { -/** - * Returns whether the instruction at the specified offset is a 'sched' instruction. - * Sched instructions always appear before a sequence of 3 instructions. - */ -constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { - constexpr u32 SchedPeriod = 4; - u32 absolute_offset = offset - main_offset; - - return (absolute_offset % SchedPeriod) == 0; -} - void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, const std::list& used_samplers) { if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp new file mode 100644 index 000000000..074f21691 --- /dev/null +++ b/src/video_core/shader/memory_util.cpp @@ -0,0 +1,77 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader/memory_util.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +GPUVAddr GetShaderAddress(Core::System& system, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; + const auto& shader_config{gpu.regs.shader_config[static_cast(program)]}; + return gpu.regs.code_address.CodeAddress() + shader_config.offset; +} + +bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // Sched instructions appear once every 4 instructions. + constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; + static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; + + const std::size_t start_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + std::size_t offset = start_offset; + while (offset < program.size()) { + const u64 instruction = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if ((instruction & MASK) == SELF_JUMPING_BRANCH) { + // End on Maxwell's "nop" instruction + break; + } + if (instruction == 0) { + break; + } + } + ++offset; + } + // The last instruction is included in the program size + return std::min(offset + 1, program.size()); +} + +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, + const u8* host_ptr, bool is_compute) { + ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); + ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); + memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); + code.resize(CalculateProgramSize(code, is_compute)); + return code; +} + +u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, + const ProgramCode& code_b) { + u64 unique_identifier = boost::hash_value(code); + if (is_a) { + // VertexA programs include two programs + boost::hash_combine(unique_identifier, boost::hash_value(code_b)); + } + return unique_identifier; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h new file mode 100644 index 000000000..be90d24fd --- /dev/null +++ b/src/video_core/shader/memory_util.h @@ -0,0 +1,47 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" + +namespace Core { +class System; +} + +namespace Tegra { +class MemoryManager; +} + +namespace VideoCommon::Shader { + +using ProgramCode = std::vector; + +constexpr u32 STAGE_MAIN_OFFSET = 10; +constexpr u32 KERNEL_MAIN_OFFSET = 0; + +/// Gets the address for the specified shader stage program +GPUVAddr GetShaderAddress(Core::System& system, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); + +/// Gets if the current instruction offset is a scheduler instruction +bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); + +/// Calculates the size of a program stream +std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); + +/// Gets the shader program code from memory for the specified address +ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, + const u8* host_ptr, bool is_compute); + +/// Hashes one (or two) program streams +u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, + const ProgramCode& code_b = {}); + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index c6e7bdf50..69de5e68b 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,6 +18,7 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/memory_util.h" #include "video_core/shader/node.h" #include "video_core/shader/registry.h" @@ -25,8 +26,6 @@ namespace VideoCommon::Shader { struct ShaderBlock; -using ProgramCode = std::vector; - constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; class ConstBuffer {