shader: Add support for native 16-bit floats

This commit is contained in:
ReinUsesLisp 2021-06-16 03:03:08 -03:00 committed by ameerj
parent 376aa94819
commit cbbca26d18
9 changed files with 50 additions and 14 deletions

View file

@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/translate.h frontend/maxwell/translate/translate.h
frontend/maxwell/translate_program.cpp frontend/maxwell/translate_program.cpp
frontend/maxwell/translate_program.h frontend/maxwell/translate_program.h
host_translate_info.h
ir_opt/collect_shader_info_pass.cpp ir_opt/collect_shader_info_pass.cpp
ir_opt/constant_propagation_pass.cpp ir_opt/constant_propagation_pass.cpp
ir_opt/dead_code_elimination_pass.cpp ir_opt/dead_code_elimination_pass.cpp

View file

@ -13,6 +13,7 @@
#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h"
#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/frontend/maxwell/translate_program.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Maxwell { namespace Shader::Maxwell {
@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
} // Anonymous namespace } // Anonymous namespace
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg) { Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
IR::Program program; IR::Program program;
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
program.blocks = GenerateBlocks(program.syntax_list); program.blocks = GenerateBlocks(program.syntax_list);
@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
RemoveUnreachableBlocks(program); RemoveUnreachableBlocks(program);
// Replace instructions before the SSA rewrite // Replace instructions before the SSA rewrite
Optimization::LowerFp16ToFp32(program); if (!host_info.support_float16) {
Optimization::LowerFp16ToFp32(program);
}
Optimization::SsaRewritePass(program); Optimization::SsaRewritePass(program);
Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::GlobalMemoryToStorageBufferPass(program);

View file

@ -8,13 +8,14 @@
#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell { namespace Shader::Maxwell {
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Environment& env, ObjectPool<IR::Block>& block_pool, Environment& env,
Flow::CFG& cfg); Flow::CFG& cfg, const HostTranslateInfo& host_info);
[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
Environment& env_vertex_b); Environment& env_vertex_b);

View file

@ -0,0 +1,18 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
namespace Shader {
// Try to keep entries here to a minimum
// They can accidentally change the cached information in a shader
/// Misc information about the host
struct HostTranslateInfo {
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
};
} // namespace Shader

View file

@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
.has_gl_precise_bug = device.HasPreciseBug(), .has_gl_precise_bug = device.HasPreciseBug(),
.ignore_nan_fp_comparisons = true, .ignore_nan_fp_comparisons = true,
},
host_info{
.support_float16 = false,
.support_int64 = true,
} { } {
if (use_asynchronous_shaders) { if (use_asynchronous_shaders) {
workers = CreateWorkers(); workers = CreateWorkers();
@ -373,15 +377,15 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
if (!uses_vertex_a || index != 1) { if (!uses_vertex_a || index != 1) {
// Normal path // Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
for (const auto& desc : programs[index].info.storage_buffers_descriptors) { for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
total_storage_buffers += desc.count; total_storage_buffers += desc.count;
} }
} else { } else {
// VertexB path when VertexA is present. // VertexB path when VertexA is present.
Shader::IR::Program& program_va{programs[0]}; auto& program_va{programs[0]};
Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
for (const auto& desc : program_vb.info.storage_buffers_descriptors) { for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
total_storage_buffers += desc.count; total_storage_buffers += desc.count;
} }
@ -449,7 +453,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
u32 num_storage_buffers{}; u32 num_storage_buffers{};
for (const auto& desc : program.info.storage_buffers_descriptors) { for (const auto& desc : program.info.storage_buffers_descriptors) {

View file

@ -14,6 +14,7 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/thread_worker.h" #include "common/thread_worker.h"
#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
#include "video_core/engines/shader_type.h" #include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h"
@ -82,6 +83,8 @@ private:
std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
Shader::Profile profile; Shader::Profile profile;
Shader::HostTranslateInfo host_info;
std::filesystem::path shader_cache_filename; std::filesystem::path shader_cache_filename;
std::unique_ptr<ShaderWorker> workers; std::unique_ptr<ShaderWorker> workers;
}; };

View file

@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
.has_broken_signed_operations = false, .has_broken_signed_operations = false,
.ignore_nan_fp_comparisons = false, .ignore_nan_fp_comparisons = false,
}; };
host_info = Shader::HostTranslateInfo{
.support_float16 = device.IsFloat16Supported(),
.support_int64 = true,
};
} }
PipelineCache::~PipelineCache() = default; PipelineCache::~PipelineCache() = default;
@ -484,11 +488,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
if (!uses_vertex_a || index != 1) { if (!uses_vertex_a || index != 1) {
// Normal path // Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
} else { } else {
// VertexB path when VertexA is present. // VertexB path when VertexA is present.
Shader::IR::Program& program_va{programs[0]}; auto& program_va{programs[0]};
Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
} }
} }
@ -575,7 +579,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const std::vector<u32> code{EmitSPIRV(profile, program)}; const std::vector<u32> code{EmitSPIRV(profile, program)};
device.SaveShader(code); device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)}; vk::ShaderModule spv_module{BuildShader(device, code)};

View file

@ -19,6 +19,7 @@
#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h" #include "shader_recompiler/profile.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
@ -157,6 +158,8 @@ private:
ShaderPools main_pools; ShaderPools main_pools;
Shader::Profile profile; Shader::Profile profile;
Shader::HostTranslateInfo host_info;
std::filesystem::path pipeline_cache_filename; std::filesystem::path pipeline_cache_filename;
Common::ThreadWorker workers; Common::ThreadWorker workers;

View file

@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
} }
if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
// LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
// is_float16_supported = false; is_float16_supported = false;
} }
graphics_queue = logical.GetQueue(graphics_family); graphics_queue = logical.GetQueue(graphics_family);