Merge pull request #4359 from ReinUsesLisp/clamp-shared

renderer_{opengl,vulkan}: Clamp shared memory to host's limit
This commit is contained in:
Rodrigo Locatti 2020-07-21 04:51:05 -03:00 committed by GitHub
commit 7278c59d70
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 42 additions and 9 deletions

View file

@@ -913,11 +913,19 @@ void ARBDecompiler::DeclareCompute() {
const ComputeInfo& info = registry.GetComputeInfo(); const ComputeInfo& info = registry.GetComputeInfo();
AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
info.workgroup_size[2]); info.workgroup_size[2]);
if (info.shared_memory_size_in_words > 0) { if (info.shared_memory_size_in_words == 0) {
const u32 size_in_bytes = info.shared_memory_size_in_words * 4; return;
}
const u32 limit = device.GetMaxComputeSharedMemorySize();
u32 size_in_bytes = info.shared_memory_size_in_words * 4;
if (size_in_bytes > limit) {
LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
size_in_bytes, limit);
size_in_bytes = limit;
}
AddLine("SHARED_MEMORY {};", size_in_bytes); AddLine("SHARED_MEMORY {};", size_in_bytes);
AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
}
} }
void ARBDecompiler::DeclareInputAttributes() { void ARBDecompiler::DeclareInputAttributes() {

View file

@@ -212,6 +212,7 @@ Device::Device()
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle; GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -250,6 +251,7 @@ Device::Device(std::nullptr_t) {
shader_storage_alignment = 4; shader_storage_alignment = 4;
max_vertex_attributes = 16; max_vertex_attributes = 16;
max_varyings = 15; max_varyings = 15;
max_compute_shared_memory_size = 0x10000;
has_warp_intrinsics = true; has_warp_intrinsics = true;
has_shader_ballot = true; has_shader_ballot = true;
has_vertex_viewport_layer = true; has_vertex_viewport_layer = true;

View file

@@ -52,6 +52,10 @@ public:
return max_varyings; return max_varyings;
} }
/// Returns the host's compute shared memory limit in bytes.
/// The value is queried from GL_MAX_COMPUTE_SHARED_MEMORY_SIZE when a context-backed
/// Device is constructed, and is 0x10000 for the Device(std::nullptr_t) constructor.
/// Shader decompilers compare the guest's requested size against it and clamp.
u32 GetMaxComputeSharedMemorySize() const {
return max_compute_shared_memory_size;
}
bool HasWarpIntrinsics() const { bool HasWarpIntrinsics() const {
return has_warp_intrinsics; return has_warp_intrinsics;
} }
@@ -118,6 +122,7 @@ private:
std::size_t shader_storage_alignment{}; std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{}; u32 max_vertex_attributes{};
u32 max_varyings{}; u32 max_varyings{};
u32 max_compute_shared_memory_size{};
bool has_warp_intrinsics{}; bool has_warp_intrinsics{};
bool has_shader_ballot{}; bool has_shader_ballot{};
bool has_vertex_viewport_layer{}; bool has_vertex_viewport_layer{};

View file

@@ -602,8 +602,15 @@ private:
return; return;
} }
const auto& info = registry.GetComputeInfo(); const auto& info = registry.GetComputeInfo();
if (const u32 size = info.shared_memory_size_in_words; size > 0) { if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
code.AddLine("shared uint smem[{}];", size); const u32 limit = device.GetMaxComputeSharedMemorySize();
if (size > limit) {
LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
size, limit);
size = limit;
}
code.AddLine("shared uint smem[{}];", size / 4);
code.AddNewLine(); code.AddNewLine();
} }
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",

View file

@@ -122,6 +122,11 @@ public:
return properties.limits.maxPushConstantsSize; return properties.limits.maxPushConstantsSize;
} }
/// Returns the maximum compute shared memory size, in bytes, as reported by the
/// physical device limits (maxComputeSharedMemorySize).
u32 GetMaxComputeSharedMemorySize() const {
return properties.limits.maxComputeSharedMemorySize;
}
/// Returns true if ASTC is natively supported. /// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const { bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported; return is_optimal_astc_supported;

View file

@@ -685,13 +685,19 @@ private:
} }
t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
const u32 smem_size = specialization.shared_memory_size; u32 smem_size = specialization.shared_memory_size * 4;
if (smem_size == 0) { if (smem_size == 0) {
// Avoid declaring an empty array. // Avoid declaring an empty array.
return; return;
} }
const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); const u32 limit = device.GetMaxComputeSharedMemorySize();
const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); if (smem_size > limit) {
LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
smem_size, limit);
smem_size = limit;
}
const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
Name(type_pointer, "SharedMemory"); Name(type_pointer, "SharedMemory");