Merge pull request #4359 from ReinUsesLisp/clamp-shared

renderer_{opengl,vulkan}: Clamp shared memory to host's limit
2024-11-08 12:09:58 +00:00 · 2020-07-21 04:51:05 -03:00 · 2020-07-21 04:51:05 -03:00 · 7278c59d70
commit 7278c59d70
parent 721e6015a8 a5a72cbd20
6 changed files with 42 additions and 9 deletions
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@ -913,11 +913,19 @@ void ARBDecompiler::DeclareCompute() {
    const ComputeInfo& info = registry.GetComputeInfo();
    AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
            info.workgroup_size[2]);
-    if (info.shared_memory_size_in_words > 0) {
+    if (info.shared_memory_size_in_words == 0) {
-        const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
+        return;
    }
    const u32 limit = device.GetMaxComputeSharedMemorySize();
    u32 size_in_bytes = info.shared_memory_size_in_words * 4;
    if (size_in_bytes > limit) {
        LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
                  size_in_bytes, limit);
        size_in_bytes = limit;
    }
    AddLine("SHARED_MEMORY {};", size_in_bytes);
    AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
    }
 }
 void ARBDecompiler::DeclareInputAttributes() {
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@ -212,6 +212,7 @@ Device::Device()
    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
    max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
    max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
    max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
    has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
                          GLAD_GL_NV_shader_thread_shuffle;
    has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@ -250,6 +251,7 @@ Device::Device(std::nullptr_t) {
    shader_storage_alignment = 4;
    max_vertex_attributes = 16;
    max_varyings = 15;
    max_compute_shared_memory_size = 0x10000;
    has_warp_intrinsics = true;
    has_shader_ballot = true;
    has_vertex_viewport_layer = true;
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@ -52,6 +52,10 @@ public:
        return max_varyings;
    }
    u32 GetMaxComputeSharedMemorySize() const {
        return max_compute_shared_memory_size;
    }
    bool HasWarpIntrinsics() const {
        return has_warp_intrinsics;
    }
@ -118,6 +122,7 @@ private:
    std::size_t shader_storage_alignment{};
    u32 max_vertex_attributes{};
    u32 max_varyings{};
    u32 max_compute_shared_memory_size{};
    bool has_warp_intrinsics{};
    bool has_shader_ballot{};
    bool has_vertex_viewport_layer{};
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@ -602,8 +602,15 @@ private:
            return;
        }
        const auto& info = registry.GetComputeInfo();
-        if (const u32 size = info.shared_memory_size_in_words; size > 0) {
+        if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
-            code.AddLine("shared uint smem[{}];", size);
+            const u32 limit = device.GetMaxComputeSharedMemorySize();
            if (size > limit) {
                LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
                          size, limit);
                size = limit;
            }
            code.AddLine("shared uint smem[{}];", size / 4);
            code.AddNewLine();
        }
        code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@ -122,6 +122,11 @@ public:
        return properties.limits.maxPushConstantsSize;
    }
    /// Returns the maximum size for shared memory.
    u32 GetMaxComputeSharedMemorySize() const {
        return properties.limits.maxComputeSharedMemorySize;
    }
    /// Returns true if ASTC is natively supported.
    bool IsOptimalAstcSupported() const {
        return is_optimal_astc_supported;
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@ -685,13 +685,19 @@ private:
        }
        t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
-        const u32 smem_size = specialization.shared_memory_size;
+        u32 smem_size = specialization.shared_memory_size * 4;
        if (smem_size == 0) {
            // Avoid declaring an empty array.
            return;
        }
-        const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4);
+        const u32 limit = device.GetMaxComputeSharedMemorySize();
-        const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count));
+        if (smem_size > limit) {
            LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
                      smem_size, limit);
            smem_size = limit;
        }
        const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
        const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
        Name(type_pointer, "SharedMemory");