From 93cf2b3ca8edeb1e8f1e00182f920b8d50664ed5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 7 Feb 2023 21:33:57 -0500 Subject: [PATCH] texture_cache: OpenGL: Implement MSAA uploads and copies --- src/video_core/host_shaders/CMakeLists.txt | 2 ++ .../convert_msaa_to_non_msaa.comp | 30 +++++++++++++++++ .../convert_non_msaa_to_msaa.comp | 29 ++++++++++++++++ .../renderer_opengl/gl_texture_cache.cpp | 8 +++++ .../renderer_opengl/gl_texture_cache.h | 9 ++++- .../renderer_opengl/util_shaders.cpp | 33 ++++++++++++++++++- src/video_core/renderer_opengl/util_shaders.h | 5 +++ .../renderer_vulkan/vk_texture_cache.cpp | 5 +++ .../renderer_vulkan/vk_texture_cache.h | 7 ++++ src/video_core/texture_cache/formatter.cpp | 3 ++ src/video_core/texture_cache/texture_cache.h | 14 ++++---- src/video_core/texture_cache/util.cpp | 5 --- 12 files changed, 136 insertions(+), 14 deletions(-) create mode 100644 src/video_core/host_shaders/convert_msaa_to_non_msaa.comp create mode 100644 src/video_core/host_shaders/convert_non_msaa_to_msaa.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 52cd5bb81..2442c3c29 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -22,6 +22,8 @@ set(SHADER_FILES convert_d24s8_to_abgr8.frag convert_depth_to_float.frag convert_float_to_depth.frag + convert_msaa_to_non_msaa.comp + convert_non_msaa_to_msaa.comp convert_s8d24_to_abgr8.frag full_screen_triangle.vert fxaa.frag diff --git a/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp new file mode 100644 index 000000000..fc3854d18 --- /dev/null +++ b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 core +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0, rgba8) uniform readonly restrict image2DMSArray msaa_in; +layout (binding = 1, rgba8) uniform writeonly restrict image2DArray output_img; + +void main() { + const ivec3 coords = ivec3(gl_GlobalInvocationID); + if (any(greaterThanEqual(coords, imageSize(msaa_in)))) { + return; + } + + // TODO: Specialization constants for num_samples? + const int num_samples = imageSamples(msaa_in); + for (int curr_sample = 0; curr_sample < num_samples; ++curr_sample) { + const vec4 pixel = imageLoad(msaa_in, coords, curr_sample); + + const int single_sample_x = 2 * coords.x + (curr_sample & 1); + const int single_sample_y = 2 * coords.y + ((curr_sample / 2) & 1); + const ivec3 dest_coords = ivec3(single_sample_x, single_sample_y, coords.z); + + if (any(greaterThanEqual(dest_coords, imageSize(output_img)))) { + continue; + } + imageStore(output_img, dest_coords, pixel); + } +} diff --git a/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp new file mode 100644 index 000000000..dedd962f1 --- /dev/null +++ b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 core +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0, rgba8) uniform readonly restrict image2DArray img_in; +layout (binding = 1, rgba8) uniform writeonly restrict image2DMSArray output_msaa; + +void main() { + const ivec3 coords = ivec3(gl_GlobalInvocationID); + if (any(greaterThanEqual(coords, imageSize(output_msaa)))) { + return; + } + + // TODO: Specialization constants for num_samples? + const int num_samples = imageSamples(output_msaa); + for (int curr_sample = 0; curr_sample < num_samples; ++curr_sample) { + const int single_sample_x = 2 * coords.x + (curr_sample & 1); + const int single_sample_y = 2 * coords.y + ((curr_sample / 2) & 1); + const ivec3 single_coords = ivec3(single_sample_x, single_sample_y, coords.z); + + if (any(greaterThanEqual(single_coords, imageSize(img_in)))) { + continue; + } + const vec4 pixel = imageLoad(img_in, single_coords); + imageStore(output_msaa, coords, curr_sample, pixel); + } +} diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9f7ce7414..eb6e43a08 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -557,6 +557,14 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, } } +void TextureCacheRuntime::CopyImageMSAA(Image& dst_image, Image& src_image, + std::span copies) { + LOG_DEBUG(Render_OpenGL, "Copying from {} samples to {} samples", src_image.info.num_samples, + dst_image.info.num_samples); + // TODO: Leverage the format conversion pass if possible/accurate. + util_shaders.CopyMSAA(dst_image, src_image, copies); +} + void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, std::span copies) { LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 5d9d370f2..e30875496 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -93,12 +93,19 @@ public: return device.CanReportMemoryUsage(); } - bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { + bool ShouldReinterpret([[maybe_unused]] Image& dst, + [[maybe_unused]] Image& src) const noexcept { + return true; + } + + bool CanUploadMSAA() const noexcept { return true; } void CopyImage(Image& dst, Image& src, std::span copies); + void CopyImageMSAA(Image& dst, Image& src, std::span copies); + void ReinterpretImage(Image& dst, Image& src, std::span copies); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 404def62e..2c7ac210b 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -12,6 +12,8 @@ #include "video_core/host_shaders/astc_decoder_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/convert_msaa_to_non_msaa_comp.h" +#include "video_core/host_shaders/convert_non_msaa_to_msaa_comp.h" #include "video_core/host_shaders/opengl_convert_s8d24_comp.h" #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" @@ -51,7 +53,9 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)), - convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) { + convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)), + convert_ms_to_nonms_program(MakeProgram(CONVERT_MSAA_TO_NON_MSAA_COMP)), + convert_nonms_to_ms_program(MakeProgram(CONVERT_NON_MSAA_TO_MSAA_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); @@ -269,6 +273,33 @@ void UtilShaders::ConvertS8D24(Image& dst_image, std::span copi program_manager.RestoreGuestCompute(); } +void UtilShaders::CopyMSAA(Image& dst_image, Image& src_image, + std::span copies) { + const bool is_ms_to_non_ms = src_image.info.num_samples > 1 && dst_image.info.num_samples == 1; + const auto program_handle = + is_ms_to_non_ms ? convert_ms_to_nonms_program.handle : convert_nonms_to_ms_program.handle; + program_manager.BindComputeProgram(program_handle); + + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_subresource.base_layer == 0); + ASSERT(copy.src_subresource.num_layers == 1); + ASSERT(copy.dst_subresource.base_layer == 0); + ASSERT(copy.dst_subresource.num_layers == 1); + + glBindImageTexture(0, src_image.StorageHandle(), copy.src_subresource.base_level, GL_TRUE, + 0, GL_READ_ONLY, GL_RGBA8); + glBindImageTexture(1, dst_image.StorageHandle(), copy.dst_subresource.base_level, GL_TRUE, + 0, GL_WRITE_ONLY, GL_RGBA8); + + const u32 num_dispatches_x = Common::DivCeil(copy.extent.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(copy.extent.height, 8U); + const u32 num_dispatches_z = copy.extent.depth; + + glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); + } + program_manager.RestoreGuestCompute(); +} + GLenum StoreFormat(u32 bytes_per_block) { switch (bytes_per_block) { case 1: diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 44efb6ecf..9013808e7 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -40,6 +40,9 @@ public: void ConvertS8D24(Image& dst_image, std::span copies); + void CopyMSAA(Image& dst_image, Image& src_image, + std::span copies); + private: ProgramManager& program_manager; @@ -51,6 +54,8 @@ private: OGLProgram pitch_unswizzle_program; OGLProgram copy_bc4_program; OGLProgram convert_s8d24_program; + OGLProgram convert_ms_to_nonms_program; + OGLProgram convert_nonms_to_ms_program; }; GLenum StoreFormat(u32 bytes_per_block); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d39372ec4..9b85dfb5e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1230,6 +1230,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, }); } +void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, + std::span copies) { + UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan."); +} + u64 TextureCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 1f27a3589..b9ee83de7 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -70,6 +70,8 @@ public: void CopyImage(Image& dst, Image& src, std::span copies); + void CopyImageMSAA(Image& dst, Image& src, std::span copies); + bool ShouldReinterpret(Image& dst, Image& src); void ReinterpretImage(Image& dst, Image& src, std::span copies); @@ -80,6 +82,11 @@ public: return false; } + bool CanUploadMSAA() const noexcept { + // TODO: Implement buffer to MSAA uploads + return false; + } + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span); diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 418890126..30f72361d 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -22,6 +22,9 @@ std::string Name(const ImageBase& image) { const u32 num_layers = image.info.resources.layers; const u32 num_levels = image.info.resources.levels; std::string resource; + if (image.info.num_samples > 1) { + resource += fmt::format(":{}xMSAA", image.info.num_samples); + } if (num_layers > 1) { resource += fmt::format(":L{}", num_layers); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1b01990a4..3e2cbb0b0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -773,7 +773,7 @@ void TextureCache

::RefreshContents(Image& image, ImageId image_id) { image.flags &= ~ImageFlagBits::CpuModified; TrackImage(image, image_id); - if (image.info.num_samples > 1) { + if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); return; } @@ -1167,14 +1167,14 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (True(overlap.flags & ImageFlagBits::GpuModified)) { new_image.flags |= ImageFlagBits::GpuModified; } + const auto& resolution = Settings::values.resolution_info; + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const u32 up_scale = can_rescale ? resolution.up_scale : 1; + const u32 down_shift = can_rescale ? resolution.down_shift : 0; + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); if (overlap.info.num_samples != new_image.info.num_samples) { - LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + runtime.CopyImageMSAA(new_image, overlap, std::move(copies)); } else { - const auto& resolution = Settings::values.resolution_info; - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const u32 up_scale = can_rescale ? resolution.up_scale : 1; - const u32 down_shift = can_rescale ? resolution.down_shift : 0; - auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); runtime.CopyImage(new_image, overlap, std::move(copies)); } if (True(overlap.flags & ImageFlagBits::Tracked)) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 03acc68d9..697f86641 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -573,10 +573,6 @@ u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { if (info.type == ImageType::Buffer) { return info.size.width * BytesPerBlock(info.format); } - if (info.num_samples > 1) { - // Multisample images can't be uploaded or downloaded to the host - return 0; - } if (info.type == ImageType::Linear) { return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); } @@ -703,7 +699,6 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); - ASSERT(dst.num_samples == src.num_samples); const bool is_dst_3d = dst.type == ImageType::e3D; if (is_dst_3d) {