From 957840be9151e7c3b97b638cc0d10d73173c4036 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 5 Nov 2022 22:26:38 +0100 Subject: [PATCH 1/5] Fermi2D: Rework blit engine and add a software blitter. --- src/video_core/CMakeLists.txt | 4 + src/video_core/control/channel_state.cpp | 2 +- src/video_core/engines/fermi_2d.cpp | 19 +- src/video_core/engines/fermi_2d.h | 9 +- src/video_core/engines/sw_blitter/blitter.cpp | 213 ++++ src/video_core/engines/sw_blitter/blitter.h | 27 + .../engines/sw_blitter/converter.cpp | 1097 +++++++++++++++++ src/video_core/engines/sw_blitter/converter.h | 35 + .../renderer_opengl/gl_rasterizer.cpp | 3 +- .../renderer_vulkan/vk_rasterizer.cpp | 3 +- src/video_core/texture_cache/texture_cache.h | 29 +- .../texture_cache/texture_cache_base.h | 8 +- 12 files changed, 1431 insertions(+), 18 deletions(-) create mode 100644 src/video_core/engines/sw_blitter/blitter.cpp create mode 100644 src/video_core/engines/sw_blitter/blitter.h create mode 100644 src/video_core/engines/sw_blitter/converter.cpp create mode 100644 src/video_core/engines/sw_blitter/converter.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d7f7d336c..b03a30992 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -28,6 +28,10 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h + engines/sw_blitter/blitter.cpp + engines/sw_blitter/blitter.h + engines/sw_blitter/converter.cpp + engines/sw_blitter/converter.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index cdecc3a91..832025d75 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) { ASSERT(memory_manager); dma_pusher = std::make_unique(system, gpu, *memory_manager, *this); maxwell_3d = 
std::make_unique(system, *memory_manager); - fermi_2d = std::make_unique(); + fermi_2d = std::make_unique(*memory_manager); kepler_compute = std::make_unique(system, *memory_manager); maxwell_dma = std::make_unique(system, *memory_manager); kepler_memory = std::make_unique(system, *memory_manager); diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 453e0fb01..2c722c778 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -3,17 +3,25 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "video_core/engines/fermi_2d.h" -#include "video_core/memory_manager.h" +#include "video_core/engines/sw_blitter/blitter.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +MICROPROFILE_DECLARE(GPU_BlitEngine); +MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128)); using VideoCore::Surface::BytesPerBlock; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; namespace Tegra::Engines { -Fermi2D::Fermi2D() { +using namespace Texture; + +Fermi2D::Fermi2D(MemoryManager& memory_manager_) { + sw_blitter = std::make_unique(memory_manager_); // Nvidia's OpenGL driver seems to assume these values regs.src.depth = 1; regs.dst.depth = 1; @@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 } void Fermi2D::Blit() { + MICROPROFILE_SCOPE(GPU_BlitEngine); LOG_DEBUG(HW_GPU, "called. 
source address=0x{:x}, destination address=0x{:x}", regs.src.Address(), regs.dst.Address()); @@ -52,9 +61,12 @@ void Fermi2D::Blit() { UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); const auto& args = regs.pixels_from_memory; + constexpr s64 null_derivate = 1ULL << 32; Config config{ .operation = regs.operation, .filter = args.sample_mode.filter, + .must_accelerate = args.du_dx != null_derivate || args.dv_dy != null_derivate || + args.sample_mode.filter == Filter::Bilinear, .dst_x0 = args.dst_x0, .dst_y0 = args.dst_y0, .dst_x1 = args.dst_x0 + args.dst_width, @@ -78,8 +90,9 @@ void Fermi2D::Blit() { config.src_x1 -= config.src_x0; config.src_x0 = 0; } + if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { - UNIMPLEMENTED(); + sw_blitter->Blit(src, regs.dst, config); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 1229aa35b..24b518cb5 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -5,6 +5,7 @@ #include #include +#include #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -21,6 +22,10 @@ class RasterizerInterface; namespace Tegra::Engines { +namespace Blitter { +class SoftwareBlitEngine; +} + /** * This Engine is known as G80_2D. Documentation can be found in: * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml @@ -32,7 +37,7 @@ namespace Tegra::Engines { class Fermi2D final : public EngineInterface { public: - explicit Fermi2D(); + explicit Fermi2D(MemoryManager& memory_manager_); ~Fermi2D() override; /// Binds a rasterizer to this engine. 
@@ -286,6 +291,7 @@ public: struct Config { Operation operation; Filter filter; + bool must_accelerate; s32 dst_x0; s32 dst_y0; s32 dst_x1; @@ -298,6 +304,7 @@ public: private: VideoCore::RasterizerInterface* rasterizer = nullptr; + std::unique_ptr sw_blitter; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp new file mode 100644 index 000000000..caf51cbe3 --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.cpp @@ -0,0 +1,213 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "video_core/engines/sw_blitter/blitter.h" +#include "video_core/engines/sw_blitter/converter.h" +#include "video_core/memory_manager.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +namespace Tegra { +class MemoryManager; +} + +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +namespace Tegra::Engines::Blitter { + +using namespace Texture; + +namespace { + +void NeighrestNeighbor(std::span input, std::span output, u32 src_width, u32 src_height, + u32 dst_width, u32 dst_height, size_t bpp) { + const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); + const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); + size_t src_y = 0; + for (u32 y = 0; y < dst_height; y++) { + size_t src_x = 0; + for (u32 x = 0; x < dst_width; x++) { + const size_t read_from = ((src_y >> 32) * src_width + (src_x >> 32)) * bpp; // floor each 32.32 coordinate before forming the row-major index + const size_t write_to = (y * dst_width + x) * bpp; + + std::memcpy(&output[write_to], &input[read_from], bpp); + src_x += dx_du; + } + src_y += dy_dv; + } +} + +void NeighrestNeighborFast(std::span input, std::span output, u32 src_width, + u32 src_height, u32 dst_width, u32 dst_height) { +
const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); + const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); + size_t src_y = 0; + for (u32 y = 0; y < dst_height; y++) { + size_t src_x = 0; + for (u32 x = 0; x < dst_width; x++) { + const size_t read_from = ((src_y >> 32) * src_width + (src_x >> 32)) * 4; // floor each 32.32 coordinate before forming the row-major index + const size_t write_to = (y * dst_width + x) * 4; + + std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * 4); + src_x += dx_du; + } + src_y += dy_dv; + } +} + +/* +void Bilinear(std::span input, std::span output, size_t src_width, + size_t src_height, size_t dst_width, size_t dst_height) { + const auto inv_lerp = [](u32 coord, u32 end) { return +static_cast(std::min(std::max(static_cast(coord), 0), end - 1)) / (end); }; + + + for (u32 y = 0; y < dst_height; y++) { + const f32 ty_0 = inv_lerp(y, dst_extent_y); + const f32 ty_1 = inv_lerp(y + 1, dst_extent_y); + for (u32 x = 0; x < dst_width; x++) { + const f32 tx_0 = inv_lerp(x, dst_extent_x); + const f32 tx_1 = inv_lerp(x + 1, dst_extent_x); + const std::array get_pixel = [&](f32 tx, f32 ty, u32 width, u32 height) { + std::array result{}; + + return (std::llround(width * tx) + std::llround(height * ty) * width) * 4; + }; + std::array result{}; + + const size_t read_from = get_pixel(src_width, src_height); + const size_t write_to = get_pixel(tx_0, ty_0, dst_width, dst_height); + + std::memcpy(&output[write_to], &input[read_from], bpp); + } + } +} +*/ + +} // namespace + +struct SoftwareBlitEngine::BlitEngineImpl { + std::vector tmp_buffer; + std::vector src_buffer; + std::vector dst_buffer; + std::vector intermediate_src; + std::vector intermediate_dst; + ConverterFactory converter_factory; +}; + +SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_) + : memory_manager{memory_manager_} { + impl = std::make_unique(); +} + +SoftwareBlitEngine::~SoftwareBlitEngine() = default; + +bool SoftwareBlitEngine::Blit(Fermi2D::Surface&
src, Fermi2D::Surface& dst, + Fermi2D::Config& config) { + UNIMPLEMENTED_IF(config.filter == Fermi2D::Filter::Bilinear); + + const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) { + if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) { + return CalculateSize(true, bytes_per_pixel, surface.width, surface.height, + surface.depth, surface.block_height, surface.block_depth); + } + return static_cast(surface.pitch * surface.height); + }; + const auto process_pitch_linear = [](bool unpack, std::span input, std::span output, + u32 extent_x, u32 extent_y, u32 pitch, u32 x0, u32 y0, + size_t bpp) { + const size_t base_offset = x0 * bpp; + const size_t copy_size = extent_x * bpp; + for (u32 y = 0; y < extent_y; y++) { // y counts rows of the packed sub-rectangle + const size_t first_offset = (y + y0) * pitch + base_offset; // surface side: row origin y0 applies only here + const size_t second_offset = y * extent_x * bpp; + u8* write_to = unpack ? &output[first_offset] : &output[second_offset]; + const u8* read_from = unpack ? &input[second_offset] : &input[first_offset]; + std::memcpy(write_to, read_from, copy_size); + } + }; + + const u32 src_extent_x = config.src_x1 - config.src_x0; + const u32 src_extent_y = config.src_y1 - config.src_y0; + + const u32 dst_extent_x = config.dst_x1 - config.dst_x0; + const u32 dst_extent_y = config.dst_y1 - config.dst_y0; + const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); + const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); + const size_t src_size = get_surface_size(src, src_bytes_per_pixel); + impl->tmp_buffer.resize(src_size); + memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); + + const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; + + const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; + + impl->src_buffer.resize(src_copy_size); + + const bool no_passthrough = + src.format != dst.format || src_extent_x != dst_extent_x ||
src_extent_y != dst_extent_y; + + const auto convertion_phase_same_format = [&]() { + NeighrestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y, + dst_extent_x, dst_extent_y, dst_bytes_per_pixel); + }; + + const auto convertion_phase_ir = [&]() { + auto* input_converter = impl->converter_factory.GetFormatConverter(src.format); + impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * 4); + impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * 4); + input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src); + + NeighrestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, + src_extent_y, dst_extent_x, dst_extent_y); + + auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format); + output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer); + }; + + // Do the actual blit: gather the source sub-rectangle into src_buffer + + impl->dst_buffer.resize(dst_copy_size); + if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { + UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, + src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, + src_extent_y, src.block_height, src.block_depth, + src_extent_x * src_bytes_per_pixel); + } else { + process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, + src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); + } + + // Conversion phase: rescale and/or convert formats, or pass the pixels through unchanged + if (no_passthrough) { + if (src.format != dst.format) { + convertion_phase_ir(); + } else { + convertion_phase_same_format(); + } + } else { + impl->dst_buffer.swap(impl->src_buffer); + } + + const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); + impl->tmp_buffer.resize(dst_size); + memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); + + if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { + SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, + dst.height, dst.depth,
config.dst_x0, config.dst_y0, dst_extent_x, + dst_extent_y, dst.block_height, dst.block_depth, + dst_extent_x * dst_bytes_per_pixel); + } else { + process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, + dst.pitch, config.dst_x0, config.dst_y0, + static_cast(dst_bytes_per_pixel)); + } + memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); + return true; +} + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h new file mode 100644 index 000000000..3edf40c3e --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "video_core/engines/fermi_2d.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines::Blitter { + +class SoftwareBlitEngine { +public: + SoftwareBlitEngine(MemoryManager& memory_manager_); + ~SoftwareBlitEngine(); + + bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config); + +private: + MemoryManager& memory_manager; + struct BlitEngineImpl; + std::unique_ptr impl; +}; + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp new file mode 100644 index 000000000..2e376f430 --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.cpp @@ -0,0 +1,1097 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "video_core/engines/sw_blitter/converter.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#else +#define FORCE_INLINE inline __attribute__((always_inline)) +#endif + 
+namespace Tegra::Engines::Blitter { + +enum class Swizzle : size_t { + R = 0, + G = 1, + B = 2, + A = 3, + None, +}; + +enum class ComponentType : u32 { + SNORM = 1, + UNORM = 2, + SINT = 3, + UINT = 4, + SNORM_FORCE_FP16 = 5, + UNORM_FORCE_FP16 = 6, + FLOAT = 7, + SRGB = 8, +}; + +namespace { + +constexpr std::array SRGB_TO_RGB_LUT = { + 0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f, + 1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f, + 3.676507e-03f, 4.024717e-03f, 4.391442e-03f, 4.776953e-03f, 5.181517e-03f, 5.605392e-03f, + 6.048833e-03f, 6.512091e-03f, 6.995410e-03f, 7.499032e-03f, 8.023193e-03f, 8.568126e-03f, + 9.134059e-03f, 9.721218e-03f, 1.032982e-02f, 1.096009e-02f, 1.161224e-02f, 1.228649e-02f, + 1.298303e-02f, 1.370208e-02f, 1.444384e-02f, 1.520851e-02f, 1.599629e-02f, 1.680738e-02f, + 1.764195e-02f, 1.850022e-02f, 1.938236e-02f, 2.028856e-02f, 2.121901e-02f, 2.217389e-02f, + 2.315337e-02f, 2.415763e-02f, 2.518686e-02f, 2.624122e-02f, 2.732089e-02f, 2.842604e-02f, + 2.955684e-02f, 3.071344e-02f, 3.189603e-02f, 3.310477e-02f, 3.433981e-02f, 3.560131e-02f, + 3.688945e-02f, 3.820437e-02f, 3.954624e-02f, 4.091520e-02f, 4.231141e-02f, 4.373503e-02f, + 4.518620e-02f, 4.666509e-02f, 4.817183e-02f, 4.970657e-02f, 5.126946e-02f, 5.286065e-02f, + 5.448028e-02f, 5.612849e-02f, 5.780543e-02f, 5.951124e-02f, 6.124605e-02f, 6.301001e-02f, + 6.480327e-02f, 6.662594e-02f, 6.847817e-02f, 7.036009e-02f, 7.227185e-02f, 7.421357e-02f, + 7.618538e-02f, 7.818742e-02f, 8.021982e-02f, 8.228271e-02f, 8.437621e-02f, 8.650046e-02f, + 8.865558e-02f, 9.084171e-02f, 9.305897e-02f, 9.530747e-02f, 9.758735e-02f, 9.989873e-02f, + 1.022417e-01f, 1.046165e-01f, 1.070231e-01f, 1.094617e-01f, 1.119324e-01f, 1.144354e-01f, + 1.169707e-01f, 1.195384e-01f, 1.221388e-01f, 1.247718e-01f, 1.274377e-01f, 1.301365e-01f, + 1.328683e-01f, 1.356333e-01f, 1.384316e-01f, 1.412633e-01f, 1.441285e-01f, 1.470273e-01f, + 
1.499598e-01f, 1.529261e-01f, 1.559265e-01f, 1.589608e-01f, 1.620294e-01f, 1.651322e-01f, + 1.682694e-01f, 1.714411e-01f, 1.746474e-01f, 1.778884e-01f, 1.811642e-01f, 1.844750e-01f, + 1.878208e-01f, 1.912017e-01f, 1.946178e-01f, 1.980693e-01f, 2.015563e-01f, 2.050787e-01f, + 2.086369e-01f, 2.122308e-01f, 2.158605e-01f, 2.195262e-01f, 2.232280e-01f, 2.269659e-01f, + 2.307401e-01f, 2.345506e-01f, 2.383976e-01f, 2.422811e-01f, 2.462013e-01f, 2.501583e-01f, + 2.541521e-01f, 2.581829e-01f, 2.622507e-01f, 2.663556e-01f, 2.704978e-01f, 2.746773e-01f, + 2.788943e-01f, 2.831487e-01f, 2.874408e-01f, 2.917706e-01f, 2.961383e-01f, 3.005438e-01f, + 3.049873e-01f, 3.094689e-01f, 3.139887e-01f, 3.185468e-01f, 3.231432e-01f, 3.277781e-01f, + 3.324515e-01f, 3.371636e-01f, 3.419144e-01f, 3.467041e-01f, 3.515326e-01f, 3.564001e-01f, + 3.613068e-01f, 3.662526e-01f, 3.712377e-01f, 3.762621e-01f, 3.813260e-01f, 3.864294e-01f, + 3.915725e-01f, 3.967552e-01f, 4.019778e-01f, 4.072402e-01f, 4.125426e-01f, 4.178851e-01f, + 4.232677e-01f, 4.286905e-01f, 4.341536e-01f, 4.396572e-01f, 4.452012e-01f, 4.507858e-01f, + 4.564110e-01f, 4.620770e-01f, 4.677838e-01f, 4.735315e-01f, 4.793202e-01f, 4.851499e-01f, + 4.910209e-01f, 4.969330e-01f, 5.028865e-01f, 5.088813e-01f, 5.149177e-01f, 5.209956e-01f, + 5.271151e-01f, 5.332764e-01f, 5.394795e-01f, 5.457245e-01f, 5.520114e-01f, 5.583404e-01f, + 5.647115e-01f, 5.711249e-01f, 5.775805e-01f, 5.840784e-01f, 5.906188e-01f, 5.972018e-01f, + 6.038274e-01f, 6.104956e-01f, 6.172066e-01f, 6.239604e-01f, 6.307572e-01f, 6.375968e-01f, + 6.444797e-01f, 6.514056e-01f, 6.583748e-01f, 6.653873e-01f, 6.724432e-01f, 6.795425e-01f, + 6.866853e-01f, 6.938717e-01f, 7.011019e-01f, 7.083758e-01f, 7.156935e-01f, 7.230551e-01f, + 7.304608e-01f, 7.379104e-01f, 7.454042e-01f, 7.529422e-01f, 7.605245e-01f, 7.681512e-01f, + 7.758222e-01f, 7.835378e-01f, 7.912979e-01f, 7.991027e-01f, 8.069522e-01f, 8.148466e-01f, + 8.227857e-01f, 8.307699e-01f, 8.387990e-01f, 8.468732e-01f, 
8.549926e-01f, 8.631572e-01f, + 8.713671e-01f, 8.796224e-01f, 8.879231e-01f, 8.962694e-01f, 9.046612e-01f, 9.130986e-01f, + 9.215819e-01f, 9.301109e-01f, 9.386857e-01f, 9.473065e-01f, 9.559733e-01f, 9.646863e-01f, + 9.734453e-01f, 9.822506e-01f, 9.911021e-01f, 1.000000e+00f}; + +constexpr std::array RGB_TO_SRGB_LUT = { + 0.000000e+00f, 4.984009e-02f, 8.494473e-02f, 1.107021e-01f, 1.318038e-01f, 1.500052e-01f, + 1.661857e-01f, 1.808585e-01f, 1.943532e-01f, 2.068957e-01f, 2.186491e-01f, 2.297351e-01f, + 2.402475e-01f, 2.502604e-01f, 2.598334e-01f, 2.690152e-01f, 2.778465e-01f, 2.863614e-01f, + 2.945889e-01f, 3.025538e-01f, 3.102778e-01f, 3.177796e-01f, 3.250757e-01f, 3.321809e-01f, + 3.391081e-01f, 3.458689e-01f, 3.524737e-01f, 3.589320e-01f, 3.652521e-01f, 3.714419e-01f, + 3.775084e-01f, 3.834581e-01f, 3.892968e-01f, 3.950301e-01f, 4.006628e-01f, 4.061998e-01f, + 4.116451e-01f, 4.170030e-01f, 4.222770e-01f, 4.274707e-01f, 4.325873e-01f, 4.376298e-01f, + 4.426010e-01f, 4.475037e-01f, 4.523403e-01f, 4.571131e-01f, 4.618246e-01f, 4.664766e-01f, + 4.710712e-01f, 4.756104e-01f, 4.800958e-01f, 4.845292e-01f, 4.889122e-01f, 4.932462e-01f, + 4.975329e-01f, 5.017734e-01f, 5.059693e-01f, 5.101216e-01f, 5.142317e-01f, 5.183006e-01f, + 5.223295e-01f, 5.263194e-01f, 5.302714e-01f, 5.341862e-01f, 5.380651e-01f, 5.419087e-01f, + 5.457181e-01f, 5.494938e-01f, 5.532369e-01f, 5.569480e-01f, 5.606278e-01f, 5.642771e-01f, + 5.678965e-01f, 5.714868e-01f, 5.750484e-01f, 5.785821e-01f, 5.820884e-01f, 5.855680e-01f, + 5.890211e-01f, 5.924487e-01f, 5.958509e-01f, 5.992285e-01f, 6.025819e-01f, 6.059114e-01f, + 6.092176e-01f, 6.125010e-01f, 6.157619e-01f, 6.190008e-01f, 6.222180e-01f, 6.254140e-01f, + 6.285890e-01f, 6.317436e-01f, 6.348780e-01f, 6.379926e-01f, 6.410878e-01f, 6.441637e-01f, + 6.472208e-01f, 6.502595e-01f, 6.532799e-01f, 6.562824e-01f, 6.592672e-01f, 6.622347e-01f, + 6.651851e-01f, 6.681187e-01f, 6.710356e-01f, 6.739363e-01f, 6.768209e-01f, 6.796897e-01f, + 6.825429e-01f, 
6.853807e-01f, 6.882034e-01f, 6.910111e-01f, 6.938041e-01f, 6.965826e-01f, + 6.993468e-01f, 7.020969e-01f, 7.048331e-01f, 7.075556e-01f, 7.102645e-01f, 7.129600e-01f, + 7.156424e-01f, 7.183118e-01f, 7.209683e-01f, 7.236121e-01f, 7.262435e-01f, 7.288625e-01f, + 7.314693e-01f, 7.340640e-01f, 7.366470e-01f, 7.392181e-01f, 7.417776e-01f, 7.443256e-01f, + 7.468624e-01f, 7.493880e-01f, 7.519025e-01f, 7.544061e-01f, 7.568989e-01f, 7.593810e-01f, + 7.618526e-01f, 7.643137e-01f, 7.667645e-01f, 7.692052e-01f, 7.716358e-01f, 7.740564e-01f, + 7.764671e-01f, 7.788681e-01f, 7.812595e-01f, 7.836413e-01f, 7.860138e-01f, 7.883768e-01f, + 7.907307e-01f, 7.930754e-01f, 7.954110e-01f, 7.977377e-01f, 8.000556e-01f, 8.023647e-01f, + 8.046651e-01f, 8.069569e-01f, 8.092403e-01f, 8.115152e-01f, 8.137818e-01f, 8.160402e-01f, + 8.182903e-01f, 8.205324e-01f, 8.227665e-01f, 8.249926e-01f, 8.272109e-01f, 8.294214e-01f, + 8.316242e-01f, 8.338194e-01f, 8.360070e-01f, 8.381871e-01f, 8.403597e-01f, 8.425251e-01f, + 8.446831e-01f, 8.468339e-01f, 8.489776e-01f, 8.511142e-01f, 8.532437e-01f, 8.553662e-01f, + 8.574819e-01f, 8.595907e-01f, 8.616927e-01f, 8.637881e-01f, 8.658767e-01f, 8.679587e-01f, + 8.700342e-01f, 8.721032e-01f, 8.741657e-01f, 8.762218e-01f, 8.782716e-01f, 8.803151e-01f, + 8.823524e-01f, 8.843835e-01f, 8.864085e-01f, 8.884274e-01f, 8.904402e-01f, 8.924471e-01f, + 8.944480e-01f, 8.964431e-01f, 8.984324e-01f, 9.004158e-01f, 9.023935e-01f, 9.043654e-01f, + 9.063318e-01f, 9.082925e-01f, 9.102476e-01f, 9.121972e-01f, 9.141413e-01f, 9.160800e-01f, + 9.180133e-01f, 9.199412e-01f, 9.218637e-01f, 9.237810e-01f, 9.256931e-01f, 9.276000e-01f, + 9.295017e-01f, 9.313982e-01f, 9.332896e-01f, 9.351761e-01f, 9.370575e-01f, 9.389339e-01f, + 9.408054e-01f, 9.426719e-01f, 9.445336e-01f, 9.463905e-01f, 9.482424e-01f, 9.500897e-01f, + 9.519322e-01f, 9.537700e-01f, 9.556032e-01f, 9.574316e-01f, 9.592555e-01f, 9.610748e-01f, + 9.628896e-01f, 9.646998e-01f, 9.665055e-01f, 9.683068e-01f, 9.701037e-01f, 
9.718961e-01f, + 9.736842e-01f, 9.754679e-01f, 9.772474e-01f, 9.790225e-01f, 9.807934e-01f, 9.825601e-01f, + 9.843225e-01f, 9.860808e-01f, 9.878350e-01f, 9.895850e-01f, 9.913309e-01f, 9.930727e-01f, + 9.948106e-01f, 9.965444e-01f, 9.982741e-01f, 1.000000e+00f}; + +} // namespace + +struct R32B32G32A32_FLOATTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; + static constexpr std::array component_sizes = {32, 32, 32, 32}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::G, Swizzle::B, Swizzle::R}; +}; + +struct R32G32B32A32_SINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; + static constexpr std::array component_sizes = {32, 32, 32, 32}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R32G32B32A32_UINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {32, 32, 32, 32}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R16G16B16A16_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {16, 16, 16, 16}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R16G16B16A16_SNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + 
ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; + static constexpr std::array component_sizes = {16, 16, 16, 16}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R16G16B16A16_SINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; + static constexpr std::array component_sizes = {16, 16, 16, 16}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R16G16B16A16_UINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {16, 16, 16, 16}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R16G16B16A16_FLOATTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; + static constexpr std::array component_sizes = {16, 16, 16, 16}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R32G32_FLOATTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::FLOAT, ComponentType::FLOAT}; + static constexpr std::array component_sizes = {32, 32}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R32G32_SINTTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::SINT, ComponentType::SINT}; + static constexpr std::array component_sizes = {32, 32}; + static 
constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R32G32_UINTTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {32, 32}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R16G16B16X16_FLOATTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; + static constexpr std::array component_sizes = {16, 16, 16, 16}; + static constexpr std::array component_swizzle = { + Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A8R8G8B8_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; +}; + +struct A8R8G8B8_SRGBTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; +}; + +struct A2B10G10R10_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {10, 10, 10, 2}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct A2B10G10R10_UINTTraits { + static 
constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {10, 10, 10, 2}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct A8B8G8R8_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct A8B8G8R8_SRGBTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct A8B8G8R8_SNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct A8B8G8R8_SINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct A8B8G8R8_UINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array 
component_types = { + ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R16G16_UNORMTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {16, 16}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R16G16_SNORMTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::SNORM, ComponentType::SNORM}; + static constexpr std::array component_sizes = {16, 16}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R16G16_SINTTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::SINT, ComponentType::SINT}; + static constexpr std::array component_sizes = {16, 16}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R16G16_UINTTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {16, 16}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R16G16_FLOATTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::FLOAT, ComponentType::FLOAT}; + static constexpr std::array component_sizes = {16, 16}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct B10G11R11_FLOATTraits { + static constexpr size_t num_components = 3; + static constexpr std::array component_types = { + ComponentType::FLOAT, 
ComponentType::FLOAT, ComponentType::FLOAT}; + static constexpr std::array component_sizes = {11, 11, 10}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct R32_SINTTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::SINT}; + static constexpr std::array component_sizes = {32}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R32_UINTTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::UINT}; + static constexpr std::array component_sizes = {32}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R32_FLOATTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::FLOAT}; + static constexpr std::array component_sizes = {32}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R5G6B5_UNORMTraits { + static constexpr size_t num_components = 3; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {5, 6, 5}; + static constexpr std::array component_swizzle = { + Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A1R5G5B5_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {5, 5, 5, 1}; + static constexpr std::array component_swizzle = { + Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; +}; + +struct R8G8_UNORMTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {8, 8}; + static constexpr 
std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R8G8_SNORMTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::SNORM, ComponentType::SNORM}; + static constexpr std::array component_sizes = {8, 8}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R8G8_SINTTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::SINT, ComponentType::SINT}; + static constexpr std::array component_sizes = {8, 8}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R8G8_UINTTraits { + static constexpr size_t num_components = 2; + static constexpr std::array component_types = { + ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {8, 8}; + static constexpr std::array component_swizzle = {Swizzle::G, + Swizzle::R}; +}; + +struct R16_UNORMTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::UNORM}; + static constexpr std::array component_sizes = {16}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R16_SNORMTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::SNORM}; + static constexpr std::array component_sizes = {16}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R16_SINTTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::SINT}; + static constexpr std::array component_sizes = {16}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R16_UINTTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::UINT}; + static constexpr std::array component_sizes = {16}; 
+ static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R16_FLOATTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::FLOAT}; + static constexpr std::array component_sizes = {16}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R8_UNORMTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::UNORM}; + static constexpr std::array component_sizes = {8}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R8_SNORMTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::SNORM}; + static constexpr std::array component_sizes = {8}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R8_SINTTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::SINT}; + static constexpr std::array component_sizes = {8}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +struct R8_UINTTraits { + static constexpr size_t num_components = 1; + static constexpr std::array component_types = { + ComponentType::UINT}; + static constexpr std::array component_sizes = {8}; + static constexpr std::array component_swizzle = {Swizzle::R}; +}; + +template +class ConverterImpl : public Converter { +private: + static constexpr size_t num_components = ConverterTraits::num_components; + static constexpr std::array component_types = + ConverterTraits::component_types; + static constexpr std::array component_sizes = + ConverterTraits::component_sizes; + static constexpr std::array component_swizzle = + ConverterTraits::component_swizzle; + + static constexpr size_t CalculateByteSize() { + size_t size = 0; + for (const size_t component_size : component_sizes) { + size += component_size; + } + const size_t power = 
(sizeof(size_t) * 8) - std::countl_zero(size) - 1ULL; + const size_t base_size = 1ULL << power; + const size_t mask = base_size - 1ULL; + return ((size & mask) != 0 ? base_size << 1ULL : base_size) / 8; + } + + static constexpr size_t total_bytes_per_pixel = CalculateByteSize(); + static constexpr size_t total_words_per_pixel = + (total_bytes_per_pixel + sizeof(u32) - 1U) / sizeof(u32); + static constexpr size_t components_per_ir_rep = 4; + + template + static constexpr std::array GetBoundWordsOffsets() { + std::array result; + result.fill(0); + constexpr size_t total_bits_per_word = sizeof(u32) * 8; + size_t accumulated_size = 0; + size_t count = 0; + for (size_t i = 0; i < num_components; i++) { + if constexpr (get_offsets) { + result[i] = accumulated_size; + } else { + result[i] = count; + } + accumulated_size += component_sizes[i]; + if (accumulated_size > total_bits_per_word) { + if constexpr (get_offsets) { + result[i] = 0; + } else { + result[i]++; + } + count++; + accumulated_size = component_sizes[i]; + } + } + return result; + } + + static constexpr std::array bound_words = GetBoundWordsOffsets(); + static constexpr std::array bound_offsets = + GetBoundWordsOffsets(); + + static constexpr std::array GetComponentsMask() { + std::array result; + for (size_t i = 0; i < num_components; i++) { + result[i] = (((u32)~0) >> (8 * sizeof(u32) - component_sizes[i])) << bound_offsets[i]; + } + return result; + } + + static constexpr std::array component_mask = GetComponentsMask(); + + // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function + // calls, it may fail to detect the benefit of inlining. 
+ template + FORCE_INLINE void ConvertToComponent(u32 which_word, f32& out_component) { + const u32 value = (which_word >> bound_offsets[which_component]) & + static_cast((1ULL << component_sizes[which_component]) - 1ULL); + const auto sign_extend = [](u32 base_value, size_t bits) { + const size_t shift_amount = sizeof(u32) * 8 - bits; + s32 shifted_value = static_cast(base_value << shift_amount); + return shifted_value >> shift_amount; + }; + const auto force_to_fp16 = [](f32 base_value) { + u32 tmp = std::bit_cast(base_value); + constexpr size_t fp32_mantissa_bits = 23; + constexpr size_t fp16_mantissa_bits = 10; + constexpr size_t mantissa_mask = + ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL); + tmp = tmp & mantissa_mask; + // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM + return std::bit_cast(tmp); + }; + const auto from_fp_n = [&sign_extend](u32 base_value, size_t bits, size_t mantissa) { + constexpr size_t fp32_mantissa_bits = 23; + size_t shift_towards = fp32_mantissa_bits - mantissa; + const u32 new_value = + static_cast(sign_extend(base_value, bits) << shift_towards) & (~(1U << 31)); + return std::bit_cast(new_value); + }; + const auto calculate_snorm = [&]() { + return static_cast( + static_cast(sign_extend(value, component_sizes[which_component])) / + ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); + }; + const auto calculate_unorm = [&]() { + return static_cast(static_cast(value) / + ((1ULL << (component_sizes[which_component])) - 1ULL)); + }; + if constexpr (component_types[which_component] == ComponentType::SNORM) { + out_component = calculate_snorm(); + } else if constexpr (component_types[which_component] == ComponentType::UNORM) { + out_component = calculate_unorm(); + } else if constexpr (component_types[which_component] == ComponentType::SINT) { + out_component = static_cast( + static_cast(sign_extend(value, component_sizes[which_component]))); + } else if constexpr 
(component_types[which_component] == ComponentType::UINT) { + out_component = static_cast( + static_cast(sign_extend(value, component_sizes[which_component]))); + } else if constexpr (component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { + out_component = calculate_snorm(); + out_component = force_to_fp16(out_component); + } else if constexpr (component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { + out_component = calculate_unorm(); + out_component = force_to_fp16(out_component); + } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { + if constexpr (component_sizes[which_component] == 32) { + out_component = std::bit_cast(value); + } else if constexpr (component_sizes[which_component] == 16) { + static constexpr u32 sign_mask = 0x8000; + static constexpr u32 mantissa_mask = 0x8000; + out_component = std::bit_cast(((value & sign_mask) << 16) | + (((value & 0x7c00) + 0x1C000) << 13) | + ((value & mantissa_mask) << 13)); + } else { + out_component = from_fp_n(value, component_sizes[which_component], + component_sizes[which_component] - 5); + } + } else if constexpr (component_types[which_component] == ComponentType::SRGB) { + if constexpr (component_swizzle[which_component] == Swizzle::A) { + out_component = calculate_unorm(); + } else if constexpr (component_sizes[which_component] == 8) { + out_component = SRGB_TO_RGB_LUT[value]; + } else { + out_component = calculate_unorm(); + UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", + component_sizes[which_component]); + } + } + } + + // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function + // calls, it may fail to detect the benefit of inlining. 
+ template + FORCE_INLINE void ConvertFromComponent(u32& which_word, f32 in_component) { + const auto insert_to_word = [&](T new_word) { + which_word |= (static_cast(new_word) << bound_offsets[which_component]) & + component_mask[which_component]; + }; + const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) { + constexpr size_t fp32_mantissa_bits = 23; + u32 tmp_value = std::bit_cast(std::max(base_value, 0.0f)); + size_t shift_towards = fp32_mantissa_bits - mantissa; + return tmp_value >> shift_towards; + }; + const auto calculate_unorm = [&]() { + return static_cast(static_cast(in_component) * + ((1ULL << (component_sizes[which_component])) - 1ULL)); + }; + if constexpr (component_types[which_component] == ComponentType::SNORM || + component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { + s32 tmp_word = + static_cast(static_cast(in_component) * + ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); + insert_to_word(tmp_word); + + } else if constexpr (component_types[which_component] == ComponentType::UNORM || + component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { + u32 tmp_word = calculate_unorm(); + insert_to_word(tmp_word); + } else if constexpr (component_types[which_component] == ComponentType::SINT) { + s32 tmp_word = static_cast(in_component); + insert_to_word(tmp_word); + } else if constexpr (component_types[which_component] == ComponentType::UINT) { + u32 tmp_word = static_cast(in_component); + insert_to_word(tmp_word); + } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { + if constexpr (component_sizes[which_component] == 32) { + u32 tmp_word = std::bit_cast(in_component); + insert_to_word(tmp_word); + } else if constexpr (component_sizes[which_component] == 16) { + static constexpr u32 sign_mask = 0x8000; + static constexpr u32 mantissa_mask = 0x8000; + const u32 tmp_word = std::bit_cast(in_component); + const u32 half = ((tmp_word >> 16) & sign_mask) | + 
((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & 0x7c00) | + ((tmp_word >> 13) & 0x03ff); + insert_to_word(half); + } else { + insert_to_word(to_fp_n(in_component, component_sizes[which_component], + component_sizes[which_component] - 5)); + } + } else if constexpr (component_types[which_component] == ComponentType::SRGB) { + if constexpr (component_swizzle[which_component] != Swizzle::A) { + if constexpr (component_sizes[which_component] == 8) { + const u32 index = calculate_unorm(); + in_component = RGB_TO_SRGB_LUT[index]; + } else { + UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", + component_sizes[which_component]); + } + } + const u32 tmp_word = calculate_unorm(); + insert_to_word(tmp_word); + } + } + +public: + void ConvertTo(std::span input, std::span output) override { + const size_t num_pixels = output.size() / components_per_ir_rep; + for (size_t pixel = 0; pixel < num_pixels; pixel++) { + std::array words{}; + + std::memcpy(words.data(), &input[pixel * total_bytes_per_pixel], total_bytes_per_pixel); + std::span new_components(&output[pixel * components_per_ir_rep], + components_per_ir_rep); + if constexpr (component_swizzle[0] != Swizzle::None) { + ConvertToComponent<0>(words[bound_words[0]], + new_components[static_cast(component_swizzle[0])]); + } else { + new_components[0] = 0.0f; + } + if constexpr (num_components >= 2) { + if constexpr (component_swizzle[1] != Swizzle::None) { + ConvertToComponent<1>( + words[bound_words[1]], + new_components[static_cast(component_swizzle[1])]); + } else { + new_components[1] = 0.0f; + } + } else { + new_components[1] = 0.0f; + } + if constexpr (num_components >= 3) { + if constexpr (component_swizzle[2] != Swizzle::None) { + ConvertToComponent<2>( + words[bound_words[2]], + new_components[static_cast(component_swizzle[2])]); + } else { + new_components[2] = 0.0f; + } + } else { + new_components[2] = 0.0f; + } + if constexpr (num_components >= 4) { + if constexpr 
(component_swizzle[3] != Swizzle::None) { + ConvertToComponent<3>( + words[bound_words[3]], + new_components[static_cast(component_swizzle[3])]); + } else { + new_components[3] = 0.0f; + } + } else { + new_components[3] = 0.0f; + } + } + } + + void ConvertFrom(std::span input, std::span output) override { + const size_t num_pixels = output.size() / total_bytes_per_pixel; + for (size_t pixel = 0; pixel < num_pixels; pixel++) { + std::span old_components(&input[pixel * components_per_ir_rep], + components_per_ir_rep); + std::array words{}; + if constexpr (component_swizzle[0] != Swizzle::None) { + ConvertFromComponent<0>(words[bound_words[0]], + old_components[static_cast(component_swizzle[0])]); + } + if constexpr (num_components >= 2) { + if constexpr (component_swizzle[1] != Swizzle::None) { + ConvertFromComponent<1>( + words[bound_words[1]], + old_components[static_cast(component_swizzle[1])]); + } + } + if constexpr (num_components >= 3) { + if constexpr (component_swizzle[2] != Swizzle::None) { + ConvertFromComponent<2>( + words[bound_words[2]], + old_components[static_cast(component_swizzle[2])]); + } + } + if constexpr (num_components >= 4) { + if constexpr (component_swizzle[3] != Swizzle::None) { + ConvertFromComponent<3>( + words[bound_words[3]], + old_components[static_cast(component_swizzle[3])]); + } + } + std::memcpy(&output[pixel * total_bytes_per_pixel], words.data(), + total_bytes_per_pixel); + } + } + + ConverterImpl() = default; + ~ConverterImpl() = default; +}; + +struct ConverterFactory::ConverterFactoryImpl { + std::unordered_map> converters_cache; +}; + +ConverterFactory::ConverterFactory() { + impl = std::make_unique(); +} + +ConverterFactory::~ConverterFactory() = default; + +Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) { + auto it = impl->converters_cache.find(format); + if (it == impl->converters_cache.end()) [[unlikely]] { + return BuildConverter(format); + } + return it->second.get(); +} + +class 
NullConverter : public Converter { +public: + void ConvertTo([[maybe_unused]] std::span input, std::span output) override { + std::fill(output.begin(), output.end(), 0.0f); + } + void ConvertFrom([[maybe_unused]] std::span input, std::span output) override { + const u8 fill_value = 0U; + std::fill(output.begin(), output.end(), fill_value); + } +}; + +Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { + switch (format) { + case RenderTargetFormat::R32B32G32A32_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32G32B32A32_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32G32B32A32_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16B16A16_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16B16A16_SNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16B16A16_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16B16A16_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16B16A16_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32G32_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32G32_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32G32_UINT: + return 
impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16B16X16_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A8R8G8B8_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A8R8G8B8_SRGB: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A2B10G10R10_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A2B10G10R10_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A8B8G8R8_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A8B8G8R8_SRGB: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A8B8G8R8_SNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A8B8G8R8_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A8B8G8R8_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16_SNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + 
.first->second.get(); + break; + case RenderTargetFormat::R16G16_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16G16_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::B10G11R11_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R5G6B5_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::A1R5G5B5_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8G8_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8G8_SNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8G8_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8G8_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16_SNORM: + return impl->converters_cache + .emplace(format, 
std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R16_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8_SNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R8_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + default: { + UNIMPLEMENTED_MSG("This format {} converter is not implemented", format); + return impl->converters_cache.emplace(format, std::make_unique()) + .first->second.get(); + } + } +} + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h new file mode 100644 index 000000000..03337e906 --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include + +#include "common/common_types.h" + +#pragma once + +#include "video_core/gpu.h" + +namespace Tegra::Engines::Blitter { + +class Converter { +public: + virtual void ConvertTo(std::span input, std::span output) = 0; + virtual void ConvertFrom(std::span input, std::span output) = 0; +}; + +class ConverterFactory { +public: + ConverterFactory(); 
+ ~ConverterFactory(); + + Converter* GetFormatConverter(RenderTargetFormat format); + +private: + Converter* BuildConverter(RenderTargetFormat format); + + struct ConverterFactoryImpl; + std::unique_ptr impl; +}; + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 354c6e429..f71a316b6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -466,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); std::scoped_lock lock{texture_cache.mutex}; - texture_cache.BlitImage(dst, src, copy_config); - return true; + return texture_cache.BlitImage(dst, src, copy_config); } Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 12b13cc59..d8ad8815c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -542,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { std::scoped_lock lock{texture_cache.mutex}; - texture_cache.BlitImage(dst, src, copy_config); - return true; + return texture_cache.BlitImage(dst, src, copy_config); } Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8ef75fe73..8e68a2e53 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -506,10 +506,14 @@ void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz } template -void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, +bool TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src, copy); + const auto result = GetBlitImages(dst, src, copy); + if (!result) { + return false; + } + const BlitImages images = *result; const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; @@ -596,6 +600,7 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, copy.operation); } + return true; } template @@ -1133,7 +1138,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } template -typename TextureCache

::BlitImages TextureCache

::GetBlitImages( +std::optional::BlitImages> TextureCache

::GetBlitImages( const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy) { @@ -1154,6 +1159,20 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( has_deleted_images = false; src_id = FindImage(src_info, src_addr, try_options); dst_id = FindImage(dst_info, dst_addr, try_options); + if (!copy.must_accelerate) { + do { + if (!src_id && !dst_id) { + return std::nullopt; + } + if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) { + break; + } + if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) { + break; + } + return std::nullopt; + } while (false); + } const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; if (src_image && src_image->info.num_samples > 1) { RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; @@ -1194,12 +1213,12 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{}); } while (has_deleted_images); } - return BlitImages{ + return {BlitImages{ .dst_id = dst_id, .src_id = src_id, .dst_format = dst_info.format, .src_format = src_info.format, - }; + }}; } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2fa8445eb..9db7195bf 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -174,7 +174,7 @@ public: void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); /// Blit an image with the given parameters - void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy); @@ -285,9 +285,9 @@ private: [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); /// Return a blit image pair from the given guest blit parameters - [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy); + [[nodiscard]] std::optional GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); /// Find or create a sampler from a guest descriptor sampler [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); From 5fbd6954efbfe3dd95fcd496cd25664c092947fc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 6 Nov 2022 11:08:22 +0100 Subject: [PATCH 2/5] Fermi2D: Implement Bilinear software filtering and address feedback. 
--- src/video_core/engines/fermi_2d.cpp | 11 +- src/video_core/engines/sw_blitter/blitter.cpp | 95 ++++++---- src/video_core/engines/sw_blitter/blitter.h | 2 +- .../engines/sw_blitter/converter.cpp | 171 +++++++++++------- src/video_core/engines/sw_blitter/converter.h | 9 +- src/video_core/gpu.h | 4 +- src/video_core/surface.cpp | 2 + 7 files changed, 179 insertions(+), 115 deletions(-) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 2c722c778..c6478ae85 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -62,11 +62,15 @@ void Fermi2D::Blit() { const auto& args = regs.pixels_from_memory; constexpr s64 null_derivate = 1ULL << 32; + Surface src = regs.src; + const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); + const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && + src.format != regs.dst.format; Config config{ .operation = regs.operation, .filter = args.sample_mode.filter, - .must_accelerate = args.du_dx != null_derivate || args.dv_dy != null_derivate || - args.sample_mode.filter == Filter::Bilinear, + .must_accelerate = + args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu, .dst_x0 = args.dst_x0, .dst_y0 = args.dst_y0, .dst_x1 = args.dst_x0 + args.dst_width, @@ -76,8 +80,7 @@ void Fermi2D::Blit() { .src_x1 = static_cast((args.du_dx * args.dst_width + args.src_x0) >> 32), .src_y1 = static_cast((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; - Surface src = regs.src; - const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); + const auto need_align_to_pitch = src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && static_cast(src.width) == config.src_x1 && diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp index caf51cbe3..c923a80e9 100644 --- 
a/src/video_core/engines/sw_blitter/blitter.cpp +++ b/src/video_core/engines/sw_blitter/blitter.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later +#include +#include #include #include "video_core/engines/sw_blitter/blitter.h" @@ -22,8 +24,10 @@ using namespace Texture; namespace { -void NeighrestNeighbor(std::span input, std::span output, u32 src_width, u32 src_height, - u32 dst_width, u32 dst_height, size_t bpp) { +constexpr size_t ir_components = 4; + +void NeighrestNeighbor(std::span input, std::span output, u32 src_width, + u32 src_height, u32 dst_width, u32 dst_height, size_t bpp) { const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); size_t src_y = 0; @@ -40,7 +44,7 @@ void NeighrestNeighbor(std::span input, std::span output, u32 src_width, } } -void NeighrestNeighborFast(std::span input, std::span output, u32 src_width, +void NeighrestNeighborFast(std::span input, std::span output, u32 src_width, u32 src_height, u32 dst_width, u32 dst_height) { const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); @@ -48,44 +52,62 @@ void NeighrestNeighborFast(std::span input, std::span output, u32 src_ for (u32 y = 0; y < dst_height; y++) { size_t src_x = 0; for (u32 x = 0; x < dst_width; x++) { - const size_t read_from = ((src_y * src_width + src_x) >> 32) * 4; - const size_t write_to = (y * dst_width + x) * 4; + const size_t read_from = ((src_y * src_width + src_x) >> 32) * ir_components; + const size_t write_to = (y * dst_width + x) * ir_components; - std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * 4); + std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components); src_x += dx_du; } src_y += dy_dv; } } -/* -void 
Bilinear(std::span input, std::span output, size_t src_width, - size_t src_height, size_t dst_width, size_t dst_height) { - const auto inv_lerp = [](u32 coord, u32 end) { return -static_cast(std::min(std::max(static_cast(coord), 0), end - 1)) / (end); }; - - +void Bilinear(std::span input, std::span output, size_t src_width, + size_t src_height, size_t dst_width, size_t dst_height) { + const auto bilinear_sample = [](std::span x0_y0, std::span x1_y0, + std::span x0_y1, std::span x1_y1, + f32 weight_x, f32 weight_y) { + std::array result{}; + for (size_t i = 0; i < ir_components; i++) { + const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x); + const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x); + result[i] = std::lerp(a, b, weight_y); + } + return result; + }; + const f32 dx_du = + dst_width > 1 ? static_cast(src_width - 1) / static_cast(dst_width - 1) : 0.f; + const f32 dy_dv = + dst_height > 1 ? static_cast(src_height - 1) / static_cast(dst_height - 1) : 0.f; for (u32 y = 0; y < dst_height; y++) { - const f32 ty_0 = inv_lerp(y, dst_extent_y); - const f32 ty_1 = inv_lerp(y + 1, dst_extent_y); for (u32 x = 0; x < dst_width; x++) { - const f32 tx_0 = inv_lerp(x, dst_extent_x); - const f32 tx_1 = inv_lerp(x + 1, dst_extent_x); - const std::array get_pixel = [&](f32 tx, f32 ty, u32 width, u32 height) { - std::array result{}; + const f32 x_low = std::floor(static_cast(x) * dx_du); + const f32 y_low = std::floor(static_cast(y) * dy_dv); + const f32 x_high = std::ceil(static_cast(x) * dx_du); + const f32 y_high = std::ceil(static_cast(y) * dy_dv); + const f32 weight_x = (static_cast(x) * dx_du) - x_low; + const f32 weight_y = (static_cast(y) * dy_dv) - y_low; - return (std::llround(width * tx) + std::llround(height * ty) * width) * 4; + const auto read_src = [&](f32 in_x, f32 in_y) { + const size_t read_from = + ((static_cast(in_x) * src_width + static_cast(in_y)) >> 32) * + ir_components; + return std::span(&input[read_from], ir_components); }; - std::array result{}; 
- const size_t read_from = get_pixel(src_width, src_height); - const size_t write_to = get_pixel(tx_0, ty_0, dst_width, dst_height); + auto x0_y0 = read_src(x_low, y_low); + auto x1_y0 = read_src(x_high, y_low); + auto x0_y1 = read_src(x_low, y_high); + auto x1_y1 = read_src(x_high, y_high); - std::memcpy(&output[write_to], &input[read_from], bpp); + const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y); + + const size_t write_to = (y * dst_width + x) * ir_components; + + std::memcpy(&output[write_to], &result, sizeof(f32) * ir_components); } } } -*/ } // namespace @@ -107,8 +129,6 @@ SoftwareBlitEngine::~SoftwareBlitEngine() = default; bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& config) { - UNIMPLEMENTED_IF(config.filter == Fermi2D::Filter::Bilinear); - const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) { if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) { return CalculateSize(true, bytes_per_pixel, surface.width, surface.height, @@ -116,9 +136,9 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, } return static_cast(surface.pitch * surface.height); }; - const auto process_pitch_linear = [](bool unpack, std::span input, std::span output, - u32 extent_x, u32 extent_y, u32 pitch, u32 x0, u32 y0, - size_t bpp) { + const auto process_pitch_linear = [](bool unpack, std::span input, + std::span output, u32 extent_x, u32 extent_y, + u32 pitch, u32 x0, u32 y0, size_t bpp) { const size_t base_offset = x0 * bpp; const size_t copy_size = extent_x * bpp; for (u32 y = y0; y < extent_y; y++) { @@ -157,12 +177,17 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, const auto convertion_phase_ir = [&]() { auto* input_converter = impl->converter_factory.GetFormatConverter(src.format); - impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * 4); - impl->intermediate_dst.resize((dst_copy_size / 
dst_bytes_per_pixel) * 4); + impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components); + impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components); input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src); - NeighrestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, - src_extent_y, dst_extent_x, dst_extent_y); + if (config.filter != Fermi2D::Filter::Bilinear) { + NeighrestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, + src_extent_y, dst_extent_x, dst_extent_y); + } else { + Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y, + dst_extent_x, dst_extent_y); + } auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format); output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer); @@ -183,7 +208,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, // Conversion Phase if (no_passthrough) { - if (src.format != dst.format) { + if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) { convertion_phase_ir(); } else { convertion_phase_same_format(); diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h index 3edf40c3e..85b55c836 100644 --- a/src/video_core/engines/sw_blitter/blitter.h +++ b/src/video_core/engines/sw_blitter/blitter.h @@ -13,7 +13,7 @@ namespace Tegra::Engines::Blitter { class SoftwareBlitEngine { public: - SoftwareBlitEngine(MemoryManager& memory_manager_); + explicit SoftwareBlitEngine(MemoryManager& memory_manager_); ~SoftwareBlitEngine(); bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config); diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp index 2e376f430..408d87944 100644 --- a/src/video_core/engines/sw_blitter/converter.cpp +++ b/src/video_core/engines/sw_blitter/converter.cpp @@ 
-139,7 +139,7 @@ struct R32B32G32A32_FLOATTraits { ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; static constexpr std::array component_sizes = {32, 32, 32, 32}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::G, Swizzle::B, Swizzle::R}; + Swizzle::R, Swizzle::B, Swizzle::G, Swizzle::A}; }; struct R32G32B32A32_SINTTraits { @@ -148,7 +148,7 @@ struct R32G32B32A32_SINTTraits { ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; static constexpr std::array component_sizes = {32, 32, 32, 32}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R32G32B32A32_UINTTraits { @@ -157,7 +157,7 @@ struct R32G32B32A32_UINTTraits { ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; static constexpr std::array component_sizes = {32, 32, 32, 32}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R16G16B16A16_UNORMTraits { @@ -166,7 +166,7 @@ struct R16G16B16A16_UNORMTraits { ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; static constexpr std::array component_sizes = {16, 16, 16, 16}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R16G16B16A16_SNORMTraits { @@ -175,7 +175,7 @@ struct R16G16B16A16_SNORMTraits { ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; static constexpr std::array component_sizes = {16, 16, 16, 16}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R16G16B16A16_SINTTraits { @@ -184,7 +184,7 @@ struct 
R16G16B16A16_SINTTraits { ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; static constexpr std::array component_sizes = {16, 16, 16, 16}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R16G16B16A16_UINTTraits { @@ -193,7 +193,7 @@ struct R16G16B16A16_UINTTraits { ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; static constexpr std::array component_sizes = {16, 16, 16, 16}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R16G16B16A16_FLOATTraits { @@ -202,7 +202,7 @@ struct R16G16B16A16_FLOATTraits { ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; static constexpr std::array component_sizes = {16, 16, 16, 16}; static constexpr std::array component_swizzle = { - Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R32G32_FLOATTraits { @@ -210,8 +210,8 @@ struct R32G32_FLOATTraits { static constexpr std::array component_types = { ComponentType::FLOAT, ComponentType::FLOAT}; static constexpr std::array component_sizes = {32, 32}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R32G32_SINTTraits { @@ -219,8 +219,8 @@ struct R32G32_SINTTraits { static constexpr std::array component_types = { ComponentType::SINT, ComponentType::SINT}; static constexpr std::array component_sizes = {32, 32}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R32G32_UINTTraits { @@ -228,8 +228,8 @@ struct R32G32_UINTTraits { static constexpr std::array component_types = 
{ ComponentType::UINT, ComponentType::UINT}; static constexpr std::array component_sizes = {32, 32}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R16G16B16X16_FLOATTraits { @@ -238,7 +238,7 @@ struct R16G16B16X16_FLOATTraits { ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; static constexpr std::array component_sizes = {16, 16, 16, 16}; static constexpr std::array component_swizzle = { - Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; }; struct A8R8G8B8_UNORMTraits { @@ -247,7 +247,7 @@ struct A8R8G8B8_UNORMTraits { ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; static constexpr std::array component_sizes = {8, 8, 8, 8}; static constexpr std::array component_swizzle = { - Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; + Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; }; struct A8R8G8B8_SRGBTraits { @@ -256,25 +256,25 @@ struct A8R8G8B8_SRGBTraits { ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; static constexpr std::array component_sizes = {8, 8, 8, 8}; static constexpr std::array component_swizzle = { - Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; + Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; }; struct A2B10G10R10_UNORMTraits { static constexpr size_t num_components = 4; static constexpr std::array component_types = { ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; - static constexpr std::array component_sizes = {10, 10, 10, 2}; + static constexpr std::array component_sizes = {2, 10, 10, 10}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; struct A2B10G10R10_UINTTraits { static constexpr size_t num_components = 
4; static constexpr std::array component_types = { ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; - static constexpr std::array component_sizes = {10, 10, 10, 2}; + static constexpr std::array component_sizes = {2, 10, 10, 10}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; struct A8B8G8R8_UNORMTraits { @@ -283,7 +283,7 @@ struct A8B8G8R8_UNORMTraits { ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; static constexpr std::array component_sizes = {8, 8, 8, 8}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; struct A8B8G8R8_SRGBTraits { @@ -292,7 +292,7 @@ struct A8B8G8R8_SRGBTraits { ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; static constexpr std::array component_sizes = {8, 8, 8, 8}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; struct A8B8G8R8_SNORMTraits { @@ -301,7 +301,7 @@ struct A8B8G8R8_SNORMTraits { ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; static constexpr std::array component_sizes = {8, 8, 8, 8}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; struct A8B8G8R8_SINTTraits { @@ -310,7 +310,7 @@ struct A8B8G8R8_SINTTraits { ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; static constexpr std::array component_sizes = {8, 8, 8, 8}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; struct A8B8G8R8_UINTTraits { @@ -319,7 +319,7 @@ 
struct A8B8G8R8_UINTTraits { ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; static constexpr std::array component_sizes = {8, 8, 8, 8}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; + Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; struct R16G16_UNORMTraits { @@ -327,8 +327,8 @@ struct R16G16_UNORMTraits { static constexpr std::array component_types = { ComponentType::UNORM, ComponentType::UNORM}; static constexpr std::array component_sizes = {16, 16}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R16G16_SNORMTraits { @@ -336,8 +336,8 @@ struct R16G16_SNORMTraits { static constexpr std::array component_types = { ComponentType::SNORM, ComponentType::SNORM}; static constexpr std::array component_sizes = {16, 16}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R16G16_SINTTraits { @@ -345,8 +345,8 @@ struct R16G16_SINTTraits { static constexpr std::array component_types = { ComponentType::SINT, ComponentType::SINT}; static constexpr std::array component_sizes = {16, 16}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R16G16_UINTTraits { @@ -354,8 +354,8 @@ struct R16G16_UINTTraits { static constexpr std::array component_types = { ComponentType::UINT, ComponentType::UINT}; static constexpr std::array component_sizes = {16, 16}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R16G16_FLOATTraits { @@ -363,17 +363,17 @@ struct R16G16_FLOATTraits { static constexpr std::array component_types = { 
ComponentType::FLOAT, ComponentType::FLOAT}; static constexpr std::array component_sizes = {16, 16}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct B10G11R11_FLOATTraits { static constexpr size_t num_components = 3; static constexpr std::array component_types = { ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; - static constexpr std::array component_sizes = {11, 11, 10}; + static constexpr std::array component_sizes = {10, 11, 11}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::G, Swizzle::B}; + Swizzle::B, Swizzle::G, Swizzle::R}; }; struct R32_SINTTraits { @@ -400,22 +400,40 @@ struct R32_FLOATTraits { static constexpr std::array component_swizzle = {Swizzle::R}; }; +struct X8R8G8B8_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct X8R8G8B8_SRGBTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + struct R5G6B5_UNORMTraits { static constexpr size_t num_components = 3; static constexpr std::array component_types = { ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; static constexpr std::array component_sizes = {5, 6, 5}; static constexpr std::array component_swizzle = { - Swizzle::B, Swizzle::G, Swizzle::R}; + Swizzle::R, Swizzle::G, Swizzle::B}; }; 
struct A1R5G5B5_UNORMTraits { static constexpr size_t num_components = 4; static constexpr std::array component_types = { ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; - static constexpr std::array component_sizes = {5, 5, 5, 1}; + static constexpr std::array component_sizes = {1, 5, 5, 5}; static constexpr std::array component_swizzle = { - Swizzle::B, Swizzle::G, Swizzle::R, Swizzle::A}; + Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; }; struct R8G8_UNORMTraits { @@ -423,8 +441,8 @@ struct R8G8_UNORMTraits { static constexpr std::array component_types = { ComponentType::UNORM, ComponentType::UNORM}; static constexpr std::array component_sizes = {8, 8}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R8G8_SNORMTraits { @@ -432,8 +450,8 @@ struct R8G8_SNORMTraits { static constexpr std::array component_types = { ComponentType::SNORM, ComponentType::SNORM}; static constexpr std::array component_sizes = {8, 8}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R8G8_SINTTraits { @@ -441,8 +459,8 @@ struct R8G8_SINTTraits { static constexpr std::array component_types = { ComponentType::SINT, ComponentType::SINT}; static constexpr std::array component_sizes = {8, 8}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; struct R8G8_UINTTraits { @@ -450,8 +468,8 @@ struct R8G8_UINTTraits { static constexpr std::array component_types = { ComponentType::UINT, ComponentType::UINT}; static constexpr std::array component_sizes = {8, 8}; - static constexpr std::array component_swizzle = {Swizzle::G, - Swizzle::R}; + static constexpr std::array component_swizzle = {Swizzle::R, + Swizzle::G}; }; 
struct R16_UNORMTraits { @@ -611,7 +629,7 @@ private: constexpr size_t fp16_mantissa_bits = 10; constexpr size_t mantissa_mask = ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL); - tmp = tmp & mantissa_mask; + tmp = tmp & static_cast(mantissa_mask); // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM return std::bit_cast(tmp); }; @@ -624,12 +642,13 @@ private: }; const auto calculate_snorm = [&]() { return static_cast( - static_cast(sign_extend(value, component_sizes[which_component])) / - ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); + static_cast(sign_extend(value, component_sizes[which_component])) / + static_cast((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); }; const auto calculate_unorm = [&]() { - return static_cast(static_cast(value) / - ((1ULL << (component_sizes[which_component])) - 1ULL)); + return static_cast( + static_cast(value) / + static_cast((1ULL << (component_sizes[which_component])) - 1ULL)); }; if constexpr (component_types[which_component] == ComponentType::SNORM) { out_component = calculate_snorm(); @@ -688,14 +707,15 @@ private: return tmp_value >> shift_towards; }; const auto calculate_unorm = [&]() { - return static_cast(static_cast(in_component) * - ((1ULL << (component_sizes[which_component])) - 1ULL)); + return static_cast( + static_cast(in_component) * + static_cast((1ULL << (component_sizes[which_component])) - 1ULL)); }; if constexpr (component_types[which_component] == ComponentType::SNORM || component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { - s32 tmp_word = - static_cast(static_cast(in_component) * - ((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); + s32 tmp_word = static_cast( + static_cast(in_component) * + static_cast((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); insert_to_word(tmp_word); } else if constexpr (component_types[which_component] == ComponentType::UNORM || @@ -714,11 +734,12 @@ private: 
insert_to_word(tmp_word); } else if constexpr (component_sizes[which_component] == 16) { static constexpr u32 sign_mask = 0x8000; - static constexpr u32 mantissa_mask = 0x8000; + static constexpr u32 mantissa_mask = 0x03ff; + static constexpr u32 exponent_mask = 0x7c00; const u32 tmp_word = std::bit_cast(in_component); const u32 half = ((tmp_word >> 16) & sign_mask) | - ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & 0x7c00) | - ((tmp_word >> 13) & 0x03ff); + ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & exponent_mask) | + ((tmp_word >> 13) & mantissa_mask); insert_to_word(half); } else { insert_to_word(to_fp_n(in_component, component_sizes[which_component], @@ -740,7 +761,7 @@ private: } public: - void ConvertTo(std::span input, std::span output) override { + void ConvertTo(std::span input, std::span output) override { const size_t num_pixels = output.size() / components_per_ir_rep; for (size_t pixel = 0; pixel < num_pixels; pixel++) { std::array words{}; @@ -790,11 +811,11 @@ public: } } - void ConvertFrom(std::span input, std::span output) override { + void ConvertFrom(std::span input, std::span output) override { const size_t num_pixels = output.size() / total_bytes_per_pixel; for (size_t pixel = 0; pixel < num_pixels; pixel++) { - std::span old_components(&input[pixel * components_per_ir_rep], - components_per_ir_rep); + std::span old_components(&input[pixel * components_per_ir_rep], + components_per_ir_rep); std::array words{}; if constexpr (component_swizzle[0] != Swizzle::None) { ConvertFromComponent<0>(words[bound_words[0]], @@ -827,7 +848,7 @@ public: } ConverterImpl() = default; - ~ConverterImpl() = default; + ~ConverterImpl() override = default; }; struct ConverterFactory::ConverterFactoryImpl { @@ -850,13 +871,15 @@ Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) { class NullConverter : public Converter { public: - void ConvertTo([[maybe_unused]] std::span input, std::span output) override { + void 
ConvertTo([[maybe_unused]] std::span input, std::span output) override { std::fill(output.begin(), output.end(), 0.0f); } - void ConvertFrom([[maybe_unused]] std::span input, std::span output) override { + void ConvertFrom([[maybe_unused]] std::span input, std::span output) override { const u8 fill_value = 0U; std::fill(output.begin(), output.end(), fill_value); } + NullConverter() = default; + ~NullConverter() = default; }; Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { @@ -1011,6 +1034,16 @@ Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { .emplace(format, std::make_unique>()) .first->second.get(); break; + case RenderTargetFormat::X8R8G8B8_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::X8R8G8B8_SRGB: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; case RenderTargetFormat::R5G6B5_UNORM: return impl->converters_cache .emplace(format, std::make_unique>()) diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h index 03337e906..f9bdc516e 100644 --- a/src/video_core/engines/sw_blitter/converter.h +++ b/src/video_core/engines/sw_blitter/converter.h @@ -1,21 +1,22 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later +#pragma once + #include #include #include "common/common_types.h" -#pragma once - #include "video_core/gpu.h" namespace Tegra::Engines::Blitter { class Converter { public: - virtual void ConvertTo(std::span input, std::span output) = 0; - virtual void ConvertFrom(std::span input, std::span output) = 0; + virtual void ConvertTo(std::span input, std::span output) = 0; + virtual void ConvertFrom(std::span input, std::span output) = 0; + virtual ~Converter() = default; }; class ConverterFactory { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h 
index d0709dc69..87ebf2054 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -61,8 +61,8 @@ enum class RenderTargetFormat : u32 { R32_SINT = 0xE3, R32_UINT = 0xE4, R32_FLOAT = 0xE5, - // X8R8G8B8_UNORM = 0xE6, - // X8R8G8B8_SRGB = 0xE7, + X8R8G8B8_UNORM = 0xE6, + X8R8G8B8_SRGB = 0xE7, R5G6B5_UNORM = 0xE8, A1R5G5B5_UNORM = 0xE9, R8G8_UNORM = 0xEA, diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6bd133d10..80a7d908f 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -118,8 +118,10 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT: return PixelFormat::R16G16B16X16_FLOAT; case Tegra::RenderTargetFormat::A8R8G8B8_UNORM: + case Tegra::RenderTargetFormat::X8R8G8B8_UNORM: return PixelFormat::B8G8R8A8_UNORM; case Tegra::RenderTargetFormat::A8R8G8B8_SRGB: + case Tegra::RenderTargetFormat::X8R8G8B8_SRGB: return PixelFormat::B8G8R8A8_SRGB; case Tegra::RenderTargetFormat::A2B10G10R10_UNORM: return PixelFormat::A2B10G10R10_UNORM; From daf2ef8f1c4eba1117e3ce0c49f59f934f26ae88 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 6 Nov 2022 12:50:54 +0100 Subject: [PATCH 3/5] MaxwellDMA: Implement BlockLinear to BlockLinear copies. --- src/video_core/engines/maxwell_dma.cpp | 67 +++++++++++++++++++++++++- src/video_core/engines/maxwell_dma.h | 3 ++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1bf6ca2dd..334429514 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -62,7 +62,8 @@ void MaxwellDMA::Launch() { if (!is_src_pitch && !is_dst_pitch) { // If both the source and the destination are in block layout, assert. 
- UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); + CopyBlockLinearToBlockLinear(); + ReleaseSemaphore(); return; } @@ -291,6 +292,70 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } +void MaxwellDMA::CopyBlockLinearToBlockLinear() { + UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); + + const bool is_remapping = regs.launch_dma.remap_enable != 0; + + // Deswizzle the input and copy it over. + const Parameters& src = regs.src_params; + const Parameters& dst = regs.dst_params; + + const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; + const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; + + const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; + + u32 src_width = src.width; + u32 dst_width = dst.width; + u32 x_elements = regs.line_length_in; + u32 src_x_offset = src.origin.x; + u32 dst_x_offset = dst.origin.x; + u32 bpp_shift = 0U; + if (!is_remapping) { + bpp_shift = Common::FoldRight( + 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + src_width, dst_width, x_elements, src_x_offset, dst_x_offset, + static_cast(regs.offset_in), static_cast(regs.offset_out)); + src_width >>= bpp_shift; + dst_width >>= bpp_shift; + x_elements >>= bpp_shift; + src_x_offset >>= bpp_shift; + dst_x_offset >>= bpp_shift; + } + + const u32 bytes_per_pixel = base_bpp << bpp_shift; + const size_t src_size = CalculateSize(true, bytes_per_pixel, src_width, src.height, src.depth, + src.block_size.height, src.block_size.depth); + const size_t dst_size = CalculateSize(true, bytes_per_pixel, dst_width, dst.height, dst.depth, + dst.block_size.height, dst.block_size.depth); + + const u32 pitch = x_elements * bytes_per_pixel; + const size_t mid_buffer_size = pitch * regs.line_count; + + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + if 
(write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } + + intermediate_buffer.resize(mid_buffer_size); + + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + + UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, + src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, + src.block_size.height, src.block_size.depth, pitch); + + SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, + dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, + dst.block_size.height, dst.block_size.depth, pitch); + + memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); +} + void MaxwellDMA::ReleaseSemaphore() { const auto type = regs.launch_dma.semaphore_type; const GPUVAddr address = regs.semaphore.address; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 953e34adc..d40d3d302 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -223,6 +223,8 @@ private: void CopyPitchToBlockLinear(); + void CopyBlockLinearToBlockLinear(); + void FastCopyBlockLinearToPitch(); void ReleaseSemaphore(); @@ -234,6 +236,7 @@ private: std::vector read_buffer; std::vector write_buffer; + std::vector intermediate_buffer; static constexpr std::size_t NUM_REGS = 0x800; struct Regs { From 7356ab1de6ab7336da426b9176daafb3ebb503f5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 6 Nov 2022 15:19:08 +0100 Subject: [PATCH 4/5] GPU: Implement additional render target formats. 
--- .../engines/sw_blitter/converter.cpp | 106 +++++++++++++++++- src/video_core/gpu.h | 14 +-- .../renderer_opengl/maxwell_to_gl.h | 1 + .../renderer_vulkan/maxwell_to_vk.cpp | 1 + src/video_core/surface.cpp | 10 +- src/video_core/surface.h | 4 + src/video_core/texture_cache/formatter.h | 2 + 7 files changed, 126 insertions(+), 12 deletions(-) diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp index 408d87944..37c5eff69 100644 --- a/src/video_core/engines/sw_blitter/converter.cpp +++ b/src/video_core/engines/sw_blitter/converter.cpp @@ -133,13 +133,13 @@ constexpr std::array RGB_TO_SRGB_LUT = { } // namespace -struct R32B32G32A32_FLOATTraits { +struct R32G32B32A32_FLOATTraits { static constexpr size_t num_components = 4; static constexpr std::array component_types = { ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; static constexpr std::array component_sizes = {32, 32, 32, 32}; static constexpr std::array component_swizzle = { - Swizzle::R, Swizzle::B, Swizzle::G, Swizzle::A}; + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; struct R32G32B32A32_SINTTraits { @@ -160,6 +160,33 @@ struct R32G32B32A32_UINTTraits { Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; }; +struct R32G32B32X32_FLOATTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; + static constexpr std::array component_sizes = {32, 32, 32, 32}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; +}; + +struct R32G32B32X32_SINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; + static constexpr std::array component_sizes = {32, 32, 32, 32}; + static constexpr std::array 
component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; +}; + +struct R32G32B32X32_UINTTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; + static constexpr std::array component_sizes = {32, 32, 32, 32}; + static constexpr std::array component_swizzle = { + Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; +}; + struct R16G16B16A16_UNORMTraits { static constexpr size_t num_components = 4; static constexpr std::array component_types = { @@ -277,6 +304,15 @@ struct A2B10G10R10_UINTTraits { Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; }; +struct A2R10G10B10_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {2, 10, 10, 10}; + static constexpr std::array component_swizzle = { + Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + struct A8B8G8R8_UNORMTraits { static constexpr size_t num_components = 4; static constexpr std::array component_types = { @@ -544,6 +580,33 @@ struct R8_UINTTraits { static constexpr std::array component_swizzle = {Swizzle::R}; }; +struct X1R5G5B5_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {1, 5, 5, 5}; + static constexpr std::array component_swizzle = { + Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct X8B8G8R8_UNORMTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; + static constexpr std::array component_sizes = {8, 8, 
8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct X8B8G8R8_SRGBTraits { + static constexpr size_t num_components = 4; + static constexpr std::array component_types = { + ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; + static constexpr std::array component_sizes = {8, 8, 8, 8}; + static constexpr std::array component_swizzle = { + Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + template class ConverterImpl : public Converter { private: @@ -884,9 +947,9 @@ public: Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { switch (format) { - case RenderTargetFormat::R32B32G32A32_FLOAT: + case RenderTargetFormat::R32G32B32A32_FLOAT: return impl->converters_cache - .emplace(format, std::make_unique>()) + .emplace(format, std::make_unique>()) .first->second.get(); break; case RenderTargetFormat::R32G32B32A32_SINT: @@ -899,6 +962,21 @@ Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { .emplace(format, std::make_unique>()) .first->second.get(); break; + case RenderTargetFormat::R32G32B32X32_FLOAT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32G32B32X32_SINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::R32G32B32X32_UINT: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; case RenderTargetFormat::R16G16B16A16_UNORM: return impl->converters_cache .emplace(format, std::make_unique>()) @@ -964,6 +1042,11 @@ Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { .emplace(format, std::make_unique>()) .first->second.get(); break; + case RenderTargetFormat::A2R10G10B10_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + 
break; case RenderTargetFormat::A8B8G8R8_UNORM: return impl->converters_cache .emplace(format, std::make_unique>()) @@ -1119,6 +1202,21 @@ Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { .emplace(format, std::make_unique>()) .first->second.get(); break; + case RenderTargetFormat::X1R5G5B5_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::X8B8G8R8_UNORM: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; + case RenderTargetFormat::X8B8G8R8_SRGB: + return impl->converters_cache + .emplace(format, std::make_unique>()) + .first->second.get(); + break; default: { UNIMPLEMENTED_MSG("This format {} converter is not implemented", format); return impl->converters_cache.emplace(format, std::make_unique()) diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 87ebf2054..8a871593a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -27,12 +27,12 @@ struct CommandList; // TODO: Implement the commented ones enum class RenderTargetFormat : u32 { NONE = 0x0, - R32B32G32A32_FLOAT = 0xC0, + R32G32B32A32_FLOAT = 0xC0, R32G32B32A32_SINT = 0xC1, R32G32B32A32_UINT = 0xC2, - // R32G32B32X32_FLOAT = 0xC3, - // R32G32B32X32_SINT = 0xC4, - // R32G32B32X32_UINT = 0xC5, + R32G32B32X32_FLOAT = 0xC3, + R32G32B32X32_SINT = 0xC4, + R32G32B32X32_UINT = 0xC5, R16G16B16A16_UNORM = 0xC6, R16G16B16A16_SNORM = 0xC7, R16G16B16A16_SINT = 0xC8, @@ -56,7 +56,7 @@ enum class RenderTargetFormat : u32 { R16G16_SINT = 0xDC, R16G16_UINT = 0xDD, R16G16_FLOAT = 0xDE, - // A2R10G10B10_UNORM = 0xDF, + A2R10G10B10_UNORM = 0xDF, B10G11R11_FLOAT = 0xE0, R32_SINT = 0xE3, R32_UINT = 0xE4, @@ -79,11 +79,11 @@ enum class RenderTargetFormat : u32 { R8_SINT = 0xF5, R8_UINT = 0xF6, - /* - A8_UNORM = 0xF7, + // A8_UNORM = 0xF7, X1R5G5B5_UNORM = 0xF8, X8B8G8R8_UNORM = 0xF9, X8B8G8R8_SRGB = 0xFA, + /* Z1R5G5B5_UNORM = 0xFB, O1R5G5B5_UNORM = 
0xFC, Z8R8G8B8_UNORM = 0xFD, diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index e14f9b2db..ef1190e1f 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -28,6 +28,7 @@ constexpr std::array FORMAT_TAB {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2R10G10B10_UNORM {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // A5B5G5R1_UNORM {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 5c156087b..1da53f203 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -125,6 +125,7 @@ struct FormatTuple { {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT + {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled) {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 80a7d908f..b618e1a25 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -93,11 +93,14 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch 
(format) { - case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT: + case Tegra::RenderTargetFormat::R32G32B32A32_FLOAT: + case Tegra::RenderTargetFormat::R32G32B32X32_FLOAT: return PixelFormat::R32G32B32A32_FLOAT; case Tegra::RenderTargetFormat::R32G32B32A32_SINT: + case Tegra::RenderTargetFormat::R32G32B32X32_SINT: return PixelFormat::R32G32B32A32_SINT; case Tegra::RenderTargetFormat::R32G32B32A32_UINT: + case Tegra::RenderTargetFormat::R32G32B32X32_UINT: return PixelFormat::R32G32B32A32_UINT; case Tegra::RenderTargetFormat::R16G16B16A16_UNORM: return PixelFormat::R16G16B16A16_UNORM; @@ -127,9 +130,13 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) return PixelFormat::A2B10G10R10_UNORM; case Tegra::RenderTargetFormat::A2B10G10R10_UINT: return PixelFormat::A2B10G10R10_UINT; + case Tegra::RenderTargetFormat::A2R10G10B10_UNORM: + return PixelFormat::A2R10G10B10_UNORM; case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: + case Tegra::RenderTargetFormat::X8B8G8R8_UNORM: return PixelFormat::A8B8G8R8_UNORM; case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: + case Tegra::RenderTargetFormat::X8B8G8R8_SRGB: return PixelFormat::A8B8G8R8_SRGB; case Tegra::RenderTargetFormat::A8B8G8R8_SNORM: return PixelFormat::A8B8G8R8_SNORM; @@ -158,6 +165,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) case Tegra::RenderTargetFormat::R5G6B5_UNORM: return PixelFormat::R5G6B5_UNORM; case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: + case Tegra::RenderTargetFormat::X1R5G5B5_UNORM: return PixelFormat::A1R5G5B5_UNORM; case Tegra::RenderTargetFormat::R8G8_UNORM: return PixelFormat::R8G8_UNORM; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 57ca7f597..44b79af20 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -23,6 +23,7 @@ enum class PixelFormat { A1R5G5B5_UNORM, A2B10G10R10_UNORM, A2B10G10R10_UINT, + A2R10G10B10_UNORM, A1B5G5R5_UNORM, A5B5G5R1_UNORM, R8_UNORM, @@ -159,6 +160,7 @@ 
constexpr std::array BLOCK_WIDTH_TABLE = {{ 1, // A1R5G5B5_UNORM 1, // A2B10G10R10_UNORM 1, // A2B10G10R10_UINT + 1, // A2R10G10B10_UNORM 1, // A1B5G5R5_UNORM 1, // A5B5G5R1_UNORM 1, // R8_UNORM @@ -264,6 +266,7 @@ constexpr std::array BLOCK_HEIGHT_TABLE = {{ 1, // A1R5G5B5_UNORM 1, // A2B10G10R10_UNORM 1, // A2B10G10R10_UINT + 1, // A2R10G10B10_UNORM 1, // A1B5G5R5_UNORM 1, // A5B5G5R1_UNORM 1, // R8_UNORM @@ -369,6 +372,7 @@ constexpr std::array BITS_PER_BLOCK_TABLE = {{ 16, // A1R5G5B5_UNORM 32, // A2B10G10R10_UNORM 32, // A2B10G10R10_UINT + 32, // A2R10G10B10_UNORM 16, // A1B5G5R5_UNORM 16, // A5B5G5R1_UNORM 8, // R8_UNORM diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index acc854715..f1f0a057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -35,6 +35,8 @@ struct fmt::formatter : fmt::formatter Date: Thu, 24 Nov 2022 20:45:06 +0100 Subject: [PATCH 5/5] Fermi2D: Cleanup and address feedback. 
--- src/video_core/engines/sw_blitter/blitter.cpp | 16 +-- .../engines/sw_blitter/converter.cpp | 6 + .../engines/sw_blitter/generate_converters.py | 136 ++++++++++++++++++ 3 files changed, 150 insertions(+), 8 deletions(-) create mode 100644 src/video_core/engines/sw_blitter/generate_converters.py diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp index c923a80e9..2f1ea4626 100644 --- a/src/video_core/engines/sw_blitter/blitter.cpp +++ b/src/video_core/engines/sw_blitter/blitter.cpp @@ -26,8 +26,8 @@ namespace { constexpr size_t ir_components = 4; -void NeighrestNeighbor(std::span input, std::span output, u32 src_width, - u32 src_height, u32 dst_width, u32 dst_height, size_t bpp) { +void NearestNeighbor(std::span input, std::span output, u32 src_width, u32 src_height, + u32 dst_width, u32 dst_height, size_t bpp) { const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); size_t src_y = 0; @@ -44,8 +44,8 @@ void NeighrestNeighbor(std::span input, std::span output, u32 src_ } } -void NeighrestNeighborFast(std::span input, std::span output, u32 src_width, - u32 src_height, u32 dst_width, u32 dst_height) { +void NearestNeighborFast(std::span input, std::span output, u32 src_width, + u32 src_height, u32 dst_width, u32 dst_height) { const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); size_t src_y = 0; @@ -171,8 +171,8 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y; const auto convertion_phase_same_format = [&]() { - NeighrestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y, - dst_extent_x, dst_extent_y, dst_bytes_per_pixel); + 
NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y, + dst_extent_x, dst_extent_y, dst_bytes_per_pixel); }; const auto convertion_phase_ir = [&]() { @@ -182,8 +182,8 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src); if (config.filter != Fermi2D::Filter::Bilinear) { - NeighrestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, - src_extent_y, dst_extent_x, dst_extent_y); + NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, + src_extent_y, dst_extent_x, dst_extent_y); } else { Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y, dst_extent_x, dst_extent_y); diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp index 37c5eff69..cd46dfd4f 100644 --- a/src/video_core/engines/sw_blitter/converter.cpp +++ b/src/video_core/engines/sw_blitter/converter.cpp @@ -41,6 +41,12 @@ enum class ComponentType : u32 { namespace { +/* + * Note: Use generate_converters.py to generate the traits structs and switch cases for new render + * target formats, then copy-paste them into this file to update it. Just run "python + * generate_converters.py" and take the code from its output. Edit the script's format list to add new formats.
+ */ + constexpr std::array SRGB_TO_RGB_LUT = { 0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f, 1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f, diff --git a/src/video_core/engines/sw_blitter/generate_converters.py b/src/video_core/engines/sw_blitter/generate_converters.py new file mode 100644 index 000000000..f641564f7 --- /dev/null +++ b/src/video_core/engines/sw_blitter/generate_converters.py @@ -0,0 +1,136 @@ +# SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + +import re + +class Format: + def __init__(self, string_value): + self.name = string_value + tmp = string_value.split('_') + self.component_type = tmp[1] + component_data = re.findall(r"\w\d+", tmp[0]) + self.num_components = len(component_data) + sizes = [] + swizzle = [] + for data in component_data: + swizzle.append(data[0]) + sizes.append(int(data[1:])) + self.sizes = sizes + self.swizzle = swizzle + + def build_component_type_array(self): + result = "{ " + b = False + for i in range(0, self.num_components): + if b: + result += ", " + b = True + result += "ComponentType::" + self.component_type + result += " }" + return result + + def build_component_sizes_array(self): + result = "{ " + b = False + for i in range(0, self.num_components): + if b: + result += ", " + b = True + result += str(self.sizes[i]) + result += " }" + return result + + def build_component_swizzle_array(self): + result = "{ " + b = False + for i in range(0, self.num_components): + if b: + result += ", " + b = True + swizzle = self.swizzle[i] + if swizzle == "X": + swizzle = "None" + result += "Swizzle::" + swizzle + result += " }" + return result + + def print_declaration(self): + print("struct " + self.name + "Traits {") + print(" static constexpr size_t num_components = " + str(self.num_components) + ";") + print(" static constexpr std::array component_types = " + 
self.build_component_type_array() + ";") + print(" static constexpr std::array component_sizes = " + self.build_component_sizes_array() + ";") + print(" static constexpr std::array component_swizzle = " + self.build_component_swizzle_array() + ";") + print("};\n") + + def print_case(self): + print("case RenderTargetFormat::" + self.name + ":") + print(" return impl->converters_cache") + print(" .emplace(format, std::make_unique>())") + print(" .first->second.get();") + print(" break;") + +txt = """ +R32G32B32A32_FLOAT +R32G32B32A32_SINT +R32G32B32A32_UINT +R32G32B32X32_FLOAT +R32G32B32X32_SINT +R32G32B32X32_UINT +R16G16B16A16_UNORM +R16G16B16A16_SNORM +R16G16B16A16_SINT +R16G16B16A16_UINT +R16G16B16A16_FLOAT +R32G32_FLOAT +R32G32_SINT +R32G32_UINT +R16G16B16X16_FLOAT +A8R8G8B8_UNORM +A8R8G8B8_SRGB +A2B10G10R10_UNORM +A2B10G10R10_UINT +A2R10G10B10_UNORM +A8B8G8R8_UNORM +A8B8G8R8_SRGB +A8B8G8R8_SNORM +A8B8G8R8_SINT +A8B8G8R8_UINT +R16G16_UNORM +R16G16_SNORM +R16G16_SINT +R16G16_UINT +R16G16_FLOAT +B10G11R11_FLOAT +R32_SINT +R32_UINT +R32_FLOAT +X8R8G8B8_UNORM +X8R8G8B8_SRGB +R5G6B5_UNORM +A1R5G5B5_UNORM +R8G8_UNORM +R8G8_SNORM +R8G8_SINT +R8G8_UINT +R16_UNORM +R16_SNORM +R16_SINT +R16_UINT +R16_FLOAT +R8_UNORM +R8_SNORM +R8_SINT +R8_UINT +X1R5G5B5_UNORM +X8B8G8R8_UNORM +X8B8G8R8_SRGB +""" + +x = txt.split() +y = list(map(lambda a: Format(a), x)) +formats = list(y) +for format in formats: + format.print_declaration() + +for format in formats: + format.print_case()