From 95d156a1502ca46b8301e1527ad14db9790495e7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:29:04 -0300 Subject: [PATCH 01/13] video_core/host_shaders: Add support for prebuilt SPIR-V shaders Add support for building SPIR-V shaders from GLSL and generating headers to include the text of those same GLSL shaders to consume from OpenGL. --- src/video_core/host_shaders/CMakeLists.txt | 53 +++++++++++++++------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c157724a9..ff20bc93b 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,8 +1,12 @@ -set(SHADER_SOURCES +set(SHADER_FILES opengl_present.frag opengl_present.vert ) +find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) + +set(GLSL_FLAGS "") + set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) @@ -10,27 +14,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) -foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) +foreach(FILENAME IN ITEMS ${SHADER_FILES}) string(REPLACE "." 
"_" SHADER_NAME ${FILENAME}) set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) - set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) - add_custom_command( - OUTPUT - ${HEADER_FILE} - COMMAND - ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} - MAIN_DEPENDENCY - ${SOURCE_FILE} - DEPENDS - ${INPUT_FILE} - # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified - ) - set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) + # Skip generating source headers on Vulkan exclusive files + if (NOT ${FILENAME} MATCHES "vulkan.*") + set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) + add_custom_command( + OUTPUT + ${SOURCE_HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${INPUT_FILE} + # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) + endif() + # Skip compiling to SPIR-V OpenGL exclusive files + if (NOT ${FILENAME} MATCHES "opengl.*") + string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME) + set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h) + add_custom_command( + OUTPUT + ${SPIRV_HEADER_FILE} + COMMAND + ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) + endif() endforeach() add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_SOURCES} + ${SHADER_FILES} ) From f20e18f60d1411e54d1d85615db7edefda93e265 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:35:56 -0300 Subject: [PATCH 02/13] host_shaders: Add copyright headers to OpenGL present shaders --- src/video_core/host_shaders/opengl_present.frag | 4 ++++ src/video_core/host_shaders/opengl_present.vert | 4 ++++ 2 
files changed, 8 insertions(+) diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag index 8a4cb024b..84b818227 100644 --- a/src/video_core/host_shaders/opengl_present.frag +++ b/src/video_core/host_shaders/opengl_present.frag @@ -1,3 +1,7 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + #version 430 core layout (location = 0) in vec2 frag_tex_coord; diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert index 2235d31a4..c3b5adbba 100644 --- a/src/video_core/host_shaders/opengl_present.vert +++ b/src/video_core/host_shaders/opengl_present.vert @@ -1,3 +1,7 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + #version 430 core out gl_PerVertex { From 12d16248dd61d2998ee271e16d6980d6df963bbe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:39:05 -0300 Subject: [PATCH 03/13] host_shaders: Add block linear upload compute shaders --- src/video_core/host_shaders/CMakeLists.txt | 2 + .../block_linear_unswizzle_2d.comp | 122 +++++++++++++++++ .../block_linear_unswizzle_3d.comp | 125 ++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 src/video_core/host_shaders/block_linear_unswizzle_2d.comp create mode 100644 src/video_core/host_shaders/block_linear_unswizzle_3d.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index ff20bc93b..7feb6df99 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,4 +1,6 @@ set(SHADER_FILES + block_linear_unswizzle_2d.comp + block_linear_unswizzle_3d.comp opengl_present.frag opengl_present.vert ) diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp 
b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp new file mode 100644 index 000000000..a131be79e --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint layer_stride; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer 
InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += pos.z * layer_stride; + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + imageStore(output_image, coord, texel); +} diff --git 
a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp new file mode 100644 index 000000000..bb6872e6b --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp @@ -0,0 +1,125 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint slice_size; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +UNIFORM(8) uint block_depth; +UNIFORM(9) uint block_depth_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = 
BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; + +layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += (pos.z >> block_depth) * slice_size; + offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += 
swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + imageStore(output_image, coord, texel); +} From 59c46f9de94d3eab3aec3ff2abc74bd3aa8a056c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:41:42 -0300 Subject: [PATCH 04/13] host_shaders: Add pitch linear upload compute shader --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/pitch_unswizzle.comp | 86 +++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/video_core/host_shaders/pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 7feb6df99..1983e7dc9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -3,6 +3,7 @@ set(SHADER_FILES block_linear_unswizzle_3d.comp opengl_present.frag opengl_present.vert + pitch_unswizzle.comp ) find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp new file mode 100644 index 000000000..cb48ec170 --- /dev/null +++ b/src/video_core/host_shaders/pitch_unswizzle.comp @@ -0,0 +1,86 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec2 origin; +UNIFORM(1) ivec2 destination; +UNIFORM(2) uint bytes_per_block; +UNIFORM(3) uint pitch; +END_PUSH_CONSTANTS + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block) { +#if HAS_EXTENDED_TYPES + case 1: + return uvec4(u8data[offset], 0, 0, 0); + case 2: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 2: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 4: 
+ return uvec4(u32data[offset / 4], 0, 0, 0); + case 8: + return uvec4(u64data[offset / 8], 0, 0); + case 16: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy + origin; + + uint offset = 0; + offset += pos.x * bytes_per_block; + offset += pos.y * pitch; + + const uvec4 texel = ReadTexel(offset); + const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination; + imageStore(output_image, coord, texel); +} From 5169ce9fcd6240f5aaa639786e82f6589d917907 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:44:09 -0300 Subject: [PATCH 05/13] host_shaders: Add shader to render a full screen triangle --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/full_screen_triangle.vert | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 src/video_core/host_shaders/full_screen_triangle.vert diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 1983e7dc9..5770c4761 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,6 +1,7 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp + full_screen_triangle.vert opengl_present.frag opengl_present.vert pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert new file mode 100644 index 000000000..452ad6502 --- /dev/null +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +#ifdef VULKAN +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) vec2 tex_scale; +UNIFORM(1) vec2 tex_offset; +END_PUSH_CONSTANTS + +layout(location = 0) out vec2 texcoord; + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); +} From dc81a906402b0a39c3ff1964cdf935b46b062b5d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:47:08 -0300 Subject: [PATCH 06/13] host_shaders: Add compute shader to copy BC4 as RG32UI to RGBA8 --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/opengl_copy_bc4.comp | 70 +++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 src/video_core/host_shaders/opengl_copy_bc4.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 5770c4761..338bf9eec 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -2,6 +2,7 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp full_screen_triangle.vert + opengl_copy_bc4.comp opengl_present.frag opengl_present.vert pitch_unswizzle.comp diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp new file mode 100644 index 000000000..7b8e20fbe --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bc4.comp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 core +#extension GL_ARB_gpu_shader_int64 : require + +layout (local_size_x = 4, local_size_y = 4) in; + +layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input; +layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output; + +layout(location = 0) uniform uvec3 src_offset; +layout(location = 1) uniform uvec3 dst_offset; + +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt +uint DecompressBlock(uint64_t bits, uvec2 coord) { + const uint code_offset = 16 + 3 * (4 * coord.y + coord.x); + const uint code = uint(bits >> code_offset) & 7; + const uint red0 = uint(bits >> 0) & 0xff; + const uint red1 = uint(bits >> 8) & 0xff; + if (red0 > red1) { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (6 * red0 + 1 * red1) / 7; + case 3: + return (5 * red0 + 2 * red1) / 7; + case 4: + return (4 * red0 + 3 * red1) / 7; + case 5: + return (3 * red0 + 4 * red1) / 7; + case 6: + return (2 * red0 + 5 * red1) / 7; + case 7: + return (1 * red0 + 6 * red1) / 7; + } + } else { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (4 * red0 + 1 * red1) / 5; + case 3: + return (3 * red0 + 2 * red1) / 5; + case 4: + return (2 * red0 + 3 * red1) / 5; + case 5: + return (1 * red0 + 4 * red1) / 5; + case 6: + return 0; + case 7: + return 0xff; + } + } + return 0; +} + +void main() { + uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg; + uint64_t bits = packUint2x32(packed_bits); + uint red = DecompressBlock(bits, gl_LocalInvocationID.xy); + uvec4 color = uvec4(red & 0xff, 0, 0, 0xff); + imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color); +} From 82b7daed9cc119d1cb373e73ba3240a2e085f3eb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:48:44 -0300 Subject: [PATCH 07/13] host_shaders: Add shaders to convert between depth and color images --- src/video_core/host_shaders/CMakeLists.txt 
| 2 ++ .../host_shaders/convert_depth_to_float.frag | 13 +++++++++++++ .../host_shaders/convert_float_to_depth.frag | 13 +++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 src/video_core/host_shaders/convert_depth_to_float.frag create mode 100644 src/video_core/host_shaders/convert_float_to_depth.frag diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 338bf9eec..faf298f1c 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,6 +1,8 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp + convert_depth_to_float.frag + convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp opengl_present.frag diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag new file mode 100644 index 000000000..624c58509 --- /dev/null +++ b/src/video_core/host_shaders/convert_depth_to_float.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_texture; +layout(location = 0) out float output_color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + output_color = texelFetch(depth_texture, coord, 0).r; +} diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag new file mode 100644 index 000000000..d86c795f4 --- /dev/null +++ b/src/video_core/host_shaders/convert_float_to_depth.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + float color = texelFetch(color_texture, coord, 0).r; + gl_FragDepth = color; +} From 64fbf319f1d4103531c0d8aecaa330346a9ecc81 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:59:12 -0300 Subject: [PATCH 08/13] host_shaders: Add shaders to present to the swapchain --- src/video_core/host_shaders/CMakeLists.txt | 2 ++ .../host_shaders/vulkan_present.frag | 15 +++++++++++++++ .../host_shaders/vulkan_present.vert | 19 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_present.frag create mode 100644 src/video_core/host_shaders/vulkan_present.vert diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index faf298f1c..6084984f9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -8,6 +8,8 @@ set(SHADER_FILES opengl_present.frag opengl_present.vert pitch_unswizzle.comp + vulkan_present.frag + vulkan_present.vert ) find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) diff --git a/src/video_core/host_shaders/vulkan_present.frag b/src/video_core/host_shaders/vulkan_present.frag new file mode 100644 index 000000000..0979ff3e6 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present.frag @@ -0,0 +1,15 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = 1) uniform sampler2D color_texture; + +void main() { + color = texture(color_texture, frag_tex_coord); +} diff --git a/src/video_core/host_shaders/vulkan_present.vert b/src/video_core/host_shaders/vulkan_present.vert new file mode 100644 index 000000000..00b868958 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present.vert @@ -0,0 +1,19 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; + +layout (location = 0) out vec2 frag_tex_coord; + +layout (set = 0, binding = 0) uniform MatrixBlock { + mat4 modelview_matrix; +}; + +void main() { + gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} From ae5725b70901431646342eb2d51801478d86fdca Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:00:48 -0300 Subject: [PATCH 09/13] host_shaders: Add texture color blit fragment shader --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/vulkan_blit_color_float.frag | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_blit_color_float.frag diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 6084984f9..5b24f9866 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -8,6 +8,7 @@ set(SHADER_FILES opengl_present.frag opengl_present.vert pitch_unswizzle.comp + vulkan_blit_color_float.frag vulkan_present.frag vulkan_present.vert ) diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag new file mode 100644 index 000000000..4a6aae410 --- 
/dev/null +++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag @@ -0,0 +1,14 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D tex; + +layout(location = 0) in vec2 texcoord; +layout(location = 0) out vec4 color; + +void main() { + color = textureLod(tex, texcoord, 0); +} From 87ff58b1d713df6f4c292e56d1af20c0bfb9598f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:02:07 -0300 Subject: [PATCH 10/13] host_shaders: Add helper to blit depth stencil fragment shader --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/vulkan_blit_depth_stencil.frag | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_blit_depth_stencil.frag diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 5b24f9866..7059c2d2a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -9,6 +9,7 @@ set(SHADER_FILES opengl_present.vert pitch_unswizzle.comp vulkan_blit_color_float.frag + vulkan_blit_depth_stencil.frag vulkan_present.frag vulkan_present.vert ) diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag new file mode 100644 index 000000000..19bb23a5a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag @@ -0,0 +1,16 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) in vec2 texcoord; + +void main() { + gl_FragDepth = textureLod(depth_tex, texcoord, 0).r; + gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r; +} From 21b18057f7035e1442be20667662efba911653df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:03:50 -0300 Subject: [PATCH 11/13] host_shaders: Add Vulkan assembler compute shaders --- src/video_core/host_shaders/CMakeLists.txt | 3 ++ .../host_shaders/vulkan_quad_array.comp | 28 +++++++++++++ .../host_shaders/vulkan_quad_indexed.comp | 41 +++++++++++++++++++ src/video_core/host_shaders/vulkan_uint8.comp | 24 +++++++++++ 4 files changed, 96 insertions(+) create mode 100644 src/video_core/host_shaders/vulkan_quad_array.comp create mode 100644 src/video_core/host_shaders/vulkan_quad_indexed.comp create mode 100644 src/video_core/host_shaders/vulkan_uint8.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 7059c2d2a..4c7399d5a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -12,6 +12,9 @@ set(SHADER_FILES vulkan_blit_depth_stencil.frag vulkan_present.frag vulkan_present.vert + vulkan_quad_array.comp + vulkan_quad_indexed.comp + vulkan_uint8.comp ) find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp new file mode 100644 index 000000000..212f4e998 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_quad_array.comp @@ -0,0 +1,28 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) buffer OutputBuffer { + uint output_indexes[]; +}; + +layout (push_constant) uniform PushConstants { + uint first; +}; + +void main() { + uint primitive = gl_GlobalInvocationID.x; + if (primitive * 6 >= output_indexes.length()) { + return; + } + + const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); + for (uint vertex = 0; vertex < 6; ++vertex) { + uint index = first + primitive * 4 + quad_map[vertex]; + output_indexes[primitive * 6 + vertex] = index; + } +} diff --git a/src/video_core/host_shaders/vulkan_quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp new file mode 100644 index 000000000..8655591d0 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { + uint input_indexes[]; +}; + +layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { + uint output_indexes[]; +}; + +layout (push_constant) uniform PushConstants { + uint base_vertex; + int index_shift; // 0: uint8, 1: uint16, 2: uint32 +}; + +void main() { + int primitive = int(gl_GlobalInvocationID.x); + if (primitive * 6 >= output_indexes.length()) { + return; + } + + int index_size = 8 << index_shift; + int flipped_shift = 2 - index_shift; + int mask = (1 << flipped_shift) - 1; + + const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); + for (uint vertex = 0; vertex < 6; ++vertex) { + int offset = primitive * 4 + quad_swizzle[vertex]; + int int_offset = offset >> flipped_shift; + int bit_offset = (offset & mask) * index_size; + uint packed_input = input_indexes[int_offset]; + uint index = bitfieldExtract(packed_input, bit_offset, index_size); + output_indexes[primitive * 6 + vertex] = index 
+ base_vertex; + } +} diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp new file mode 100644 index 000000000..ad74d7af9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_uint8.comp @@ -0,0 +1,24 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { + uint8_t input_indexes[]; +}; + +layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { + uint16_t output_indexes[]; +}; + +void main() { + uint id = gl_GlobalInvocationID.x; + if (id < input_indexes.length()) { + output_indexes[id] = uint16_t(input_indexes[id]); + } +} From 9106ac1e6b912d7098845c346e5465b780bd70dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:06:22 -0300 Subject: [PATCH 12/13] video_core: Add a delayed destruction ring abstraction --- src/video_core/CMakeLists.txt | 1 + src/video_core/delayed_destruction_ring.h | 32 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 src/video_core/delayed_destruction_ring.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5b73724ce..acf96f789 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -25,6 +25,7 @@ add_library(video_core STATIC command_classes/vic.h compatible_formats.cpp compatible_formats.h + delayed_destruction_ring.h dirty_flags.cpp dirty_flags.h dma_pusher.cpp diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h new file mode 100644 index 000000000..4f1d29c04 --- /dev/null +++ b/src/video_core/delayed_destruction_ring.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version 
+// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include + +namespace VideoCommon { + +/// Container to push objects to be destroyed a few ticks in the future +template +class DelayedDestructionRing { +public: + void Tick() { + index = (index + 1) % TICKS_TO_DESTROY; + elements[index].clear(); + } + + void Push(T&& object) { + elements[index].push_back(std::move(object)); + } + +private: + size_t index = 0; + std::array, TICKS_TO_DESTROY> elements; +}; + +} // namespace VideoCommon From 9764c13d6d2977903f407761b27d847c0056e1c4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:25:23 -0300 Subject: [PATCH 13/13] video_core: Rewrite the texture cache The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues.
--- src/video_core/CMakeLists.txt | 47 +- src/video_core/buffer_cache/buffer_cache.h | 19 +- src/video_core/command_classes/vic.cpp | 8 +- src/video_core/compatible_formats.cpp | 142 +- src/video_core/compatible_formats.h | 23 +- src/video_core/dirty_flags.cpp | 7 + src/video_core/dirty_flags.h | 3 + src/video_core/engines/fermi_2d.cpp | 89 +- src/video_core/engines/fermi_2d.h | 343 ++- src/video_core/engines/kepler_compute.cpp | 26 +- src/video_core/engines/kepler_compute.h | 5 - src/video_core/engines/maxwell_3d.cpp | 45 +- src/video_core/engines/maxwell_3d.h | 127 +- src/video_core/engines/maxwell_dma.cpp | 3 + src/video_core/fence_manager.h | 17 +- src/video_core/memory_manager.cpp | 5 +- src/video_core/morton.cpp | 250 -- src/video_core/morton.h | 18 - src/video_core/rasterizer_interface.h | 12 +- .../renderer_opengl/gl_buffer_cache.cpp | 7 +- .../renderer_opengl/gl_buffer_cache.h | 8 +- src/video_core/renderer_opengl/gl_device.cpp | 64 +- src/video_core/renderer_opengl/gl_device.h | 13 +- .../renderer_opengl/gl_fence_manager.cpp | 2 +- .../renderer_opengl/gl_fence_manager.h | 4 +- .../renderer_opengl/gl_framebuffer_cache.cpp | 85 - .../renderer_opengl/gl_framebuffer_cache.h | 68 - .../renderer_opengl/gl_rasterizer.cpp | 508 ++-- .../renderer_opengl/gl_rasterizer.h | 63 +- .../renderer_opengl/gl_resource_manager.cpp | 2 +- .../renderer_opengl/gl_sampler_cache.cpp | 52 - .../renderer_opengl/gl_sampler_cache.h | 25 - .../renderer_opengl/gl_shader_cache.cpp | 1 - .../renderer_opengl/gl_shader_decompiler.cpp | 8 +- .../renderer_opengl/gl_shader_decompiler.h | 4 +- .../renderer_opengl/gl_shader_manager.cpp | 15 + .../renderer_opengl/gl_shader_manager.h | 6 + .../renderer_opengl/gl_state_tracker.cpp | 7 + .../renderer_opengl/gl_state_tracker.h | 15 +- .../renderer_opengl/gl_stream_buffer.cpp | 32 +- .../renderer_opengl/gl_stream_buffer.h | 19 +- .../renderer_opengl/gl_texture_cache.cpp | 1592 ++++++---- .../renderer_opengl/gl_texture_cache.h | 308 +- 
.../renderer_opengl/maxwell_to_gl.h | 13 + .../renderer_opengl/renderer_opengl.cpp | 47 +- .../renderer_opengl/renderer_opengl.h | 1 + .../renderer_opengl/util_shaders.cpp | 224 ++ src/video_core/renderer_opengl/util_shaders.h | 51 + src/video_core/renderer_opengl/utils.cpp | 42 - src/video_core/renderer_opengl/utils.h | 16 - src/video_core/renderer_vulkan/blit_image.cpp | 624 ++++ src/video_core/renderer_vulkan/blit_image.h | 97 + .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/maxwell_to_vk.cpp | 30 +- .../renderer_vulkan/maxwell_to_vk.h | 2 + .../renderer_vulkan/renderer_vulkan.cpp | 13 +- .../renderer_vulkan/renderer_vulkan.h | 3 +- .../renderer_vulkan/shaders/blit.frag | 24 - .../renderer_vulkan/shaders/blit.vert | 28 - .../renderer_vulkan/shaders/quad_array.comp | 37 - .../renderer_vulkan/shaders/quad_indexed.comp | 50 - .../renderer_vulkan/shaders/uint8.comp | 33 - .../renderer_vulkan/vk_blit_screen.cpp | 301 +- .../renderer_vulkan/vk_blit_screen.h | 4 +- .../renderer_vulkan/vk_buffer_cache.cpp | 67 +- .../renderer_vulkan/vk_buffer_cache.h | 10 +- .../renderer_vulkan/vk_compute_pass.cpp | 327 +-- .../renderer_vulkan/vk_compute_pass.h | 4 +- src/video_core/renderer_vulkan/vk_device.cpp | 110 +- src/video_core/renderer_vulkan/vk_device.h | 34 + .../renderer_vulkan/vk_fence_manager.cpp | 7 +- .../renderer_vulkan/vk_fence_manager.h | 6 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 64 +- .../renderer_vulkan/vk_graphics_pipeline.h | 26 +- src/video_core/renderer_vulkan/vk_image.cpp | 135 - src/video_core/renderer_vulkan/vk_image.h | 84 - .../renderer_vulkan/vk_memory_manager.cpp | 2 +- .../renderer_vulkan/vk_memory_manager.h | 20 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 28 +- .../renderer_vulkan/vk_pipeline_cache.h | 16 +- .../renderer_vulkan/vk_rasterizer.cpp | 710 ++--- .../renderer_vulkan/vk_rasterizer.h | 131 +- .../renderer_vulkan/vk_renderpass_cache.cpp | 158 - 
.../renderer_vulkan/vk_renderpass_cache.h | 70 - .../renderer_vulkan/vk_sampler_cache.cpp | 83 - .../renderer_vulkan/vk_sampler_cache.h | 29 - .../renderer_vulkan/vk_scheduler.cpp | 79 +- src/video_core/renderer_vulkan/vk_scheduler.h | 14 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 6 +- .../renderer_vulkan/vk_shader_decompiler.h | 8 +- .../renderer_vulkan/vk_shader_util.cpp | 11 +- .../renderer_vulkan/vk_shader_util.h | 4 +- .../renderer_vulkan/vk_state_tracker.cpp | 23 +- .../renderer_vulkan/vk_state_tracker.h | 8 + .../renderer_vulkan/vk_stream_buffer.cpp | 20 +- .../renderer_vulkan/vk_stream_buffer.h | 12 +- .../renderer_vulkan/vk_texture_cache.cpp | 1527 ++++++---- .../renderer_vulkan/vk_texture_cache.h | 444 +-- .../renderer_vulkan/vk_update_descriptor.h | 30 +- src/video_core/renderer_vulkan/wrapper.cpp | 69 +- src/video_core/renderer_vulkan/wrapper.h | 117 +- src/video_core/sampler_cache.cpp | 21 - src/video_core/sampler_cache.h | 60 - src/video_core/shader/async_shaders.cpp | 9 +- src/video_core/shader/async_shaders.h | 6 +- src/video_core/shader/decode.cpp | 6 +- src/video_core/shader/decode/image.cpp | 11 +- src/video_core/shader/decode/texture.cpp | 56 +- src/video_core/shader/node.h | 33 +- src/video_core/shader/shader_ir.h | 18 +- src/video_core/surface.cpp | 2 +- src/video_core/surface.h | 152 +- .../texture_cache/accelerated_swizzle.cpp | 70 + .../texture_cache/accelerated_swizzle.h | 45 + src/video_core/texture_cache/copy_params.h | 36 - src/video_core/texture_cache/decode_bc4.cpp | 97 + src/video_core/texture_cache/decode_bc4.h | 16 + .../texture_cache/descriptor_table.h | 82 + .../texture_cache/format_lookup_table.cpp | 378 +-- .../texture_cache/format_lookup_table.h | 42 +- src/video_core/texture_cache/formatter.cpp | 95 + src/video_core/texture_cache/formatter.h | 263 ++ src/video_core/texture_cache/image_base.cpp | 216 ++ src/video_core/texture_cache/image_base.h | 83 + src/video_core/texture_cache/image_info.cpp | 189 ++ 
src/video_core/texture_cache/image_info.h | 38 + .../texture_cache/image_view_base.cpp | 41 + .../texture_cache/image_view_base.h | 47 + .../texture_cache/image_view_info.cpp | 88 + .../texture_cache/image_view_info.h | 50 + src/video_core/texture_cache/render_targets.h | 51 + src/video_core/texture_cache/samples_helper.h | 55 + src/video_core/texture_cache/slot_vector.h | 156 + src/video_core/texture_cache/surface_base.cpp | 299 -- src/video_core/texture_cache/surface_base.h | 333 --- .../texture_cache/surface_params.cpp | 445 --- src/video_core/texture_cache/surface_params.h | 294 -- src/video_core/texture_cache/surface_view.cpp | 27 - src/video_core/texture_cache/surface_view.h | 68 - src/video_core/texture_cache/texture_cache.h | 2551 +++++++++-------- src/video_core/texture_cache/types.h | 140 + src/video_core/texture_cache/util.cpp | 1232 ++++++++ src/video_core/texture_cache/util.h | 107 + src/video_core/textures/astc.cpp | 58 +- src/video_core/textures/astc.h | 5 +- src/video_core/textures/convert.cpp | 93 - src/video_core/textures/convert.h | 22 - src/video_core/textures/decoders.cpp | 251 +- src/video_core/textures/decoders.h | 38 +- src/video_core/textures/texture.cpp | 16 +- src/video_core/textures/texture.h | 237 +- 152 files changed, 10609 insertions(+), 8351 deletions(-) delete mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.h delete mode 100644 src/video_core/renderer_opengl/gl_sampler_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_sampler_cache.h create mode 100644 src/video_core/renderer_opengl/util_shaders.cpp create mode 100644 src/video_core/renderer_opengl/util_shaders.h delete mode 100644 src/video_core/renderer_opengl/utils.cpp delete mode 100644 src/video_core/renderer_opengl/utils.h create mode 100644 src/video_core/renderer_vulkan/blit_image.cpp create mode 100644 src/video_core/renderer_vulkan/blit_image.h delete mode 100644 
src/video_core/renderer_vulkan/shaders/blit.frag delete mode 100644 src/video_core/renderer_vulkan/shaders/blit.vert delete mode 100644 src/video_core/renderer_vulkan/shaders/quad_array.comp delete mode 100644 src/video_core/renderer_vulkan/shaders/quad_indexed.comp delete mode 100644 src/video_core/renderer_vulkan/shaders/uint8.comp delete mode 100644 src/video_core/renderer_vulkan/vk_image.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_image.h delete mode 100644 src/video_core/renderer_vulkan/vk_renderpass_cache.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_renderpass_cache.h delete mode 100644 src/video_core/renderer_vulkan/vk_sampler_cache.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_sampler_cache.h delete mode 100644 src/video_core/sampler_cache.cpp delete mode 100644 src/video_core/sampler_cache.h create mode 100644 src/video_core/texture_cache/accelerated_swizzle.cpp create mode 100644 src/video_core/texture_cache/accelerated_swizzle.h delete mode 100644 src/video_core/texture_cache/copy_params.h create mode 100644 src/video_core/texture_cache/decode_bc4.cpp create mode 100644 src/video_core/texture_cache/decode_bc4.h create mode 100644 src/video_core/texture_cache/descriptor_table.h create mode 100644 src/video_core/texture_cache/formatter.cpp create mode 100644 src/video_core/texture_cache/formatter.h create mode 100644 src/video_core/texture_cache/image_base.cpp create mode 100644 src/video_core/texture_cache/image_base.h create mode 100644 src/video_core/texture_cache/image_info.cpp create mode 100644 src/video_core/texture_cache/image_info.h create mode 100644 src/video_core/texture_cache/image_view_base.cpp create mode 100644 src/video_core/texture_cache/image_view_base.h create mode 100644 src/video_core/texture_cache/image_view_info.cpp create mode 100644 src/video_core/texture_cache/image_view_info.h create mode 100644 src/video_core/texture_cache/render_targets.h create mode 100644 
src/video_core/texture_cache/samples_helper.h create mode 100644 src/video_core/texture_cache/slot_vector.h delete mode 100644 src/video_core/texture_cache/surface_base.cpp delete mode 100644 src/video_core/texture_cache/surface_base.h delete mode 100644 src/video_core/texture_cache/surface_params.cpp delete mode 100644 src/video_core/texture_cache/surface_params.h delete mode 100644 src/video_core/texture_cache/surface_view.cpp delete mode 100644 src/video_core/texture_cache/surface_view.h create mode 100644 src/video_core/texture_cache/types.h create mode 100644 src/video_core/texture_cache/util.cpp create mode 100644 src/video_core/texture_cache/util.h delete mode 100644 src/video_core/textures/convert.cpp delete mode 100644 src/video_core/textures/convert.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index acf96f789..948e167c3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -85,14 +85,10 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_framebuffer_cache.cpp - renderer_opengl/gl_framebuffer_cache.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h - renderer_opengl/gl_sampler_cache.cpp - renderer_opengl/gl_sampler_cache.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_decompiler.cpp @@ -114,8 +110,10 @@ add_library(video_core STATIC renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h - renderer_opengl/utils.cpp - renderer_opengl/utils.h + renderer_opengl/util_shaders.cpp + renderer_opengl/util_shaders.h + renderer_vulkan/blit_image.cpp + renderer_vulkan/blit_image.h renderer_vulkan/fixed_pipeline_state.cpp renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp @@ -142,8 +140,6 @@ 
add_library(video_core STATIC renderer_vulkan/vk_fence_manager.h renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.h - renderer_vulkan/vk_image.cpp - renderer_vulkan/vk_image.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_memory_manager.cpp @@ -154,12 +150,8 @@ add_library(video_core STATIC renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h - renderer_vulkan/vk_renderpass_cache.cpp - renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h - renderer_vulkan/vk_sampler_cache.cpp - renderer_vulkan/vk_sampler_cache.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h renderer_vulkan/vk_shader_decompiler.cpp @@ -180,8 +172,6 @@ add_library(video_core STATIC renderer_vulkan/vk_update_descriptor.h renderer_vulkan/wrapper.cpp renderer_vulkan/wrapper.h - sampler_cache.cpp - sampler_cache.h shader_cache.h shader_notify.cpp shader_notify.h @@ -238,19 +228,32 @@ add_library(video_core STATIC shader/transform_feedback.h surface.cpp surface.h + texture_cache/accelerated_swizzle.cpp + texture_cache/accelerated_swizzle.h + texture_cache/decode_bc4.cpp + texture_cache/decode_bc4.h + texture_cache/descriptor_table.h + texture_cache/formatter.cpp + texture_cache/formatter.h texture_cache/format_lookup_table.cpp texture_cache/format_lookup_table.h - texture_cache/surface_base.cpp - texture_cache/surface_base.h - texture_cache/surface_params.cpp - texture_cache/surface_params.h - texture_cache/surface_view.cpp - texture_cache/surface_view.h + texture_cache/image_base.cpp + texture_cache/image_base.h + texture_cache/image_info.cpp + texture_cache/image_info.h + texture_cache/image_view_base.cpp + texture_cache/image_view_base.h + texture_cache/image_view_info.cpp + texture_cache/image_view_info.h + texture_cache/render_targets.h + texture_cache/samples_helper.h + texture_cache/slot_vector.h 
texture_cache/texture_cache.h + texture_cache/types.h + texture_cache/util.cpp + texture_cache/util.h textures/astc.cpp textures/astc.h - textures/convert.cpp - textures/convert.h textures/decoders.cpp textures/decoders.h textures/texture.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 38961f3fd..83b9ee871 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -118,20 +118,17 @@ public: /// Prepares the buffer cache for data uploading /// @param max_size Maximum number of bytes that will be uploaded /// @return True when a stream buffer invalidation was required, false otherwise - bool Map(std::size_t max_size) { + void Map(std::size_t max_size) { std::lock_guard lock{mutex}; - bool invalidated; - std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); + std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); buffer_offset = buffer_offset_base; - - return invalidated; } /// Finishes the upload stream void Unmap() { std::lock_guard lock{mutex}; - stream_buffer->Unmap(buffer_offset - buffer_offset_base); + stream_buffer.Unmap(buffer_offset - buffer_offset_base); } /// Function called at the end of each frame, inteded for deferred operations @@ -261,9 +258,9 @@ public: protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - std::unique_ptr stream_buffer_) + StreamBuffer& stream_buffer_) : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, - stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} + stream_buffer{stream_buffer_} {} ~BufferCache() = default; @@ -441,7 +438,7 @@ private: buffer_ptr += size; buffer_offset += size; - return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; + return 
BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()}; } void AlignBuffer(std::size_t alignment) { @@ -567,9 +564,7 @@ private: VideoCore::RasterizerInterface& rasterizer; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - - std::unique_ptr stream_buffer; - BufferType stream_buffer_handle; + StreamBuffer& stream_buffer; u8* buffer_ptr = nullptr; u64 buffer_offset = 0; diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 66e21ce9c..aa8c9f9de 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -9,7 +9,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/decoders.h" extern "C" { #include @@ -105,9 +105,9 @@ void Vic::Execute() { const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, block_height, 0); std::vector swizzled_data(size); - Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, - swizzled_data.data(), converted_frame_buffer.get(), - false, block_height, 0, 1); + Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, + frame->width, 4, swizzled_data.data(), + converted_frame_buffer.get(), block_height, 0, 0); gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); gpu.Maxwell3D().OnMemoryWrite(); diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index b06c32c84..1619d8664 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included. 
#include -#include #include +#include "common/common_types.h" #include "video_core/compatible_formats.h" #include "video_core/surface.h" @@ -13,23 +13,25 @@ namespace VideoCore::Surface { namespace { +using Table = std::array, MaxPixelFormat>; + // Compatibility table taken from Table 3.X.2 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt -constexpr std::array VIEW_CLASS_128_BITS = { +constexpr std::array VIEW_CLASS_128_BITS{ PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_SINT, }; -constexpr std::array VIEW_CLASS_96_BITS = { +constexpr std::array VIEW_CLASS_96_BITS{ PixelFormat::R32G32B32_FLOAT, }; // Missing formats: // PixelFormat::RGB32UI, // PixelFormat::RGB32I, -constexpr std::array VIEW_CLASS_64_BITS = { +constexpr std::array VIEW_CLASS_64_BITS{ PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, @@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = { // TODO: How should we handle 48 bits? -constexpr std::array VIEW_CLASS_32_BITS = { +constexpr std::array VIEW_CLASS_32_BITS{ PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, @@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = { // TODO: How should we handle 24 bits? 
-constexpr std::array VIEW_CLASS_16_BITS = { +constexpr std::array VIEW_CLASS_16_BITS{ PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, }; -constexpr std::array VIEW_CLASS_8_BITS = { +constexpr std::array VIEW_CLASS_8_BITS{ PixelFormat::R8_UINT, PixelFormat::R8_UNORM, PixelFormat::R8_SINT, PixelFormat::R8_SNORM, }; -constexpr std::array VIEW_CLASS_RGTC1_RED = { +constexpr std::array VIEW_CLASS_RGTC1_RED{ PixelFormat::BC4_UNORM, PixelFormat::BC4_SNORM, }; -constexpr std::array VIEW_CLASS_RGTC2_RG = { +constexpr std::array VIEW_CLASS_RGTC2_RG{ PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, }; -constexpr std::array VIEW_CLASS_BPTC_UNORM = { +constexpr std::array VIEW_CLASS_BPTC_UNORM{ PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, }; -constexpr std::array VIEW_CLASS_BPTC_FLOAT = { +constexpr std::array VIEW_CLASS_BPTC_FLOAT{ PixelFormat::BC6H_SFLOAT, PixelFormat::BC6H_UFLOAT, }; +constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{ + PixelFormat::ASTC_2D_4X4_UNORM, + PixelFormat::ASTC_2D_4X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{ + PixelFormat::ASTC_2D_5X4_UNORM, + PixelFormat::ASTC_2D_5X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{ + PixelFormat::ASTC_2D_5X5_UNORM, + PixelFormat::ASTC_2D_5X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{ + PixelFormat::ASTC_2D_6X5_UNORM, + PixelFormat::ASTC_2D_6X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{ + PixelFormat::ASTC_2D_6X6_UNORM, + PixelFormat::ASTC_2D_6X6_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{ + PixelFormat::ASTC_2D_8X5_UNORM, + PixelFormat::ASTC_2D_8X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{ + PixelFormat::ASTC_2D_8X8_UNORM, + PixelFormat::ASTC_2D_8X8_SRGB, +}; + +// Missing formats: +// PixelFormat::ASTC_2D_10X5_UNORM +// PixelFormat::ASTC_2D_10X5_SRGB + 
+// Missing formats: +// PixelFormat::ASTC_2D_10X6_UNORM +// PixelFormat::ASTC_2D_10X6_SRGB + +constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ + PixelFormat::ASTC_2D_10X8_UNORM, + PixelFormat::ASTC_2D_10X8_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{ + PixelFormat::ASTC_2D_10X10_UNORM, + PixelFormat::ASTC_2D_10X10_SRGB, +}; + +// Missing formats +// ASTC_2D_12X10_UNORM, +// ASTC_2D_12X10_SRGB, + +constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ + PixelFormat::ASTC_2D_12X12_UNORM, + PixelFormat::ASTC_2D_12X12_SRGB, +}; + // Compatibility table taken from Table 4.X.1 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt -constexpr std::array COPY_CLASS_128_BITS = { +constexpr std::array COPY_CLASS_128_BITS{ PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, @@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = { // PixelFormat::RGBA32I // COMPRESSED_RG_RGTC2 -constexpr std::array COPY_CLASS_64_BITS = { +constexpr std::array COPY_CLASS_64_BITS{ PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, @@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = { // COMPRESSED_RGBA_S3TC_DXT1_EXT // COMPRESSED_SIGNED_RED_RGTC1 -void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { - compatiblity[format_a][format_b] = true; - compatiblity[format_b][format_a] = true; +constexpr void Enable(Table& table, size_t format_a, size_t format_b) { + table[format_a][format_b / 64] |= u64(1) << (format_b % 64); + table[format_b][format_a / 64] |= u64(1) << (format_a % 64); } -void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { - 
Enable(compatibility, static_cast(format_a), static_cast(format_b)); +constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) { + Enable(table, static_cast(format_a), static_cast(format_b)); } template -void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { +constexpr void EnableRange(Table& table, const Range& range) { for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { for (auto it_b = it_a; it_b != range.end(); ++it_b) { - Enable(compatibility, *it_a, *it_b); + Enable(table, *it_a, *it_b); } } } -} // Anonymous namespace +constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) { + const size_t a = static_cast(format_a); + const size_t b = static_cast(format_b); + return ((table[a][b / 64] >> (b % 64)) & 1) != 0; +} -FormatCompatibility::FormatCompatibility() { +constexpr Table MakeViewTable() { + Table view{}; for (size_t i = 0; i < MaxPixelFormat; ++i) { // Identity is allowed Enable(view, i, i); } - EnableRange(view, VIEW_CLASS_128_BITS); EnableRange(view, VIEW_CLASS_96_BITS); EnableRange(view, VIEW_CLASS_64_BITS); @@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() { EnableRange(view, VIEW_CLASS_RGTC2_RG); EnableRange(view, VIEW_CLASS_BPTC_UNORM); EnableRange(view, VIEW_CLASS_BPTC_FLOAT); + EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); + return view; +} - copy = view; +constexpr Table MakeCopyTable() { + Table copy = MakeViewTable(); EnableRange(copy, COPY_CLASS_128_BITS); EnableRange(copy, COPY_CLASS_64_BITS); + return copy; +} + 
+} // Anonymous namespace + +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) { + static constexpr Table TABLE = MakeViewTable(); + return IsSupported(TABLE, format_a, format_b); +} + +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { + static constexpr Table TABLE = MakeCopyTable(); + return IsSupported(TABLE, format_a, format_b); } } // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index 51766349b..b5eb03bea 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -4,31 +4,12 @@ #pragma once -#include -#include -#include - #include "video_core/surface.h" namespace VideoCore::Surface { -class FormatCompatibility { -public: - using Table = std::array, MaxPixelFormat>; +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b); - explicit FormatCompatibility(); - - bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { - return view[static_cast(format_a)][static_cast(format_b)]; - } - - bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { - return copy[static_cast(format_a)][static_cast(format_b)]; - } - -private: - Table view; - Table copy; -}; +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); } // namespace VideoCore::Surface diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 2faa6ef0e..b1eaac00c 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -16,6 +16,9 @@ namespace VideoCommon::Dirty { using Tegra::Engines::Maxwell3D; void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); + FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); + static constexpr std::size_t num_per_rt = NUM(rt[0]); static constexpr std::size_t begin = OFF(rt); static constexpr std::size_t num = num_per_rt * 
Maxwell3D::Regs::NumRenderTargets; @@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); } FillBlock(tables[1], begin, num, RenderTargets); + FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets); + + tables[0][OFF(rt_control)] = RenderTargets; + tables[1][OFF(rt_control)] = RenderTargetControl; static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 3f6c1d83a..875527ddd 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -16,7 +16,10 @@ namespace VideoCommon::Dirty { enum : u8 { NullEntry = 0, + Descriptors, + RenderTargets, + RenderTargetControl, ColorBuffer0, ColorBuffer1, ColorBuffer2, diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 4293d676c..a01d334ad 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,11 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D() = default; +Fermi2D::Fermi2D() { + // Nvidia's OpenGL driver seems to assume these values + regs.src.depth = 1; + regs.dst.depth = 1; +} Fermi2D::~Fermi2D() = default; @@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Fermi2D register, increase the size of the Regs structure"); - regs.reg_array[method] = method_argument; - switch (method) { - // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, - // so trigger on the second 32-bit write. 
- case FERMI2D_REG_INDEX(blit_src_y) + 1: { - HandleSurfaceCopy(); - break; - } + if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) { + Blit(); } } void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); + for (u32 i = 0; i < amount; ++i) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } } -static std::pair DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { - const u32 line_a = src_2 - src_1; - const u32 line_b = dst_2 - dst_1; - const u32 excess = std::max(0, line_a - src_line + src_1); - return {line_b - (excess * line_b) / line_a, excess}; -} +void Fermi2D::Blit() { + LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", + regs.src.Address(), regs.dst.Address()); -void Fermi2D::HandleSurfaceCopy() { - LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation); + UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy"); + UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one"); + UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); - // TODO(Subv): Only raw copies are implemented. 
- ASSERT(regs.operation == Operation::SrcCopy); - - const u32 src_blit_x1{static_cast(regs.blit_src_x >> 32)}; - const u32 src_blit_y1{static_cast(regs.blit_src_y >> 32)}; - u32 src_blit_x2, src_blit_y2; - if (regs.blit_control.origin == Origin::Corner) { - src_blit_x2 = - static_cast((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); - src_blit_y2 = - static_cast((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); - } else { - src_blit_x2 = static_cast((regs.blit_src_x >> 32) + regs.blit_dst_width); - src_blit_y2 = static_cast((regs.blit_src_y >> 32) + regs.blit_dst_height); - } - u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width; - u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height; - const auto [new_dst_w, src_excess_x] = - DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width); - const auto [new_dst_h, src_excess_y] = - DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height); - dst_blit_x2 = new_dst_w + regs.blit_dst_x; - src_blit_x2 = src_blit_x2 - src_excess_x; - dst_blit_y2 = new_dst_h + regs.blit_dst_y; - src_blit_y2 = src_blit_y2 - src_excess_y; - const auto [new_src_w, dst_excess_x] = - DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width); - const auto [new_src_h, dst_excess_y] = - DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height); - src_blit_x2 = new_src_w + src_blit_x1; - dst_blit_x2 = dst_blit_x2 - dst_excess_x; - src_blit_y2 = new_src_h + src_blit_y1; - dst_blit_y2 = dst_blit_y2 - dst_excess_y; - const Common::Rectangle src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const Common::Rectangle dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, - dst_blit_y2}; - const Config copy_config{ + const auto& args = regs.pixels_from_memory; + const Config config{ .operation = regs.operation, - .filter = regs.blit_control.filter, - .src_rect = src_rect, - .dst_rect = dst_rect, 
+ .filter = args.sample_mode.filter, + .dst_x0 = args.dst_x0, + .dst_y0 = args.dst_y0, + .dst_x1 = args.dst_x0 + args.dst_width, + .dst_y1 = args.dst_y0 + args.dst_height, + .src_x0 = static_cast(args.src_x0 >> 32), + .src_y0 = static_cast(args.src_y0 >> 32), + .src_x1 = static_cast((args.du_dx * args.dst_width + args.src_x0) >> 32), + .src_y1 = static_cast((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; - if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { + if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0909709ec..81522988e 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -53,8 +53,8 @@ public: }; enum class Filter : u32 { - PointSample = 0, // Nearest - Linear = 1, + Point = 0, + Bilinear = 1, }; enum class Operation : u32 { @@ -67,88 +67,235 @@ public: BlendPremult = 6, }; - struct Regs { - static constexpr std::size_t NUM_REGS = 0x258; + enum class MemoryLayout : u32 { + BlockLinear = 0, + Pitch = 1, + }; - struct Surface { - RenderTargetFormat format; - BitField<0, 1, u32> linear; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - }; - u32 depth; - u32 layer; - u32 pitch; - u32 width; - u32 height; - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast((static_cast(address_high) << 32) | - address_low); - } - - u32 BlockWidth() const { - return block_width.Value(); - } - - u32 BlockHeight() const { - return block_height.Value(); - } - - u32 BlockDepth() const { - return block_depth.Value(); - } - }; - static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + enum class CpuIndexWrap : u32 { + Wrap = 0, + NoWrap = 1, + }; + struct Surface { + RenderTargetFormat format; + MemoryLayout linear; union { + BitField<0, 4, u32> block_width; + 
BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; + u32 depth; + u32 layer; + u32 pitch; + u32 width; + u32 height; + u32 addr_upper; + u32 addr_lower; + + [[nodiscard]] constexpr GPUVAddr Address() const noexcept { + return (static_cast(addr_upper) << 32) | static_cast(addr_lower); + } + }; + static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + + enum class SectorPromotion : u32 { + NoPromotion = 0, + PromoteTo2V = 1, + PromoteTo2H = 2, + PromoteTo4 = 3, + }; + + enum class NumTpcs : u32 { + All = 0, + One = 1, + }; + + enum class RenderEnableMode : u32 { + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4, + }; + + enum class ColorKeyFormat : u32 { + A16R56G6B5 = 0, + A1R5G55B5 = 1, + A8R8G8B8 = 2, + A2R10G10B10 = 3, + Y8 = 4, + Y16 = 5, + Y32 = 6, + }; + + union Beta4 { + BitField<0, 8, u32> b; + BitField<8, 8, u32> g; + BitField<16, 8, u32> r; + BitField<24, 8, u32> a; + }; + + struct Point { + u32 x; + u32 y; + }; + + enum class PatternSelect : u32 { + MonoChrome8x8 = 0, + MonoChrome64x1 = 1, + MonoChrome1x64 = 2, + Color = 3, + }; + + enum class NotifyType : u32 { + WriteOnly = 0, + WriteThenAwaken = 1, + }; + + enum class MonochromePatternColorFormat : u32 { + A8X8R8G6B5 = 0, + A1R5G5B5 = 1, + A8R8G8B8 = 2, + A8Y8 = 3, + A8X8Y16 = 4, + Y32 = 5, + }; + + enum class MonochromePatternFormat : u32 { + CGA6_M1 = 0, + LE_M1 = 1, + }; + + union Regs { + static constexpr std::size_t NUM_REGS = 0x258; + struct { + u32 object; + INSERT_UNION_PADDING_WORDS(0x3F); + u32 no_operation; + NotifyType notify; + INSERT_UNION_PADDING_WORDS(0x2); + u32 wait_for_idle; + INSERT_UNION_PADDING_WORDS(0xB); + u32 pm_trigger; + INSERT_UNION_PADDING_WORDS(0xF); + u32 context_dma_notify; + u32 dst_context_dma; + u32 src_context_dma; + u32 semaphore_context_dma; + INSERT_UNION_PADDING_WORDS(0x1C); + Surface dst; + CpuIndexWrap pixels_from_cpu_index_wrap; + u32 kind2d_check_enable; + Surface src; + 
SectorPromotion pixels_from_memory_sector_promotion; + INSERT_UNION_PADDING_WORDS(0x1); + NumTpcs num_tpcs; + u32 render_enable_addr_upper; + u32 render_enable_addr_lower; + RenderEnableMode render_enable_mode; + INSERT_UNION_PADDING_WORDS(0x4); + u32 clip_x0; + u32 clip_y0; + u32 clip_width; + u32 clip_height; + BitField<0, 1, u32> clip_enable; + BitField<0, 3, ColorKeyFormat> color_key_format; + u32 color_key; + BitField<0, 1, u32> color_key_enable; + BitField<0, 8, u32> rop; + u32 beta1; + Beta4 beta4; + Operation operation; + union { + BitField<0, 6, u32> x; + BitField<8, 6, u32> y; + } pattern_offset; + BitField<0, 2, PatternSelect> pattern_select; + INSERT_UNION_PADDING_WORDS(0xC); struct { - INSERT_UNION_PADDING_WORDS(0x80); - - Surface dst; - - INSERT_UNION_PADDING_WORDS(2); - - Surface src; - - INSERT_UNION_PADDING_WORDS(0x15); - - Operation operation; - - INSERT_UNION_PADDING_WORDS(0x177); - + BitField<0, 3, MonochromePatternColorFormat> color_format; + BitField<0, 1, MonochromePatternFormat> format; + u32 color0; + u32 color1; + u32 pattern0; + u32 pattern1; + } monochrome_pattern; + struct { + std::array X8R8G8B8; + std::array R5G6B5; + std::array X1R5G5B5; + std::array Y8; + } color_pattern; + INSERT_UNION_PADDING_WORDS(0x10); + struct { + u32 prim_mode; + u32 prim_color_format; + u32 prim_color; + u32 line_tie_break_bits; + INSERT_UNION_PADDING_WORDS(0x14); + u32 prim_point_xy; + INSERT_UNION_PADDING_WORDS(0x7); + std::array prim_point; + } render_solid; + struct { + u32 data_type; + u32 color_format; + u32 index_format; + u32 mono_format; + u32 wrap; + u32 color0; + u32 color1; + u32 mono_opacity; + INSERT_UNION_PADDING_WORDS(0x6); + u32 src_width; + u32 src_height; + u32 dx_du_frac; + u32 dx_du_int; + u32 dx_dv_frac; + u32 dy_dv_int; + u32 dst_x0_frac; + u32 dst_x0_int; + u32 dst_y0_frac; + u32 dst_y0_int; + u32 data; + } pixels_from_cpu; + INSERT_UNION_PADDING_WORDS(0x3); + u32 big_endian_control; + INSERT_UNION_PADDING_WORDS(0x3); + struct { + 
BitField<0, 3, u32> block_shape; + BitField<0, 5, u32> corral_size; + BitField<0, 1, u32> safe_overlap; union { - u32 raw; BitField<0, 1, Origin> origin; BitField<4, 1, Filter> filter; - } blit_control; - + } sample_mode; INSERT_UNION_PADDING_WORDS(0x8); - - u32 blit_dst_x; - u32 blit_dst_y; - u32 blit_dst_width; - u32 blit_dst_height; - u64 blit_du_dx; - u64 blit_dv_dy; - u64 blit_src_x; - u64 blit_src_y; - - INSERT_UNION_PADDING_WORDS(0x21); - }; - std::array reg_array; + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 du_dx; + s64 dv_dy; + s64 src_x0; + s64 src_y0; + } pixels_from_memory; }; + std::array reg_array; } regs{}; struct Config { - Operation operation{}; - Filter filter{}; - Common::Rectangle src_rect; - Common::Rectangle dst_rect; + Operation operation; + Filter filter; + s32 dst_x0; + s32 dst_y0; + s32 dst_x1; + s32 dst_y1; + s32 src_x0; + s32 src_y0; + s32 src_x1; + s32 src_y1; }; private: @@ -156,25 +303,49 @@ private: /// Performs the copy from the source surface to the destination surface as configured in the /// registers. 
- void HandleSurfaceCopy(); + void Blit(); }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ + static_assert(offsetof(Fermi2D::Regs, field_name) == position, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(dst, 0x80); -ASSERT_REG_POSITION(src, 0x8C); -ASSERT_REG_POSITION(operation, 0xAB); -ASSERT_REG_POSITION(blit_control, 0x223); -ASSERT_REG_POSITION(blit_dst_x, 0x22c); -ASSERT_REG_POSITION(blit_dst_y, 0x22d); -ASSERT_REG_POSITION(blit_dst_width, 0x22e); -ASSERT_REG_POSITION(blit_dst_height, 0x22f); -ASSERT_REG_POSITION(blit_du_dx, 0x230); -ASSERT_REG_POSITION(blit_dv_dy, 0x232); -ASSERT_REG_POSITION(blit_src_x, 0x234); -ASSERT_REG_POSITION(blit_src_y, 0x236); +ASSERT_REG_POSITION(object, 0x0); +ASSERT_REG_POSITION(no_operation, 0x100); +ASSERT_REG_POSITION(notify, 0x104); +ASSERT_REG_POSITION(wait_for_idle, 0x110); +ASSERT_REG_POSITION(pm_trigger, 0x140); +ASSERT_REG_POSITION(context_dma_notify, 0x180); +ASSERT_REG_POSITION(dst_context_dma, 0x184); +ASSERT_REG_POSITION(src_context_dma, 0x188); +ASSERT_REG_POSITION(semaphore_context_dma, 0x18C); +ASSERT_REG_POSITION(dst, 0x200); +ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228); +ASSERT_REG_POSITION(kind2d_check_enable, 0x22C); +ASSERT_REG_POSITION(src, 0x230); +ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258); +ASSERT_REG_POSITION(num_tpcs, 0x260); +ASSERT_REG_POSITION(render_enable_addr_upper, 0x264); +ASSERT_REG_POSITION(render_enable_addr_lower, 0x268); +ASSERT_REG_POSITION(clip_x0, 0x280); +ASSERT_REG_POSITION(clip_y0, 0x284); +ASSERT_REG_POSITION(clip_width, 0x288); +ASSERT_REG_POSITION(clip_height, 0x28c); +ASSERT_REG_POSITION(clip_enable, 0x290); +ASSERT_REG_POSITION(color_key_format, 0x294); +ASSERT_REG_POSITION(color_key, 0x298); +ASSERT_REG_POSITION(rop, 0x2A0); +ASSERT_REG_POSITION(beta1, 0x2A4); +ASSERT_REG_POSITION(beta4, 0x2A8); +ASSERT_REG_POSITION(operation, 0x2AC); 
+ASSERT_REG_POSITION(pattern_offset, 0x2B0); +ASSERT_REG_POSITION(pattern_select, 0x2B4); +ASSERT_REG_POSITION(monochrome_pattern, 0x2E8); +ASSERT_REG_POSITION(color_pattern, 0x300); +ASSERT_REG_POSITION(render_solid, 0x580); +ASSERT_REG_POSITION(pixels_from_cpu, 0x800); +ASSERT_REG_POSITION(big_endian_control, 0x870); +ASSERT_REG_POSITION(pixels_from_memory, 0x880); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 898370739..ba387506e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { - const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); - ASSERT(cbuf_mask[regs.tex_cb_index]); - - const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; - ASSERT(texinfo.Address() != 0); - - const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); - ASSERT(address < texinfo.Address() + texinfo.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read(address)}; - return GetTextureInfo(tex_handle); -} - -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; @@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = 
GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7f2500aab..51a041202 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -209,11 +209,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - Texture::FullTextureInfo GetTexture(std::size_t offset) const; - - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 761962ed0..9be651e24 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include #include #include #include "common/assert.h" @@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume OnMemoryWrite(); } return; + case MAXWELL3D_REG_INDEX(fragment_barrier): + return rasterizer->FragmentBarrier(); + case MAXWELL3D_REG_INDEX(tiled_cache_barrier): + return rasterizer->TiledCacheBarrier(); } } @@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() { } Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { - const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; + const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); @@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { } Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { - const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; + const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - -Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { - const auto stage_index = static_cast(stage); - const auto& shader = state.shader_stages[stage_index]; - const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 
- - const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; - - return GetTextureInfo(tex_handle); -} - u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; } void Maxwell3D::ProcessClearBuffers() { - ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && - regs.clear_buffers.R == regs.clear_buffers.B && - regs.clear_buffers.R == regs.clear_buffers.A); - rasterizer->Clear(); } @@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); - return result; + return memory_manager.Read(buffer.address + offset); } SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { @@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 564acbc53..bf9e07c9b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -438,16 +438,6 @@ public: DecrWrapOGL = 0x8508, }; - enum 
class MemoryLayout : u32 { - Linear = 0, - BlockLinear = 1, - }; - - enum class InvMemoryLayout : u32 { - BlockLinear = 0, - Linear = 1, - }; - enum class CounterReset : u32 { SampleCnt = 0x01, Unk02 = 0x02, @@ -589,21 +579,31 @@ public: NegativeW = 7, }; + enum class SamplerIndex : u32 { + Independently = 0, + ViaHeaderIndex = 1, + }; + + struct TileMode { + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + BitField<12, 1, u32> is_pitch_linear; + BitField<16, 1, u32> is_3d; + }; + }; + static_assert(sizeof(TileMode) == 4); + struct RenderTargetConfig { u32 address_high; u32 address_low; u32 width; u32 height; Tegra::RenderTargetFormat format; + TileMode tile_mode; union { - BitField<0, 3, u32> block_width; - BitField<4, 3, u32> block_height; - BitField<8, 3, u32> block_depth; - BitField<12, 1, InvMemoryLayout> type; - BitField<16, 1, u32> is_3d; - } memory_layout; - union { - BitField<0, 16, u32> layers; + BitField<0, 16, u32> depth; BitField<16, 1, u32> volume; }; u32 layer_stride; @@ -832,7 +832,11 @@ public: u32 patch_vertices; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x4); + + u32 fragment_barrier; + + INSERT_UNION_PADDING_WORDS(0x7); std::array scissor_test; @@ -842,7 +846,15 @@ public: u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x5); + + u32 invalidate_texture_data_cache; + + INSERT_UNION_PADDING_WORDS(0x1); + + u32 tiled_cache_barrier; + + INSERT_UNION_PADDING_WORDS(0x4); u32 color_mask_common; @@ -866,12 +878,7 @@ public: u32 address_high; u32 address_low; Tegra::DepthFormat format; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - BitField<20, 1, InvMemoryLayout> type; - } memory_layout; + TileMode tile_mode; u32 layer_stride; GPUVAddr Address() const { @@ -880,7 +887,18 @@ public: } } zeta; - INSERT_UNION_PADDING_WORDS(0x41); + 
struct { + union { + BitField<0, 16, u32> x; + BitField<16, 16, u32> width; + }; + union { + BitField<0, 16, u32> y; + BitField<16, 16, u32> height; + }; + } render_area; + + INSERT_UNION_PADDING_WORDS(0x3F); union { BitField<0, 4, u32> stencil; @@ -921,7 +939,7 @@ public: BitField<25, 3, u32> map_7; }; - u32 GetMap(std::size_t index) const { + u32 Map(std::size_t index) const { const std::array maps{map_0, map_1, map_2, map_3, map_4, map_5, map_6, map_7}; ASSERT(index < maps.size()); @@ -934,11 +952,13 @@ public: u32 zeta_width; u32 zeta_height; union { - BitField<0, 16, u32> zeta_layers; + BitField<0, 16, u32> zeta_depth; BitField<16, 1, u32> zeta_volume; }; - INSERT_UNION_PADDING_WORDS(0x26); + SamplerIndex sampler_index; + + INSERT_UNION_PADDING_WORDS(0x25); u32 depth_test_enable; @@ -964,6 +984,7 @@ public: float b; float a; } blend_color; + INSERT_UNION_PADDING_WORDS(0x4); struct { @@ -1001,7 +1022,12 @@ public: float line_width_smooth; float line_width_aliased; - INSERT_UNION_PADDING_WORDS(0x1F); + INSERT_UNION_PADDING_WORDS(0x1B); + + u32 invalidate_sampler_cache_no_wfi; + u32 invalidate_texture_header_cache_no_wfi; + + INSERT_UNION_PADDING_WORDS(0x2); u32 vb_element_base; u32 vb_base_instance; @@ -1045,13 +1071,13 @@ public: } condition; struct { - u32 tsc_address_high; - u32 tsc_address_low; - u32 tsc_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TSCAddress() const { - return static_cast( - (static_cast(tsc_address_high) << 32) | tsc_address_low); + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); } } tsc; @@ -1062,13 +1088,13 @@ public: u32 line_smooth_enable; struct { - u32 tic_address_high; - u32 tic_address_low; - u32 tic_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TICAddress() const { - return static_cast( - (static_cast(tic_address_high) << 32) | tic_address_low); + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + 
address_low); } } tic; @@ -1397,12 +1423,6 @@ public: void FlushMMEInlineDraw(); - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - - /// Returns the texture information for a specific texture in a specific shader stage. - Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; @@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); ASSERT_REG_POSITION(patch_vertices, 0x373); +ASSERT_REG_POSITION(fragment_barrier, 0x378); ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD); +ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF); ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(depth_bounds, 0x3E7); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); @@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); ASSERT_REG_POSITION(zeta, 0x3F8); +ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); @@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); -ASSERT_REG_POSITION(zeta_layers, 0x48c); +ASSERT_REG_POSITION(zeta_depth, 0x48c); 
+ASSERT_REG_POSITION(sampler_index, 0x48D); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(line_width_smooth, 0x4EC); ASSERT_REG_POSITION(line_width_aliased, 0x4ED); +ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509); +ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1c29e895e..ba750748c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() { } void MaxwellDMA::CopyBlockLinearToPitch() { + UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); UNIMPLEMENTED_IF(regs.src_params.layer != 0); @@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { } void MaxwellDMA::CopyPitchToBlockLinear() { + UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); + const auto& dst_params = regs.dst_params; const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; const u32 width = dst_params.width; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c5f26896e..3512283ff 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "core/core.h" +#include "video_core/delayed_destruction_ring.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -47,6 +48,11 @@ protected: template class FenceManager { public: + /// 
Notify the fence manager about a new frame + void TickFrame() { + delayed_destruction_ring.Tick(); + } + void SignalSemaphore(GPUVAddr addr, u32 value) { TryReleasePendingFences(); const bool should_flush = ShouldFlush(); @@ -86,7 +92,7 @@ public: } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } - fences.pop(); + PopFence(); } } @@ -132,7 +138,7 @@ private: } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } - fences.pop(); + PopFence(); } } @@ -158,7 +164,14 @@ private: query_cache.CommitAsyncFlushes(); } + void PopFence() { + delayed_destruction_ring.Push(std::move(fences.front())); + fences.pop(); + } + std::queue fences; + + DelayedDestructionRing delayed_destruction_ring; }; } // namespace VideoCommon diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 6e70bd362..65feff588 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } // Flush and invalidate through the GPU interface, to be asynchronous if possible. - system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); + const std::optional cpu_addr = GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + + rasterizer->UnmapMemory(*cpu_addr, size); UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 9da9fb4ff..e69de29bb 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -1,250 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/morton.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCore { - -using Surface::GetBytesPerPixel; -using Surface::PixelFormat; - -using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); -using ConversionArray = std::array; - -template -static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, - u32 tile_width_spacing, u8* buffer, u8* addr) { - constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); - - // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual - // pixel values. - constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; - constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; - - if constexpr (morton_to_linear) { - Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, - stride, height, depth, block_height, block_depth, - tile_width_spacing); - } else { - Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, - (height + tile_size_y - 1) / tile_size_y, depth, - bytes_per_pixel, bytes_per_pixel, addr, buffer, false, - block_height, block_depth, tile_width_spacing); - } -} - -static constexpr ConversionArray morton_to_linear_fns = { - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - 
MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, -}; - -static constexpr ConversionArray linear_to_morton_fns = { - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - // TODO(Subv): Swizzling ASTC formats are not supported - nullptr, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - nullptr, - nullptr, - nullptr, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, - MortonCopy, -}; - -static MortonCopyFn 
GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { - switch (mode) { - case MortonSwizzleMode::MortonToLinear: - return morton_to_linear_fns[static_cast(format)]; - case MortonSwizzleMode::LinearToMorton: - return linear_to_morton_fns[static_cast(format)]; - } - UNREACHABLE(); - return morton_to_linear_fns[static_cast(format)]; -} - -void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, - u8* buffer, u8* addr) { - GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, - tile_width_spacing, buffer, addr); -} - -} // namespace VideoCore diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b714a7e3f..e69de29bb 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h @@ -1,18 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" -#include "video_core/surface.h" - -namespace VideoCore { - -enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; - -void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, - u8* buffer, u8* addr); - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 27ef4c69a..0cb0f387d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -76,6 +76,9 @@ public: /// Sync memory between guest and host. 
virtual void SyncGuestHost() = 0; + /// Unmap memory range + virtual void UnmapMemory(VAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; @@ -83,6 +86,12 @@ public: /// Notify the host renderer to wait for previous primitive and compute operations. virtual void WaitForIdle() = 0; + /// Notify the host renderer to wait for reads and writes to render targets and flush caches. + virtual void FragmentBarrier() = 0; + + /// Notify the host renderer to make available previous render target writes. + virtual void TiledCacheBarrier() = 0; + /// Notify the rasterizer to send all written commands to the host GPU. virtual void FlushCommands() = 0; @@ -91,8 +100,7 @@ public: /// Attempt to use a faster method to perform a surface copy [[nodiscard]] virtual bool AccelerateSurfaceCopy( - const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { return false; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 60735d502..5772cad87 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, std::size_t stream_size_) - : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, - std::make_unique(device_, stream_size_, true)}, - device{device_} { + const Device& device_, OGLStreamBuffer& stream_buffer_, + StateTracker& state_tracker) + : 
GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { if (!device.HasFastBufferSubData()) { return; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 95251e26b..17ee90316 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -22,6 +22,7 @@ namespace OpenGL { class Device; class OGLStreamBuffer; class RasterizerOpenGL; +class StateTracker; class Buffer : public VideoCommon::BufferBlock { public: @@ -52,9 +53,10 @@ private: using GenericBufferCache = VideoCommon::BufferCache; class OGLBufferCache final : public GenericBufferCache { public: - explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, std::size_t stream_size_); + explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const Device& device, OGLStreamBuffer& stream_buffer, + StateTracker& state_tracker); ~OGLBufferCache(); BufferInfo GetEmptyBuffer(std::size_t) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a94e4f72e..b24179d59 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1; constexpr u32 NumStages = 5; -constexpr std::array LimitUBOs = { +constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, - GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; - -constexpr std::array LimitSSBOs = { + GL_MAX_FRAGMENT_UNIFORM_BLOCKS, 
GL_MAX_COMPUTE_UNIFORM_BLOCKS, +}; +constexpr std::array LIMIT_SSBOS = { GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; - -constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; - -constexpr std::array LimitImages = { + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, +}; +constexpr std::array LIMIT_SAMPLERS = { + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS, + GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, +}; +constexpr std::array LIMIT_IMAGES = { GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; + GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, +}; template T GetInteger(GLenum pname) { @@ -76,8 +80,8 @@ std::vector GetExtensions() { return extensions; } -bool HasExtension(const std::vector& images, std::string_view extension) { - return std::find(images.begin(), images.end(), extension) != images.end(); +bool HasExtension(std::span extensions, std::string_view extension) { + return std::ranges::find(extensions, extension) != extensions.end(); } u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { @@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { std::array BuildMaxUniformBuffers() noexcept { std::array max; - std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), 
- [](GLenum pname) { return GetInteger(pname); }); + std::ranges::transform(LIMIT_UBOS, max.begin(), + [](GLenum pname) { return GetInteger(pname); }); return max; } @@ -115,9 +119,10 @@ std::array BuildBaseBindin for (std::size_t i = 0; i < NumStages; ++i) { const std::size_t stage = stage_swizzle[i]; bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; + Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), + Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), + Extract(base_samplers, num_samplers, total_samplers / NumStages, + LIMIT_SAMPLERS[stage])}; } u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); @@ -130,7 +135,7 @@ std::array BuildBaseBindin // Reserve at least 4 image bindings on the fragment stage. bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); + Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); // This is guaranteed to be at least 1. const u32 total_extracted_images = num_images / (NumStages - 1); @@ -142,7 +147,7 @@ std::array BuildBaseBindin continue; } bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); + Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); } // Compute doesn't care about any of this. @@ -188,6 +193,11 @@ bool IsASTCSupported() { return true; } +[[nodiscard]] bool IsDebugToolAttached(std::span extensions) { + const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); +} + } // Anonymous namespace Device::Device() @@ -206,9 +216,8 @@ Device::Device() "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - - uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); - shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); + uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); @@ -224,6 +233,7 @@ Device::Device() has_precise_bug = TestPreciseBug(); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; + has_debugging_tool_attached = IsDebugToolAttached(extensions); // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8a4b6b9fc..13e66846c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -36,11 +36,11 @@ public: return GetBaseBindings(static_cast(shader_type)); } - std::size_t GetUniformBufferAlignment() const { + size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } - std::size_t GetShaderStorageBufferAlignment() const { + size_t GetShaderStorageBufferAlignment() const { return shader_storage_alignment; } @@ -104,6 +104,10 @@ public: return has_nv_viewport_array2; } + bool HasDebuggingToolAttached() const { + return has_debugging_tool_attached; + } + bool UseAssemblyShaders() const { return use_assembly_shaders; } @@ -118,8 +122,8 @@ private: std::array max_uniform_buffers{}; std::array base_bindings{}; - std::size_t uniform_buffer_alignment{}; - std::size_t shader_storage_alignment{}; + size_t uniform_buffer_alignment{}; 
+ size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; @@ -135,6 +139,7 @@ private: bool has_precise_bug{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; }; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 6040646cb..3e9c922f5 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -46,7 +46,7 @@ void GLInnerFence::Wait() { } FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, + Tegra::GPU& gpu_, TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, QueryCache& query_cache_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 39ca6125b..30dbee613 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -33,12 +33,12 @@ private: using Fence = std::shared_ptr; using GenericFenceManager = - VideoCommon::FenceManager; + VideoCommon::FenceManager; class FenceManagerOpenGL final : public GenericFenceManager { public: explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, + TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, QueryCache& query_cache_); protected: diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp deleted file mode 100644 index b8a512cb6..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ /dev/null @@ -1,85 
+0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" - -namespace OpenGL { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using VideoCore::Surface::SurfaceType; - -FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; - -FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; - -GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { - const auto [entry, is_cache_miss] = cache.try_emplace(key); - auto& framebuffer{entry->second}; - if (is_cache_miss) { - framebuffer = CreateFramebuffer(key); - } - return framebuffer.handle; -} - -OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { - OGLFramebuffer framebuffer; - framebuffer.Create(); - - // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); - - if (key.zeta) { - const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; - const GLenum attach_target = stencil ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; - key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - } - - std::size_t num_buffers = 0; - std::array targets; - - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - if (!key.colors[index]) { - targets[index] = GL_NONE; - continue; - } - const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast(index); - key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - - const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111; - targets[index] = GL_COLOR_ATTACHMENT0 + attachment; - num_buffers = index + 1; - } - - if (num_buffers > 0) { - glDrawBuffers(static_cast(num_buffers), std::data(targets)); - } else { - glDrawBuffer(GL_NONE); - } - - return framebuffer; -} - -std::size_t FramebufferCacheKey::Hash() const noexcept { - std::size_t hash = std::hash{}(zeta); - for (const auto& color : colors) { - hash ^= std::hash{}(color); - } - hash ^= static_cast(color_attachments) << 16; - return hash; -} - -bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(colors, zeta, color_attachments) == - std::tie(rhs.colors, rhs.zeta, rhs.color_attachments); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h deleted file mode 100644 index 8f698fee0..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace OpenGL { - -constexpr std::size_t BitsPerAttachment = 4; - -struct FramebufferCacheKey { - View zeta; - std::array colors; - u32 color_attachments = 0; - - std::size_t Hash() const noexcept; - - bool operator==(const FramebufferCacheKey& rhs) const noexcept; - - bool operator!=(const FramebufferCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - void SetAttachment(std::size_t index, u32 attachment) { - color_attachments |= attachment << (BitsPerAttachment * index); - } -}; - -} // namespace OpenGL - -namespace std { - -template <> -struct hash { - std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace OpenGL { - -class FramebufferCacheOpenGL { -public: - FramebufferCacheOpenGL(); - ~FramebufferCacheOpenGL(); - - GLuint GetFramebuffer(const FramebufferCacheKey& key); - -private: - OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); - - std::unordered_map cache; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e58e84759..8aa63d329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,12 +25,15 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" #include 
"video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { -constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; -constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = +constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; +constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; -constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = +constexpr size_t TOTAL_CONST_BUFFER_BYTES = NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; + +constexpr size_t MAX_TEXTURES = 192; +constexpr size_t MAX_IMAGES = 48; + +struct TextureHandle { + constexpr TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; template -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - ShaderType shader_type, std::size_t index = 0) { +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, + ShaderType shader_type, size_t index = 0) { if constexpr (std::is_same_v) { if (entry.is_separated) { const u32 buffer_1 = entry.buffer; @@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry const u32 offset_2 = entry.secondary_offset; const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return engine.GetTextureInfo(handle_1 | handle_2); + return TextureHandle(handle_1 | handle_2, via_header_index); } } if (entry.is_bindless) { - const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(handle); - } - - const auto& gpu_profile = engine.AccessGuestDriverProfile(); - const u32 offset = entry.offset + static_cast(index * gpu_profile.GetTextureHandlerSize()); - if constexpr (std::is_same_v) { - return engine.GetStageTexture(shader_type, offset); - } else { - return engine.GetTexture(offset); + const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return TextureHandle(raw, via_header_index); } + const u32 buffer = engine.GetBoundBuffer(); + const u64 offset = (entry.offset + index) * sizeof(u32); + return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, @@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, if (!entry.IsIndirect()) { return entry.GetSize(); } - if (buffer.size > Maxwell::MaxConstBufferSize) { LOG_WARNING(Render_OpenGL, "Indirect 
constbuffer size {} exceeds maximum {}", buffer.size, Maxwell::MaxConstBufferSize); @@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss reinterpret_cast(ssbos)); } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { + if (entry.is_buffer) { + return ImageViewType::Buffer; + } + switch (entry.type) { + case Tegra::Shader::TextureType::Texture1D: + return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; + case Tegra::Shader::TextureType::Texture2D: + return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; + case Tegra::Shader::TextureType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::TextureType::TextureCube: + return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { + switch (entry.type) { + case Tegra::Shader::ImageType::Texture1D: + return ImageViewType::e1D; + case Tegra::Shader::ImageType::Texture1DArray: + return ImageViewType::e1DArray; + case Tegra::Shader::ImageType::Texture2D: + return ImageViewType::e2D; + case Tegra::Shader::ImageType::Texture2DArray: + return ImageViewType::e2DArray; + case Tegra::Shader::ImageType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::ImageType::TextureBuffer: + return ImageViewType::Buffer; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Core::Memory::Memory& cpu_memory_, const Device& device_, ScreenInfo& screen_info_, ProgramManager& program_manager_, StateTracker& state_tracker_) - : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), + : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), + stream_buffer(device, state_tracker), + texture_cache_runtime(device, program_manager, state_tracker), + texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), - buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE), + buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), async_shaders(emu_window_) { - CheckExtensions(); - unified_uniform_buffer.Create(); glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); @@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra nullptr, 0); } } - if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); } @@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::CheckExtensions() { - if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { - LOG_WARNING( - Render_OpenGL, - "Anisotropic filter is not supported! 
This can cause graphical issues in some games."); - } -} - void RasterizerOpenGL::SetupVertexFormat() { auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexFormats]) { @@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { return info.offset; } -void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { +void RasterizerOpenGL::SetupShaders() { MICROPROFILE_SCOPE(OpenGL_Shader); u32 clip_distances = 0; + std::array shaders{}; + image_view_indices.clear(); + sampler_handles.clear(); + + texture_cache.SynchronizeGraphicsDescriptors(); + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = maxwell3d.regs.shader_config[index]; const auto program{static_cast(index)}; @@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } continue; } - // Currently this stages are not supported in the OpenGL backend. // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL if (program == Maxwell::ShaderProgram::TesselationControl || @@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { default: UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, shader_config.enable.Value(), shader_config.offset); + break; } // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; + const size_t stage = index == 0 ? 0 : index - 1; + shaders[stage] = shader; + SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); + SetupDrawTextures(shader, stage); + SetupDrawImages(shader, stage); // Workaround for Intel drivers. 
// When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { ++index; } } - SyncClipEnabled(clip_distances); maxwell3d.dirty.flags[Dirty::Shaders] = false; + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader* const shader = shaders[stage]; + if (shader) { + const auto base = device.GetBaseBindings(stage); + BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, + texture_index, image_index); + } + } } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s shader_cache.LoadDiskCache(title_id, stop_loading, callback); } -void RasterizerOpenGL::ConfigureFramebuffers() { - MICROPROFILE_SCOPE(OpenGL_Framebuffer); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { - return; - } - maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; - - texture_cache.GuardRenderTargets(true); - - View depth_surface = texture_cache.GetDepthBufferSurface(true); - - const auto& regs = maxwell3d.regs; - UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); - - // Bind the framebuffer surfaces - FramebufferCacheKey key; - const auto colors_count = static_cast(regs.rt_control.count); - for (std::size_t index = 0; index < colors_count; ++index) { - View color_surface{texture_cache.GetColorBufferSurface(index, true)}; - if (!color_surface) { - continue; - } - // Assume that a surface will be written to if it is used as a framebuffer, even - // if the shader doesn't actually write to it. 
- texture_cache.MarkColorBufferInUse(index); - - key.SetAttachment(index, regs.rt_control.GetMap(index)); - key.colors[index] = std::move(color_surface); - } - - if (depth_surface) { - // Assume that a surface will be written to if it is used as a framebuffer, even if - // the shader doesn't actually write to it. - texture_cache.MarkDepthBufferInUse(); - key.zeta = std::move(depth_surface); - } - - texture_cache.GuardRenderTargets(false); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - -void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { - const auto& regs = maxwell3d.regs; - - texture_cache.GuardRenderTargets(true); - View color_surface; - - if (using_color) { - // Determine if we have to preserve the contents. - // First we have to make sure all clear masks are enabled. - bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || - !regs.clear_buffers.B || !regs.clear_buffers.A; - const std::size_t index = regs.clear_buffers.RT; - if (regs.clear_flags.scissor) { - // Then we have to confirm scissor testing clears the whole image. - const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.rt[index].width; - preserve_contents |= scissor.max_y < regs.rt[index].height; - } - - color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); - texture_cache.MarkColorBufferInUse(index); - } - - View depth_surface; - if (using_depth_stencil) { - bool preserve_contents = false; - if (regs.clear_flags.scissor) { - // For depth stencil clears we only have to confirm scissor test covers the whole image. 
- const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.zeta_width; - preserve_contents |= scissor.max_y < regs.zeta_height; - } - - depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); - texture_cache.MarkDepthBufferInUse(); - } - texture_cache.GuardRenderTargets(false); - - FramebufferCacheKey key; - key.colors[0] = std::move(color_surface); - key.zeta = std::move(depth_surface); - - state_tracker.NotifyFramebuffer(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - void RasterizerOpenGL::Clear() { if (!maxwell3d.ShouldExecute()) { return; @@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() { regs.clear_buffers.A) { use_color = true; - state_tracker.NotifyColorMask0(); - glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, + const GLuint index = regs.clear_buffers.RT; + state_tracker.NotifyColorMask(index); + glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); // TODO(Rodrigo): Determine if clamping is used on clears @@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() { state_tracker.NotifyScissor0(); glDisablei(GL_SCISSOR_TEST, 0); } - UNIMPLEMENTED_IF(regs.clear_flags.viewport); - ConfigureClearFramebuffer(use_color, use_depth || use_stencil); - - if (use_color) { - glClearBufferfv(GL_COLOR, 0, regs.clear_color); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); } + if (use_color) { + glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); + } if (use_depth && use_stencil) { glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); } else if (use_depth) { @@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { 
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); // Prepare the vertex array. - const bool invalidated = buffer_cache.Map(buffer_size); - - if (invalidated) { - // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::VertexBuffers] = true; - for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { - dirty[index] = true; - } - } + buffer_cache.Map(buffer_size); // Prepare vertex array format. SetupVertexFormat(); @@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } // Setup shaders and their used resources. - texture_cache.GuardSamplers(true); - const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - SetupShaders(primitive_mode); - texture_cache.GuardSamplers(false); - - ConfigureFramebuffers(); + auto lock = texture_cache.AcquireLock(); + SetupShaders(); // Signal the buffer cache that we are not going to upload more things. 
buffer_cache.Unmap(); - + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); program_manager.BindGraphicsPipeline(); - if (texture_cache.TextureBarrier()) { - glTextureBarrier(); - } - + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); @@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Acquire(); current_cbuf = 0; - auto kernel = shader_cache.GetComputeKernel(code_addr); - program_manager.BindCompute(kernel->GetHandle()); + Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - SetupComputeTextures(kernel); - SetupComputeImages(kernel); + auto lock = texture_cache.AcquireLock(); + BindComputeTextures(kernel); - const std::size_t buffer_size = - Tegra::Engines::KeplerCompute::NumConstBuffers * - (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); + const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); SetupComputeConstBuffers(kernel); @@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = kepler_compute.launch_description; - program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.FlushRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.DownloadMemory(addr, size); + } buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } @@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr 
addr, u64 size) { if (!Settings::IsGPULevelHigh()) { return buffer_cache.MustFlushRegion(addr, size); } - return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); + return texture_cache.IsRegionGpuModified(addr, size) || + buffer_cache.MustFlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { @@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.InvalidateRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); @@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.OnCPUWrite(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.OnCPUWrite(addr, size); buffer_cache.OnCPUWrite(addr, size); } void RasterizerOpenGL::SyncGuestHost() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - texture_cache.SyncGuestHost(); buffer_cache.SyncGuestHost(); shader_cache.SyncGuestHost(); } +void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UnmapMemory(addr, size); + } + buffer_cache.OnCPUWrite(addr, size); + shader_cache.OnCPUWrite(addr, size); +} + void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write(addr, value); @@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() { GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); } +void RasterizerOpenGL::FragmentBarrier() { + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); +} + +void RasterizerOpenGL::TiledCacheBarrier() { + glTextureBarrier(); +} + void RasterizerOpenGL::FlushCommands() { // Only flush when we have commands queued 
to OpenGL. if (num_queued_commands == 0) { @@ -854,47 +833,97 @@ void RasterizerOpenGL::TickFrame() { // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. num_queued_commands = 0; + fence_manager.TickFrame(); buffer_cache.TickFrame(); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.TickFrame(); + } } -bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); - texture_cache.DoFermiCopy(src, dst, copy_config); + auto lock = texture_cache.AcquireLock(); + texture_cache.BlitImage(dst, src, copy_config); return true; } bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { - if (!framebuffer_addr) { - return {}; + if (framebuffer_addr == 0) { + return false; } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; - if (!surface) { - return {}; + auto lock = texture_cache.AcquireLock(); + ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; + if (!image_view) { + return false; } - // Verify that the cached surface is the same size and format as the requested framebuffer - const auto& params{surface->GetSurfaceParams()}; - const auto& pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - - if (params.pixel_format != pixel_format) { - LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); - } - - screen_info.display_texture = 
surface->GetTexture(); - screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; + // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); + // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); + screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } +void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { + image_view_indices.clear(); + sampler_handles.clear(); + + texture_cache.SynchronizeComputeDescriptors(); + + SetupComputeTextures(kernel); + SetupComputeImages(kernel); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + program_manager.BindCompute(kernel->GetHandle()); + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); +} + +void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, + GLuint base_image, size_t& image_view_index, + size_t& texture_index, size_t& image_index) { + const GLuint* const samplers = sampler_handles.data() + texture_index; + const GLuint* const textures = texture_handles.data() + texture_index; + const GLuint* const images = image_handles.data() + image_index; + + const size_t num_samplers = entries.samplers.size(); + for (const auto& sampler : entries.samplers) { + for (size_t i = 0; i < sampler.size; ++i) { + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); + texture_handles[texture_index++] = handle; + } + } + const size_t num_images = entries.images.size(); + for (size_t unit = 0; unit < num_images; 
++unit) { + // TODO: Mark as modified + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); + image_handles[image_index] = handle; + ++image_index; + } + if (num_samplers > 0) { + glBindSamplers(base_texture, static_cast(num_samplers), samplers); + glBindTextures(base_texture, static_cast(num_samplers), textures); + } + if (num_images > 0) { + glBindImageTextures(base_image, static_cast(num_images), images); + } +} + void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { static constexpr std::array PARAMETER_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; - const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; const auto& entries{shader->GetEntries().global_memory_entries}; @@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e } } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = device.GetBaseBindings(stage_index).sampler; +void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { + const bool via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : shader->GetEntries().samplers) { const auto shader_type = static_cast(stage_index); - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); - SetupTexture(binding++, texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + 
const auto handle = + GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); + const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = 0; +void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : kernel->GetEntries().samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); - SetupTexture(binding++, texture, entry); + for (size_t i = 0; i < entry.size; ++i) { + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); + const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry) { - const auto view = texture_cache.GetTextureSurface(texture.tic, entry); - if (!view) { - // Can occur when texture addr is null or its memory is unmapped/invalid - glBindSampler(binding, 0); - glBindTextureUnit(binding, 0); - return; - } - const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); - glBindTextureUnit(binding, handle); - if (!view->GetSurfaceParams().IsBuffer()) { - glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); - } -} - -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { - u32 binding = device.GetBaseBindings(stage_index).image; +void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { + 
const bool via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : shader->GetEntries().images) { const auto shader_type = static_cast(stage_index); - const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(binding++, tic, entry); + const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupComputeImages(Shader* shader) { - u32 binding = 0; +void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : shader->GetEntries().images) { - const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; - SetupImage(binding++, tic, entry); + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, - const ImageEntry& entry) { - const auto view = texture_cache.GetImageSurface(tic, entry); - if (!view) { - glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); - return; - } - if (entry.is_written) { - view->MarkAsModified(texture_cache.Tick()); - } - const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); -} - void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; @@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() { flags[Dirty::PointSize] = false; oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); + oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - if (maxwell3d.regs.vp_point_size.enable) { - // By definition of GL_POINT_SIZE, it only matters if 
GL_PROGRAM_POINT_SIZE is disabled. - glEnable(GL_PROGRAM_POINT_SIZE); - return; - } - - // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid - // in OpenGL). glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); - glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index de28cff15..82e03e677 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,12 +7,13 @@ #include #include #include -#include #include #include #include #include +#include + #include #include "common/common_types.h" @@ -23,16 +24,14 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" @@ -51,7 +50,7 @@ class MemoryManager; namespace OpenGL { struct ScreenInfo; -struct DrawParameters; +struct ShaderEntries; struct BindlessSSBO { GLuint64EXT address; @@ -79,15 +78,18 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void UnmapMemory(VAddr addr, u64 size) 
override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; + void FragmentBarrier() override; + void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; - bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; @@ -108,11 +110,14 @@ public: } private: - /// Configures the color and depth framebuffer states. - void ConfigureFramebuffers(); + static constexpr size_t MAX_TEXTURES = 192; + static constexpr size_t MAX_IMAGES = 48; + static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - /// Configures the color and depth framebuffer for clearing. - void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); + void BindComputeTextures(Shader* kernel); + + void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, + size_t& image_view_index, size_t& texture_index, size_t& image_index); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); @@ -136,23 +141,16 @@ private: size_t size, BindlessSSBO* ssbo); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(std::size_t stage_index, Shader* shader); + void SetupDrawTextures(const Shader* shader, size_t stage_index); /// Configures the textures used in a compute shader. - void SetupComputeTextures(Shader* kernel); - - /// Configures a texture. 
- void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry); + void SetupComputeTextures(const Shader* kernel); /// Configures images in a graphics shader. - void SetupDrawImages(std::size_t stage_index, Shader* shader); + void SetupDrawImages(const Shader* shader, size_t stage_index); /// Configures images in a compute shader. - void SetupComputeImages(Shader* shader); - - /// Configures an image. - void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); + void SetupComputeImages(const Shader* shader); /// Syncs the viewport and depth range to match the guest state void SyncViewport(); @@ -227,9 +225,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - /// Check for extension that are not strictly required but are needed for correct emulation - void CheckExtensions(); - std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; @@ -242,7 +237,7 @@ private: GLintptr SetupIndexBuffer(); - void SetupShaders(GLenum primitive_mode); + void SetupShaders(); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -254,19 +249,21 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - TextureCacheOpenGL texture_cache; + OGLStreamBuffer stream_buffer; + TextureCacheRuntime texture_cache_runtime; + TextureCache texture_cache; ShaderCacheOpenGL shader_cache; - SamplerCacheOpenGL sampler_cache; - FramebufferCacheOpenGL framebuffer_cache; QueryCache query_cache; OGLBufferCache buffer_cache; FenceManagerOpenGL fence_manager; VideoCommon::Shader::AsyncShaders async_shaders; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - - GLint vertex_binding = 0; + boost::container::static_vector image_view_indices; + std::array image_view_ids; + boost::container::static_vector sampler_handles; + std::array texture_handles; + std::array image_handles; std::array transform_feedback_buffers; @@ -280,7 +277,7 @@ 
private: std::size_t current_cbuf = 0; OGLBuffer unified_uniform_buffer; - /// Number of commands queued to the OpenGL driver. Reseted on flush. + /// Number of commands queued to the OpenGL driver. Resetted on flush. std::size_t num_queued_commands = 0; u32 last_clip_distance_mask = 0; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0ebcec427..0e34a0f20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -71,7 +71,7 @@ void OGLSampler::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenSamplers(1, &handle); + glCreateSamplers(1, &handle); } void OGLSampler::Release() { diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp deleted file mode 100644 index 5c174879a..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/logging/log.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - -namespace OpenGL { - -SamplerCacheOpenGL::SamplerCacheOpenGL() = default; - -SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; - -OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - OGLSampler sampler; - sampler.Create(); - - const GLuint sampler_id{sampler.handle}; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - tsc.depth_compare_enabled == 1 ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); - } else { - LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); - } - - return sampler; -} - -GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { - return sampler.handle; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h deleted file mode 100644 index 34ee37f00..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/sampler_cache.h" - -namespace OpenGL { - -class SamplerCacheOpenGL final : public VideoCommon::SamplerCache { -public: - explicit SamplerCacheOpenGL(); - ~SamplerCacheOpenGL(); - -protected: - OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - - GLuint ToSamplerType(const OGLSampler& sampler) const override; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eabfdea5d..d4841fdb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -27,7 +27,6 @@ #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ccbdfe967..2e1fa252d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::TextureType; -using VideoCommon::Shader::BuildTransformFeedback; -using VideoCommon::Shader::Registry; -using namespace std::string_literals; using namespace VideoCommon::Shader; +using namespace std::string_literals; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Operation = const OperationNode&; @@ -2753,11 +2751,11 @@ private: } } - std::string GetSampler(const Sampler& sampler) const { + std::string GetSampler(const SamplerEntry& sampler) const 
{ return AppendSuffix(sampler.index, "sampler"); } - std::string GetImage(const Image& image) const { + std::string GetImage(const ImageEntry& image) const { return AppendSuffix(image.index, "image"); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c4ff47875..be68994bb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,8 +20,8 @@ namespace OpenGL { class Device; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::Sampler; -using ImageEntry = VideoCommon::Shader::Image; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 691c6c79b..553e6e8d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() { } } +void ProgramManager::BindHostCompute(GLuint program) { + if (use_assembly_programs) { + glDisable(GL_COMPUTE_PROGRAM_NV); + } + glUseProgram(program); + is_graphics_bound = false; +} + +void ProgramManager::RestoreGuestCompute() { + if (use_assembly_programs) { + glEnable(GL_COMPUTE_PROGRAM_NV); + glUseProgram(0); + } +} + void ProgramManager::UseVertexShader(GLuint program) { if (use_assembly_programs) { BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 950e0dfcb..ad42cce74 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,6 +45,12 @@ public: /// 
Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); + /// Binds an OpenGL GLSL program object unsynchronized with the guest state. + void BindHostCompute(GLuint program); + + /// Rewinds BindHostCompute state changes. + void RestoreGuestCompute(); + void UseVertexShader(GLuint program); void UseGeometryShader(GLuint program); void UseFragmentShader(GLuint program); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 45f4fc565..60e6fa39f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} } } +void StateTracker::InvalidateStreamBuffer() { + flags[Dirty::VertexBuffers] = true; + for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { + flags[index] = true; + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 9d127548f..574615d3c 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -92,6 +92,8 @@ class StateTracker { public: explicit StateTracker(Tegra::GPU& gpu); + void InvalidateStreamBuffer(); + void BindIndexBuffer(GLuint new_index_buffer) { if (index_buffer == new_index_buffer) { return; @@ -100,6 +102,14 @@ public: glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); } + void BindFramebuffer(GLuint new_framebuffer) { + if (framebuffer == new_framebuffer) { + return; + } + framebuffer = new_framebuffer; + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + } + void NotifyScreenDrawVertexArray() { flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true; @@ -129,9 +139,9 @@ public: flags[OpenGL::Dirty::Scissor0] = true; } - void NotifyColorMask0() { + void 
NotifyColorMask(size_t index) { flags[OpenGL::Dirty::ColorMasks] = true; - flags[OpenGL::Dirty::ColorMask0] = true; + flags[OpenGL::Dirty::ColorMask0 + index] = true; } void NotifyBlend0() { @@ -190,6 +200,7 @@ public: private: Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; + GLuint framebuffer = 0; GLuint index_buffer = 0; }; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 887995cf4..e0819cdf2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) - : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) + : state_tracker{state_tracker_} { gl_buffer.Create(); - GLsizeiptr allocate_size = size; - if (vertex_data_usage) { - // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer - // read position is near the end and is an out-of-bound access to the vertex buffer. This is - // probably a bug in the driver and is related to the usage of vec3 attributes in the - // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the - // crash. 
- allocate_size *= 2; - } - static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); mapped_ptr = static_cast( - glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); @@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() { gl_buffer.Release(); } -std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { - ASSERT(size <= buffer_size); - ASSERT(alignment <= buffer_size); +std::pair OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { + ASSERT(size <= BUFFER_SIZE); + ASSERT(alignment <= BUFFER_SIZE); mapped_size = size; if (alignment > 0) { buffer_pos = Common::AlignUp(buffer_pos, alignment); } - bool invalidate = false; - if (buffer_pos + size > buffer_size) { + if (buffer_pos + size > BUFFER_SIZE) { MICROPROFILE_SCOPE(OpenGL_StreamBuffer); glInvalidateBufferData(gl_buffer.handle); + state_tracker.InvalidateStreamBuffer(); buffer_pos = 0; - invalidate = true; } - return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); + return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 307a67113..dd9cf67eb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -4,29 +4,31 @@ #pragma once -#include +#include + #include + #include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { class Device; +class StateTracker; class 
OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); + explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); ~OGLStreamBuffer(); /* * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes * and the optional alignment requirement. * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. - * The return values are the pointer to the new chunk, the offset within the buffer, - * and the invalidation flag for previous chunks. + * The return values are the pointer to the new chunk, and the offset within the buffer. * The actual used size must be specified on unmapping the chunk. */ - std::tuple Map(GLsizeiptr size, GLintptr alignment = 0); + std::pair Map(GLsizeiptr size, GLintptr alignment = 0); void Unmap(GLsizeiptr size); @@ -39,15 +41,18 @@ public: } GLsizeiptr Size() const noexcept { - return buffer_size; + return BUFFER_SIZE; } private: + static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; + + StateTracker& state_tracker; + OGLBuffer gl_buffer; GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; - GLsizeiptr buffer_size = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index daf352b50..4c690418c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -2,173 +2,238 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include "common/assert.h" -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "core/core.h" -#include "video_core/morton.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include +#include +#include +#include + +#include + +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache/surface_base.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache.h" -#include "video_core/textures/convert.h" -#include "video_core/textures/texture.h" +#include "video_core/textures/decoders.h" namespace OpenGL { -using Tegra::Texture::SwizzleSource; -using VideoCore::MortonSwizzleMode; +namespace { +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureMipmapFilter; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCommon::CalculateLevelStrideAlignment; +using VideoCommon::ImageCopy; +using VideoCommon::ImageFlagBits; +using VideoCommon::ImageType; +using VideoCommon::NUM_RT; +using VideoCommon::SamplesLog2; +using VideoCommon::SwizzleParameters; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsPixelFormatSRGB; +using VideoCore::Surface::MaxPixelFormat; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; -MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", 
MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", - MP_RGB(128, 192, 128)); +struct CopyOrigin { + GLint level; + GLint x; + GLint y; + GLint z; +}; -namespace { +struct CopyRegion { + GLsizei width; + GLsizei height; + GLsizei depth; +}; struct FormatTuple { GLenum internal_format; GLenum format = GL_NONE; GLenum type = GL_NONE; + GLenum store_format = internal_format; }; -constexpr std::array tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // 
BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // 
ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - - // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - - // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, 
GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + 
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + 
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; +constexpr std::array ACCELERATED_FORMATS{ + GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, + GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, + GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, 
GL_R8UI, GL_RGBA32I, + GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I, + GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16, + GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM, + GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, +}; + const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); - return tex_format_tuples[static_cast(pixel_format)]; + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; } -GLenum GetTextureTarget(const SurfaceTarget& target) { - switch (target) { - case SurfaceTarget::TextureBuffer: - return GL_TEXTURE_BUFFER; - case SurfaceTarget::Texture1D: - return GL_TEXTURE_1D; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D; - case SurfaceTarget::Texture3D: - return GL_TEXTURE_3D; - case SurfaceTarget::Texture1DArray: +GLenum ImageTarget(const VideoCommon::ImageInfo& info) { + switch (info.type) { + case ImageType::e1D: return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: + case ImageType::e2D: + if (info.num_samples > 1) { + return GL_TEXTURE_2D_MULTISAMPLE_ARRAY; + } return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP; - case SurfaceTarget::TextureCubeArray: - return GL_TEXTURE_CUBE_MAP_ARRAY; + case ImageType::e3D: + return GL_TEXTURE_3D; + case ImageType::Linear: + return GL_TEXTURE_2D_ARRAY; + case ImageType::Buffer: + return GL_TEXTURE_BUFFER; } - UNREACHABLE(); - return {}; + UNREACHABLE_MSG("Invalid image type={}", info.type); + return GL_NONE; } -GLint GetSwizzleSource(SwizzleSource source) { +GLenum ImageTarget(ImageViewType type, int num_samples = 1) { + const bool is_multisampled = num_samples > 1; + switch (type) { + case ImageViewType::e1D: + return GL_TEXTURE_1D; + case ImageViewType::e2D: + return is_multisampled ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; + case ImageViewType::Cube: + return GL_TEXTURE_CUBE_MAP; + case ImageViewType::e3D: + return GL_TEXTURE_3D; + case ImageViewType::e1DArray: + return GL_TEXTURE_1D_ARRAY; + case ImageViewType::e2DArray: + return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + case ImageViewType::CubeArray: + return GL_TEXTURE_CUBE_MAP_ARRAY; + case ImageViewType::Rect: + return GL_TEXTURE_RECTANGLE; + case ImageViewType::Buffer: + return GL_TEXTURE_BUFFER; + } + UNREACHABLE_MSG("Invalid image view type={}", type); + return GL_NONE; +} + +GLenum TextureMode(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8_UINT_D24_UNORM: + return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + +GLint Swizzle(SwizzleSource source) { switch (source) { case SwizzleSource::Zero: return GL_ZERO; @@ -184,530 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) { case SwizzleSource::OneFloat: return GL_ONE; } - UNREACHABLE(); + UNREACHABLE_MSG("Invalid swizzle source={}", source); return GL_NONE; } -GLenum GetComponent(PixelFormat format, bool is_first) { +GLenum AttachmentType(PixelFormat format) { + switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { + case SurfaceType::Depth: + return GL_DEPTH_ATTACHMENT; + case SurfaceType::DepthStencil: + return GL_DEPTH_STENCIL_ATTACHMENT; + default: + UNIMPLEMENTED_MSG("Unimplemented type={}", type); + return GL_NONE; + } +} + +[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) { + if (!device.HasASTC() && IsPixelFormatASTC(format)) { + return true; + } + switch (format) { + case PixelFormat::BC4_UNORM: + case PixelFormat::BC5_UNORM: + return type == ImageType::e3D; + default: + break; + } + 
return false; +} + +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { + switch (value) { + case SwizzleSource::G: + return SwizzleSource::R; + default: + return value; + } +} + +void ApplySwizzle(GLuint handle, PixelFormat format, std::array swizzle) { switch (format) { case PixelFormat::D24_UNORM_S8_UINT: case PixelFormat::D32_FLOAT_S8_UINT: - return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; case PixelFormat::S8_UINT_D24_UNORM: - return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + TextureMode(format, swizzle[0] == SwizzleSource::R)); + std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); + break; default: - UNREACHABLE(); - return GL_DEPTH_COMPONENT; + break; + } + std::array gl_swizzle; + std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle); + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); +} + +[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, + const VideoCommon::ImageInfo& info) { + // Disable accelerated uploads for now as they don't implement swizzled uploads + return false; + switch (info.type) { + case ImageType::e2D: + case ImageType::e3D: + case ImageType::Linear: + break; + default: + return false; + } + const GLenum internal_format = GetFormatTuple(info.format).internal_format; + const auto& format_info = runtime.FormatInfo(info.type, internal_format); + if (format_info.is_compressed) { + return false; + } + if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { + return false; + } + if (format_info.compatibility_by_size) { + return true; + } + const GLenum store_format = StoreFormat(BytesPerBlock(info.format)); + const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class; + return format_info.compatibility_class == store_class; 
+} + +[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, + VideoCommon::SubresourceLayers subresource, GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyOrigin{ + .level = static_cast(subresource.base_level), + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .z = static_cast(subresource.base_layer), + }; + case GL_TEXTURE_3D: + return CopyOrigin{ + .level = static_cast(subresource.base_level), + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .z = static_cast(offset.z), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0}; } } -void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { - if (params.IsBuffer()) { +[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent, + VideoCommon::SubresourceLayers dst_subresource, + GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyRegion{ + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .depth = static_cast(dst_subresource.num_layers), + }; + case GL_TEXTURE_3D: + return CopyRegion{ + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .depth = static_cast(extent.depth), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyRegion{.width = 0, .height = 0, .depth = 0}; + } +} + +void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { + if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { + const GLuint texture = image_view->DefaultHandle(); + glNamedFramebufferTexture(fbo, attachment, texture, 0); return; } - glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - 
glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast(params.num_levels - 1)); - if (params.num_levels == 1) { - glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); + const GLuint texture = image_view->Handle(ImageViewType::e3D); + if (image_view->range.extent.layers > 1) { + // TODO: OpenGL doesn't support rendering to a fixed number of slices + glNamedFramebufferTexture(fbo, attachment, texture, 0); + } else { + const u32 slice = image_view->range.base.layer; + glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice); } } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, - OGLBuffer& texture_buffer) { - OGLTexture texture; - texture.Create(target); - - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); - break; - case SurfaceTarget::TextureBuffer: - texture_buffer.Create(); - glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), - nullptr, GL_DYNAMIC_STORAGE_BIT); - glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); - break; - case SurfaceTarget::Texture2D: - case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height, params.depth); - break; - default: - UNREACHABLE(); - } - - ApplyTextureDefaults(params, texture.handle); - - return texture; -} - -constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | - 
(static_cast(z_source) << 8) | static_cast(w_source); -} - } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_) - : SurfaceBase{gpu_addr_, params_, is_astc_supported_} { - if (is_converted) { - internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; - format = GL_RGBA; - type = GL_UNSIGNED_BYTE; +ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) + : span(map, size), sync{sync_}, handle{handle_} {} + +ImageBufferMap::~ImageBufferMap() { + if (sync) { + sync->Create(); + } +} + +TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, + StateTracker& state_tracker_) + : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { + static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; + for (size_t i = 0; i < TARGETS.size(); ++i) { + const GLenum target = TARGETS[i]; + for (const FormatTuple& tuple : FORMAT_TABLE) { + const GLenum format = tuple.internal_format; + GLint compat_class; + GLint compat_type; + GLint is_compressed; + glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class); + glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1, + &compat_type); + glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed); + const FormatProperties properties{ + .compatibility_class = static_cast(compat_class), + .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE, + .is_compressed = is_compressed == GL_TRUE, + }; + format_properties[i].emplace(format, properties); + } + } + null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); + null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); + null_image_3d.Create(GL_TEXTURE_3D); + null_image_rect.Create(GL_TEXTURE_RECTANGLE); + glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); + 
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); + glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); + glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); + + std::array new_handles; + glGenTextures(static_cast(new_handles.size()), new_handles.data()); + null_image_view_1d.handle = new_handles[0]; + null_image_view_2d.handle = new_handles[1]; + null_image_view_2d_array.handle = new_handles[2]; + null_image_view_cube.handle = new_handles[3]; + glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1, + 0, 1); + glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0, + 1, 0, 1); + glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY, + null_image_cube_array.handle, GL_R8, 0, 1, 0, 1); + glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, + GL_R8, 0, 1, 0, 6); + const std::array texture_handles{ + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, + null_image_view_2d_array.handle, null_image_view_cube.handle, + }; + for (const GLuint handle : texture_handles) { + static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); + } + const auto set_view = [this](ImageViewType type, GLuint handle) { + if (device.HasDebuggingToolAttached()) { + const std::string name = fmt::format("NullImage {}", type); + glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + } + null_image_views[static_cast(type)] = handle; + }; + set_view(ImageViewType::e1D, null_image_view_1d.handle); + set_view(ImageViewType::e2D, null_image_view_2d.handle); + set_view(ImageViewType::Cube, null_image_view_cube.handle); + set_view(ImageViewType::e3D, null_image_3d.handle); + set_view(ImageViewType::e1DArray, 
null_image_1d_array.handle); + set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); + set_view(ImageViewType::CubeArray, null_image_cube_array.handle); + set_view(ImageViewType::Rect, null_image_rect.handle); +} + +TextureCacheRuntime::~TextureCacheRuntime() = default; + +void TextureCacheRuntime::Finish() { + glFinish(); +} + +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} + +ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} + +void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, + std::span copies) { + const GLuint dst_name = dst_image.Handle(); + const GLuint src_name = src_image.Handle(); + const GLenum dst_target = ImageTarget(dst_image.info); + const GLenum src_target = ImageTarget(src_image.info); + for (const ImageCopy& copy : copies) { + const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); + const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); + const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); + glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y, + src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x, + dst_origin.y, dst_origin.z, region.width, region.height, region.depth); + } +} + +bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { + if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { + return false; + } + return true; +} + +void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, + std::span copies) { + if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { + ASSERT(src.info.type == ImageType::e3D); + util_shaders.CopyBC4(dst, src, copies); } else { - const auto& tuple{GetFormatTuple(params.pixel_format)}; - internal_format 
= tuple.internal_format; - format = tuple.format; - type = tuple.type; - is_compressed = params.IsCompressed(); - } - target = GetTextureTarget(params.target); - texture = CreateTexture(params, target, internal_format, texture_buffer); - DecorateSurfaceName(); - - u32 num_layers = 1; - if (params.is_layered || params.target == SurfaceTarget::Texture3D) { - num_layers = params.depth; - } - - main_view = - CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); -} - -CachedSurface::~CachedSurface() = default; - -void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Download); - - if (params.IsBuffer()) { - glGetNamedBufferSubData(texture_buffer.handle, 0, - static_cast(params.GetHostSizeInBytes(false)), - staging_buffer.data()); - return; - } - - SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); - - for (u32 level = 0; level < params.emulated_levels; ++level) { - glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); - - u8* const mip_data = staging_buffer.data() + mip_offset; - const GLsizei size = static_cast(params.GetHostMipmapSize(level)); - if (is_compressed) { - glGetCompressedTextureImage(texture.handle, level, size, mip_data); - } else { - glGetTextureImage(texture.handle, level, format, type, size, mip_data); - } + UNREACHABLE(); } } -void CachedSurface::UploadTexture(const std::vector& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Upload); - SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.emulated_levels; ++level) { - UploadTextureMipmap(level, staging_buffer); - } -} - -void CachedSurface::UploadTextureMipmap(u32 level, const std::vector& staging_buffer) { - glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, 
params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); - const u8* buffer{staging_buffer.data() + mip_offset}; - if (is_compressed) { - const auto image_size{static_cast(params.GetHostMipmapSize(level))}; - switch (params.target) { - case SurfaceTarget::Texture2D: - glCompressedTextureSubImage2D(texture.handle, level, 0, 0, - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - static_cast(params.GetMipDepth(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::TextureCubemap: { - const std::size_t host_layer_size{params.GetHostLayerSize(level)}; - for (std::size_t face = 0; face < params.depth; ++face) { - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), - static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), 1, - internal_format, - static_cast(host_layer_size), buffer); - buffer += host_layer_size; - } - break; - } - default: - UNREACHABLE(); - } - } else { - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, - buffer); - break; - case SurfaceTarget::TextureBuffer: - ASSERT(level == 0); - glNamedBufferSubData(texture_buffer.handle, 0, - params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2D: - glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), - params.GetMipHeight(level), format, 
type, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D( - texture.handle, level, 0, 0, 0, static_cast(params.GetMipWidth(level)), - static_cast(params.GetMipHeight(level)), - static_cast(params.GetMipDepth(level)), format, type, buffer); - break; - case SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < params.depth; ++face) { - glTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), - params.GetMipWidth(level), params.GetMipHeight(level), 1, - format, type, buffer); - buffer += params.GetHostLayerSize(level); - } - break; - default: - UNREACHABLE(); - } - } -} - -void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); -} - -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { - LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); -} - -View CachedSurface::CreateView(const ViewParams& view_key) { - return CreateViewInner(view_key, false); -} - -View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { - auto view = std::make_shared(*this, view_key, is_proxy); - views[view_key] = view; - if (!is_proxy) - view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); - return view; -} - -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, - bool is_proxy_) - : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, - target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { - if (!is_proxy_) { - main_view = CreateTextureView(); - } -} - -CachedSurfaceView::~CachedSurfaceView() = default; - -void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { - ASSERT(params.num_levels == 1); - - if (params.target == SurfaceTarget::Texture3D) { - if (params.num_layers > 1) { - ASSERT(params.base_layer == 
0); - glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); - } else { - glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, - params.base_level, params.base_layer); - } - return; - } - - if (params.num_layers > 1) { - UNIMPLEMENTED_IF(params.base_layer != 0); - glFramebufferTexture(fb_target, attachment, GetTexture(), 0); - return; - } - - const GLenum view_target = surface.GetTarget(); - const GLuint texture = surface.GetTexture(); - switch (surface.GetSurfaceParams().target) { - case SurfaceTarget::Texture1D: - glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); - break; - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, - params.base_layer); - break; - default: - UNIMPLEMENTED(); - } -} - -GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { - if (GetSurfaceParams().IsBuffer()) { - return GetTexture(); - } - const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (current_swizzle == new_swizzle) { - return current_view; - } - current_swizzle = new_swizzle; - - const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); - OGLTextureView& view = entry->second; - if (!is_cache_miss) { - current_view = view.handle; - return view.handle; - } - view = CreateTextureView(); - current_view = view.handle; - - std::array swizzle{x_source, y_source, z_source, w_source}; - - switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - case 
PixelFormat::S8_UINT_D24_UNORM: - UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); - glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, - GetComponent(pixel_format, x_source == SwizzleSource::R)); - - // Make sure we sample the first component - std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { - return value == SwizzleSource::G ? SwizzleSource::R : value; - }); - [[fallthrough]]; - default: { - const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), - GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; - glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); - break; - } - } - return view.handle; -} - -OGLTextureView CachedSurfaceView::CreateTextureView() const { - OGLTextureView texture_view; - texture_view.Create(); - - if (target == GL_TEXTURE_3D) { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, 0, 1); - } else { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, params.base_layer, params.num_layers); - } - ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); - - return texture_view; -} - -TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - StateTracker& state_tracker_) - : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()}, - state_tracker{state_tracker_} { - src_framebuffer.Create(); - dst_framebuffer.Create(); -} - -TextureCacheOpenGL::~TextureCacheOpenGL() = default; - -Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared(gpu_addr, params, is_astc_supported); -} - -void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& 
dst_surface, - const VideoCommon::CopyParams& copy_params) { - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - if (src_params.type != dst_params.type) { - // A fallback is needed - return; - } - const auto src_handle = src_surface->GetTexture(); - const auto src_target = src_surface->GetTarget(); - const auto dst_handle = dst_surface->GetTexture(); - const auto dst_target = dst_surface->GetTarget(); - glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, - copy_params.source_y, copy_params.source_z, dst_handle, dst_target, - copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, - copy_params.dest_z, copy_params.width, copy_params.height, - copy_params.depth); -} - -void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) { - const auto& src_params{src_view->GetSurfaceParams()}; - const auto& dst_params{dst_view->GetSurfaceParams()}; - UNIMPLEMENTED_IF(src_params.depth != 1); - UNIMPLEMENTED_IF(dst_params.depth != 1); - +void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { state_tracker.NotifyScissor0(); - state_tracker.NotifyFramebuffer(); state_tracker.NotifyRasterizeEnable(); state_tracker.NotifyFramebufferSRGB(); - if (dst_params.srgb_conversion) { - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); - } + ASSERT(dst->BufferBits() == src->BufferBits()); + + glEnable(GL_FRAMEBUFFER_SRGB); glDisable(GL_RASTERIZER_DISCARD); glDisablei(GL_SCISSOR_TEST, 0); - glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); - - GLenum buffers = 0; - if (src_params.type == SurfaceType::ColorTexture) { - 
src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); - - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - const Common::Rectangle& src_rect = copy_config.src_rect; - const Common::Rectangle& dst_rect = copy_config.dst_rect; - const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - - glBlitFramebuffer(static_cast(src_rect.left), static_cast(src_rect.top), - static_cast(src_rect.right), static_cast(src_rect.bottom), - static_cast(dst_rect.left), static_cast(dst_rect.top), - static_cast(dst_rect.right), static_cast(dst_rect.bottom), - buffers, - is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? 
GL_LINEAR : GL_NEAREST); + const GLbitfield buffer_bits = dst->BufferBits(); + const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; + const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; + glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, + src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, + dst_region[1].x, dst_region[1].y, buffer_bits, + is_linear ? GL_LINEAR : GL_NEAREST); } -void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { - MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); - - const auto source_format = GetFormatTuple(src_params.pixel_format); - const auto dest_format = GetFormatTuple(dst_params.pixel_format); - - const std::size_t source_size = src_surface->GetHostSizeInBytes(); - const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); - - const std::size_t buffer_size = std::max(source_size, dest_size); - - GLuint copy_pbo_handle = FetchPBO(buffer_size); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); - - if (src_surface->IsCompressed()) { - glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast(source_size), - nullptr); - } else { - glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, - static_cast(source_size), nullptr); - } - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); - - const GLsizei width = static_cast(dst_params.width); - const GLsizei height = static_cast(dst_params.height); - const GLsizei depth = static_cast(dst_params.depth); - if (dst_surface->IsCompressed()) { - LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const 
ImageBufferMap& map, + size_t buffer_offset, + std::span swizzles) { + switch (image.info.type) { + case ImageType::e2D: + return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); + case ImageType::e3D: + return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); + case ImageType::Linear: + return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); + default: UNREACHABLE(); + break; + } +} + +void TextureCacheRuntime::InsertUploadMemoryBarrier() { + glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); +} + +FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const { + switch (type) { + case ImageType::e1D: + return format_properties[0].at(internal_format); + case ImageType::e2D: + case ImageType::Linear: + return format_properties[1].at(internal_format); + case ImageType::e3D: + return format_properties[2].at(internal_format); + default: + UNREACHABLE(); + return FormatProperties{}; + } +} + +TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} + +TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; + +ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, + bool insert_fence) { + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? 
&syncs[index] : nullptr; + return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); +} + +size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional index = FindBuffer(requested_size); index) { + return *index; + } + + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + glNamedBufferStorage(buffer.handle, requested_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast(glMapNamedBufferRange(buffer.handle, 0, requested_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + + syncs.emplace_back(); + sizes.push_back(requested_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { + size_t smallest_buffer = std::numeric_limits::max(); + std::optional found; + const size_t num_buffers = sizes.size(); + for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + GLint status; + glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); + if (status != GL_SIGNALED) { + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; + } + return found; +} + +Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, + VAddr cpu_addr_) + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { + if (CanBeAccelerated(runtime, info)) { + flags |= ImageFlagBits::AcceleratedUpload; + } + if (IsConverted(runtime.device, info.format, info.type)) { + flags |= ImageFlagBits::Converted; + gl_internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; + gl_store_format = GL_RGBA8; + gl_format = GL_RGBA; + gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - switch (dst_params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, - dest_format.type, nullptr); + const auto& tuple = GetFormatTuple(info.format); + gl_internal_format = tuple.internal_format; + gl_store_format = tuple.store_format; + gl_format = tuple.format; + gl_type = tuple.type; + } + const GLenum target = ImageTarget(info); + const GLsizei width = info.size.width; + const GLsizei height = info.size.height; + const GLsizei depth = info.size.depth; + const int max_host_mip_levels = std::bit_width(info.size.width); + const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); + const GLsizei num_layers = info.resources.layers; + const GLsizei num_samples = info.num_samples; + + GLuint handle = 0; + if (target != GL_TEXTURE_BUFFER) { + texture.Create(target); + handle = texture.handle; + } + switch (target) { + case GL_TEXTURE_1D_ARRAY: + glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers); + break; + case GL_TEXTURE_2D_ARRAY: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers); + break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { + // TODO: Where should 'fixedsamplelocations' come from? 
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); + glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, + height >> samples_y, num_layers, GL_FALSE); + break; + } + case GL_TEXTURE_RECTANGLE: + glTextureStorage2D(handle, num_levels, gl_store_format, width, height); + break; + case GL_TEXTURE_3D: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + buffer.Create(); + glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + break; + default: + UNREACHABLE_MSG("Invalid target=0x{:x}", target); + break; + } + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this); + glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, + static_cast(name.size()), name.data()); + } +} + +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + u32 current_row_length = std::numeric_limits::max(); + u32 current_image_height = std::numeric_limits::max(); + + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); + } + CopyBufferToImage(copy, buffer_offset); + } +} + +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + for (const VideoCommon::BufferCopy& copy : copies) { + glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, + copy.dst_offset, copy.size); + 
} +} + +void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API + + glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); + glPixelStorei(GL_PACK_ALIGNMENT, 1); + + u32 current_row_length = std::numeric_limits::max(); + u32 current_image_height = std::numeric_limits::max(); + + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); + } + CopyImageToBuffer(copy, buffer_offset); + } +} + +void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + // Compressed formats don't have a pixel format or type + const bool is_compressed = gl_format == GL_NONE; + const void* const offset = reinterpret_cast(copy.buffer_offset + buffer_offset); + + switch (info.type) { + case ImageType::e1D: + if (is_compressed) { + glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, + copy.image_subresource.num_layers, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e2D: + case ImageType::Linear: + if (is_compressed) { + glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width, + 
copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, + copy.image_subresource.base_layer, copy.image_extent.width, + copy.image_extent.height, copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e3D: + if (is_compressed) { + glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_offset.z, copy.image_extent.width, + copy.image_extent.height, copy.image_extent.depth, gl_internal_format, + static_cast(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, copy.image_offset.z, + copy.image_extent.width, copy.image_extent.height, + copy.image_extent.depth, gl_format, gl_type, offset); + } + break; + default: + UNREACHABLE(); + } +} + +void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + const GLint x_offset = copy.image_offset.x; + const GLsizei width = copy.image_extent.width; + + const GLint level = copy.image_subresource.base_level; + const GLsizei buffer_size = static_cast(copy.buffer_size); + void* const offset = reinterpret_cast(copy.buffer_offset + buffer_offset); + + GLint y_offset = 0; + GLint z_offset = 0; + GLsizei height = 1; + GLsizei depth = 1; + + switch (info.type) { + case ImageType::e1D: + y_offset = copy.image_subresource.base_layer; + height = copy.image_subresource.num_layers; + break; + case ImageType::e2D: + case ImageType::Linear: + y_offset = copy.image_offset.y; + z_offset = copy.image_subresource.base_layer; + height = copy.image_extent.height; + depth = copy.image_subresource.num_layers; + break; + case ImageType::e3D: + y_offset = copy.image_offset.y; + z_offset = 
copy.image_offset.z; + height = copy.image_extent.height; + depth = copy.image_extent.depth; + break; + default: + UNREACHABLE(); + } + // Compressed formats don't have a pixel format or type + const bool is_compressed = gl_format == GL_NONE; + if (is_compressed) { + glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, + height, depth, buffer_size, offset); + } else { + glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height, + depth, gl_format, gl_type, buffer_size, offset); + } +} + +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image) + : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { + const Device& device = runtime.device; + if (True(image.flags & ImageFlagBits::Converted)) { + internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; + } else { + internal_format = GetFormatTuple(format).internal_format; + } + VideoCommon::SubresourceRange flatten_range = info.range; + std::array handles; + stored_views.reserve(2); + + switch (info.type) { + case ImageViewType::e1DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e1D: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + break; + case ImageViewType::e2DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e2D: + if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { + // 2D and 2D array views on a 3D textures are used exclusively for render targets + ASSERT(info.range.extent.levels == 1); + const VideoCommon::SubresourceRange slice_range{ + .base = {.level = info.range.base.level, .layer = 0}, + .extent = {.levels = 1, .layers = 1}, + }; + glGenTextures(1, handles.data()); + SetupView(device, 
image, ImageViewType::e3D, handles[0], info, slice_range); break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::TextureCubemap: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target); - UNREACHABLE(); + } + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); + break; + case ImageViewType::e3D: + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + break; + case ImageViewType::CubeArray: + flatten_range.extent.layers = 6; + [[fallthrough]]; + case ImageViewType::Cube: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + break; + case ImageViewType::Rect: + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + break; + case ImageViewType::Buffer: + glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); + SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + break; + } + default_handle = Handle(info.type); +} + +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) + : VideoCommon::ImageViewBase{params}, 
views{runtime.null_image_views} {} + +void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, + GLuint handle, const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range) { + if (info.type == ImageViewType::Buffer) { + // TODO: Take offset from buffer cache + glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, + image.guest_size_bytes); + } else { + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); } } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - glTextureBarrier(); + if (device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this, view_type); + glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + } + stored_views.emplace_back().handle = handle; + views[static_cast(view_type)] = handle; } -GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { - ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); - const u32 l2 = Common::Log2Ceil64(static_cast(buffer_size)); - OGLBuffer& cp = copy_pbo_cache[l2]; - if (cp.handle == 0) { - const std::size_t ceil_size = 1ULL << l2; - cp.Create(); - cp.MakeStreamCopy(ceil_size); +Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { + const GLenum compare_mode = config.depth_compare_enabled ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE; + const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func); + const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None); + const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter); + const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter); + const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE; + + UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); + UNIMPLEMENTED_IF(config.float_coord_normalization != 0); + + sampler.Create(); + const GLuint handle = sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); + glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + + if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + } else { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); } - return cp.handle; + if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { + glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); + } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { + 
LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); + } + if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { + glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); + } else if (seamless == GL_FALSE) { + // We default to false because it's more common + LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + } +} + +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of + // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared + // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with + // mismatching size, this is why core framebuffers are preferred. + GLuint handle; + glGenFramebuffers(1, &handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + + GLsizei num_buffers = 0; + std::array gl_draw_buffers; + gl_draw_buffers.fill(GL_NONE); + + for (size_t index = 0; index < color_buffers.size(); ++index) { + const ImageView* const image_view = color_buffers[index]; + if (!image_view) { + continue; + } + buffer_bits |= GL_COLOR_BUFFER_BIT; + gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index]; + num_buffers = static_cast(index + 1); + + const GLenum attachment = static_cast(GL_COLOR_ATTACHMENT0 + index); + AttachTexture(handle, attachment, image_view); + } + + if (const ImageView* const image_view = depth_buffer; image_view) { + if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { + buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } else { + buffer_bits |= GL_DEPTH_BUFFER_BIT; + } + const GLenum attachment = AttachmentType(image_view->format); + AttachTexture(handle, attachment, image_view); + } + + if (num_buffers > 1) { + 
glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data()); + } else if (num_buffers > 0) { + glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]); + } else { + glNamedFramebufferDrawBuffer(handle, GL_NONE); + } + + glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width); + glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height); + // TODO + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...); + + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(key); + glObjectLabel(GL_FRAMEBUFFER, handle, static_cast(name.size()), name.data()); + } + framebuffer.handle = handle; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 72b284fab..04193e31e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -4,157 +4,247 @@ #pragma once -#include -#include #include -#include -#include -#include +#include #include -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; - -class CachedSurfaceView; -class CachedSurface; -class TextureCacheOpenGL; +class Device; +class ProgramManager; class StateTracker; -using Surface = std::shared_ptr; -using View = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCache; +class Framebuffer; +class Image; +class ImageView; +class Sampler; 
-class CachedSurface final : public VideoCommon::SurfaceBase { - friend CachedSurfaceView; +using VideoCommon::ImageId; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +class ImageBufferMap { public: - explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_); - ~CachedSurface(); + explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); + ~ImageBufferMap(); - void UploadTexture(const std::vector& staging_buffer) override; - void DownloadTexture(std::vector& staging_buffer) override; - - GLenum GetTarget() const { - return target; + GLuint Handle() const noexcept { + return handle; } - GLuint GetTexture() const { + std::span Span() const noexcept { + return span; + } + +private: + std::span span; + OGLSync* sync; + GLuint handle; +}; + +struct FormatProperties { + GLenum compatibility_class; + bool compatibility_by_size; + bool is_compressed; +}; + +class TextureCacheRuntime { + friend Framebuffer; + friend Image; + friend ImageView; + friend Sampler; + +public: + explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, + StateTracker& state_tracker); + ~TextureCacheRuntime(); + + void Finish(); + + ImageBufferMap MapUploadBuffer(size_t size); + + ImageBufferMap MapDownloadBuffer(size_t size); + + void CopyImage(Image& dst, Image& src, std::span copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + UNIMPLEMENTED(); + } + + bool CanImageBeCopied(const Image& dst, const Image& src); + + void EmulateCopyImage(Image& dst, Image& src, std::span copies); + + void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void AccelerateImageUpload(Image& image, const ImageBufferMap& 
map, size_t buffer_offset, + std::span swizzles); + + void InsertUploadMemoryBarrier(); + + FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; + +private: + struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); + + ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); + + size_t RequestBuffer(size_t requested_size); + + std::optional FindBuffer(size_t requested_size); + + std::vector syncs; + std::vector buffers; + std::vector maps; + std::vector sizes; + GLenum storage_flags; + GLenum map_flags; + }; + + const Device& device; + StateTracker& state_tracker; + UtilShaders util_shaders; + + std::array, 3> format_properties; + + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; + + OGLTexture null_image_1d_array; + OGLTexture null_image_cube_array; + OGLTexture null_image_3d; + OGLTexture null_image_rect; + OGLTextureView null_image_view_1d; + OGLTextureView null_image_view_2d; + OGLTextureView null_image_view_2d_array; + OGLTextureView null_image_view_cube; + + std::array null_image_views; +}; + +class Image : public VideoCommon::ImageBase { + friend ImageView; + +public: + explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, + VAddr cpu_addr); + + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); + + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); + + void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span copies); + + GLuint Handle() const noexcept { return texture.handle; } - bool IsCompressed() const { - return is_compressed; - } - -protected: - void DecorateSurfaceName() override; - - View CreateView(const ViewParams& view_key) override; - View CreateViewInner(const ViewParams& view_key, bool is_proxy); - 
private: - void UploadTextureMipmap(u32 level, const std::vector& staging_buffer); + void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); - GLenum internal_format{}; - GLenum format{}; - GLenum type{}; - bool is_compressed{}; - GLenum target{}; - u32 view_count{}; + void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer texture_buffer; + OGLTextureView store_view; + OGLBuffer buffer; + GLenum gl_internal_format = GL_NONE; + GLenum gl_store_format = GL_NONE; + GLenum gl_format = GL_NONE; + GLenum gl_type = GL_NONE; }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class ImageView : public VideoCommon::ImageViewBase { + friend Image; + public: - explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_); - ~CachedSurfaceView(); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - /// @brief Attaches this texture view to the currently bound fb_target framebuffer - /// @param attachment Attachment to bind textures to - /// @param fb_target Framebuffer target to attach to (e.g. 
DRAW_FRAMEBUFFER) - void Attach(GLenum attachment, GLenum fb_target) const; - - GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); - - void MarkAsModified(u64 tick) { - surface.MarkAsModified(true, tick); + [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { + return views[static_cast(query_type)]; } - GLuint GetTexture() const { - if (is_proxy) { - return surface.GetTexture(); - } - return main_view.handle; + [[nodiscard]] GLuint DefaultHandle() const noexcept { + return default_handle; } - GLenum GetFormat() const { - return format; - } - - const SurfaceParams& GetSurfaceParams() const { - return surface.GetSurfaceParams(); + [[nodiscard]] GLenum Format() const noexcept { + return internal_format; } private: - OGLTextureView CreateTextureView() const; + void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range); - CachedSurface& surface; - const GLenum format; - const GLenum target; - const bool is_proxy; - - std::unordered_map view_cache; - OGLTextureView main_view; - - // Use an invalid default so it always fails the comparison test - u32 current_swizzle = 0xffffffff; - GLuint current_view = 0; + std::array views{}; + std::vector stored_views; + GLuint default_handle = 0; + GLenum internal_format = GL_NONE; }; -class TextureCacheOpenGL final : public TextureCacheBase { +class ImageAlloc : public VideoCommon::ImageAllocBase {}; + +class Sampler { public: - explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - StateTracker& state_tracker); - ~TextureCacheOpenGL(); + explicit Sampler(TextureCacheRuntime&, 
const Tegra::Texture::TSCEntry&); -protected: - Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - - void ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) override; - - void ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) override; - - void BufferCopy(Surface& src_surface, Surface& dst_surface) override; + GLuint Handle() const noexcept { + return sampler.handle; + } private: - GLuint FetchPBO(std::size_t buffer_size); - - StateTracker& state_tracker; - - OGLFramebuffer src_framebuffer; - OGLFramebuffer dst_framebuffer; - std::unordered_map copy_pbo_cache; + OGLSampler sampler; }; +class Framebuffer { +public: + explicit Framebuffer(TextureCacheRuntime&, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + + [[nodiscard]] GLuint Handle() const noexcept { + return framebuffer.handle; + } + + [[nodiscard]] GLbitfield BufferBits() const noexcept { + return buffer_bits; + } + +private: + OGLFramebuffer framebuffer; + GLbitfield buffer_bits = GL_NONE; +}; + +struct TextureCacheParams { + static constexpr bool ENABLE_VALIDATION = true; + static constexpr bool FRAMEBUFFER_BLITS = true; + static constexpr bool HAS_EMULATED_COPIES = true; + + using Runtime = OpenGL::TextureCacheRuntime; + using Image = OpenGL::Image; + using ImageAlloc = OpenGL::ImageAlloc; + using ImageView = OpenGL::ImageView; + using Sampler = OpenGL::Sampler; + using Framebuffer = OpenGL::Framebuffer; +}; + +using TextureCache = VideoCommon::TextureCache; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index dd4ee3361..cbccfdeb4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { return GL_FILL; } +inline 
GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) { + switch (filter) { + case Tegra::Texture::SamplerReduction::WeightedAverage: + return GL_WEIGHTED_AVERAGE_ARB; + case Tegra::Texture::SamplerReduction::Min: + return GL_MIN; + case Tegra::Texture::SamplerReduction::Max: + return GL_MAX; + } + UNREACHABLE_MSG("Invalid reduction filter={}", static_cast(filter)); + return GL_WEIGHTED_AVERAGE_ARB; +} + inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { // Enumeration order matches register order. We can convert it arithmetically. return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast(swizzle); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index cbfaaa99c..dd77a543c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -23,10 +23,10 @@ #include "core/telemetry_session.h" #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/textures/decoders.h" namespace OpenGL { @@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - PrepareRendertarget(framebuffer); RenderScreenshot(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + state_tracker.BindFramebuffer(0); DrawScreen(emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - const auto pixel_format{ - 
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; - const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; - u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; - rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); - // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; - VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, - framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, - gl_framebuffer_data.data(), host_ptr); + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + const u64 size_in_bytes{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; + const std::span input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, + 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(framebuffer.stride)); // Update existing texture @@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() { glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + // Generate presentation sampler + present_sampler.Create(); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + // Generate VBO handle for drawing vertex_buffer.Create(); @@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + // Enable 
seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); @@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const auto pixel_format{ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); GLint internal_format; @@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, internal_format = GL_RGBA8; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - static_cast(framebuffer.pixel_format)); + // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + // static_cast(framebuffer.pixel_format)); } texture.resource.Release(); @@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyPolygonModes(); state_tracker.NotifyViewport0(); state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask0(); + state_tracker.NotifyColorMask(0); state_tracker.NotifyBlend0(); state_tracker.NotifyFramebuffer(); state_tracker.NotifyFrontFace(); @@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } glBindTextureUnit(0, screen_info.display_texture); - glBindSampler(0, 0); + glBindSampler(0, present_sampler.handle); glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); @@ -473,6 +482,8 @@ void 
RendererOpenGL::RenderScreenshot() { DrawScreen(layout); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, renderer_settings.screenshot_bits); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 376f88766..44e109794 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -102,6 +102,7 @@ private: StateTracker state_tracker{gpu}; // OpenGL object IDs + OGLSampler present_sampler; OGLBuffer vertex_buffer; OGLProgram vertex_program; OGLProgram fragment_program; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp new file mode 100644 index 000000000..eb849cbf2 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -0,0 +1,224 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" +#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/opengl_copy_bc4_comp.h" +#include "video_core/host_shaders/pitch_unswizzle_comp.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +using namespace HostShaders; + +using VideoCommon::Extent3D; +using VideoCommon::ImageCopy; +using VideoCommon::ImageType; +using VideoCommon::SwizzleParameters; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; +using VideoCore::Surface::BytesPerBlock; + +namespace { + +OGLProgram MakeProgram(std::string_view source) { + OGLShader shader; + shader.Create(source, GL_COMPUTE_SHADER); + + OGLProgram program; + program.Create(true, false, shader.handle); + return program; +} + +} // Anonymous namespace + +UtilShaders::UtilShaders(ProgramManager& program_manager_) + : program_manager{program_manager_}, + block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), + block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), + pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), + copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { + const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); + swizzle_table_buffer.Create(); + glNamedBufferStorage(swizzle_table_buffer.handle, 
sizeof(swizzle_table), &swizzle_table, 0); +} + +UtilShaders::~UtilShaders() = default; + +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + + const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.layer_stride); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::BlockLinearUpload3D(Image& image, const 
ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; + + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); + + const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.slice_size); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glUniform1ui(8, params.block_depth); + glUniform1ui(9, params.block_depth_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::PitchUpload(Image& image, const 
ImageBufferMap& map, size_t buffer_offset, + std::span swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_INPUT_BUFFER = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + static constexpr GLuint LOC_ORIGIN = 0; + static constexpr GLuint LOC_DESTINATION = 1; + static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; + static constexpr GLuint LOC_PITCH = 3; + + const u32 bytes_per_block = BytesPerBlock(image.info.format); + const GLenum format = StoreFormat(bytes_per_block); + const u32 pitch = image.info.pitch; + + UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), + "Non-power of two images are not implemented"); + + program_manager.BindHostCompute(pitch_unswizzle_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glUniform2ui(LOC_ORIGIN, 0, 0); + glUniform2i(LOC_DESTINATION, 0, 0); + glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); + glUniform1ui(LOC_PITCH, pitch); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span copies) { + static constexpr GLuint BINDING_INPUT_IMAGE = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; + static constexpr GLuint LOC_SRC_OFFSET = 0; + static constexpr GLuint LOC_DST_OFFSET = 1; + + 
program_manager.BindHostCompute(copy_bc4_program.handle); + + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_subresource.base_layer == 0); + ASSERT(copy.src_subresource.num_layers == 1); + ASSERT(copy.dst_subresource.base_layer == 0); + ASSERT(copy.dst_subresource.num_layers == 1); + + glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); + glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, + GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), + copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); + glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); + } + program_manager.RestoreGuestCompute(); +} + +GLenum StoreFormat(u32 bytes_per_block) { + switch (bytes_per_block) { + case 1: + return GL_R8UI; + case 2: + return GL_R16UI; + case 4: + return GL_R32UI; + case 8: + return GL_RG32UI; + case 16: + return GL_RGBA32UI; + } + UNREACHABLE(); + return GL_R8UI; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h new file mode 100644 index 000000000..359997255 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +class Image; +class ImageBufferMap; +class ProgramManager; + +class UtilShaders { +public: + explicit UtilShaders(ProgramManager& program_manager); + ~UtilShaders(); + + void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span swizzles); + + void CopyBC4(Image& dst_image, Image& src_image, + std::span copies); + +private: + ProgramManager& program_manager; + + OGLBuffer swizzle_table_buffer; + + OGLProgram block_linear_unswizzle_2d_program; + OGLProgram block_linear_unswizzle_3d_program; + OGLProgram pitch_unswizzle_program; + OGLProgram copy_bc4_program; +}; + +GLenum StoreFormat(u32 bytes_per_block); + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp deleted file mode 100644 index 6d7bb16b2..000000000 --- a/src/video_core/renderer_opengl/utils.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include - -#include -#include - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { - if (!GLAD_GL_KHR_debug) { - // We don't need to throw an error as this is just for debugging - return; - } - - std::string object_label; - if (extra_info.empty()) { - switch (identifier) { - case GL_TEXTURE: - object_label = fmt::format("Texture@0x{:016X}", addr); - break; - case GL_PROGRAM: - object_label = fmt::format("Shader@0x{:016X}", addr); - break; - default: - object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); - break; - } - } else { - object_label = fmt::format("{}@0x{:016X}", extra_info, addr); - } - glObjectLabel(identifier, handle, -1, static_cast(object_label.c_str())); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h deleted file mode 100644 index 9c09ee12c..000000000 --- a/src/video_core/renderer_opengl/utils.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include "common/common_types.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp new file mode 100644 index 000000000..87c8e5693 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -0,0 +1,624 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" +#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" +#include "video_core/host_shaders/full_screen_triangle_vert_spv.h" +#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" +#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/renderer_vulkan/blit_image.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/surface.h" + +namespace Vulkan { + +using VideoCommon::ImageViewType; + +namespace { +struct PushConstants { + std::array tex_scale; + std::array tex_offset; +}; + +template +inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = nullptr, +}; +constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{ + TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, + TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>, +}; +constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 1, + .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, +}; +constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 
static_cast(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), + .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), +}; +constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + .size = sizeof(PushConstants), +}; +constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 0, + .pVertexBindingDescriptions = nullptr, + .vertexAttributeDescriptionCount = 0, + .pVertexAttributeDescriptions = nullptr, +}; +constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + .primitiveRestartEnable = VK_FALSE, +}; +constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, +}; +constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, +}; +constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + 
.sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, +}; +constexpr std::array DYNAMIC_STATES{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, +}; +constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(DYNAMIC_STATES.size()), + .pDynamicStates = DYNAMIC_STATES.data(), +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 0, + .pAttachments = nullptr, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + 
.depthTestEnable = VK_TRUE, + .depthWriteEnable = VK_TRUE, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_FALSE, + .front = VkStencilOpState{}, + .back = VkStencilOpState{}, + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, +}; + +template +inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = filter, + .minFilter = filter, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_TRUE, +}; + +constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( + const VkDescriptorSetLayout* set_layout) { + return VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &PUSH_CONSTANT_RANGE, + }; +} + +constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage, + VkShaderModule shader) { + return VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = stage, + .module = shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; +} + +constexpr std::array MakeStages( + VkShaderModule vertex_shader, VkShaderModule fragment_shader) { + return std::array{ + PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader), + PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, 
fragment_shader), + }; +} + +void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, + VkSampler sampler, VkImageView image_view) { + const VkDescriptorImageInfo image_info{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkWriteDescriptorSet write_descriptor_set{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); +} + +void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, + VkSampler sampler, VkImageView image_view_0, + VkImageView image_view_1) { + const VkDescriptorImageInfo image_info_0{ + .sampler = sampler, + .imageView = image_view_0, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkDescriptorImageInfo image_info_1{ + .sampler = sampler, + .imageView = image_view_1, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const std::array write_descriptor_sets{ + VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info_0, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }, + VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info_1, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }, + }; + 
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, + const std::array& dst_region, + const std::array& src_region) { + const VkOffset2D offset{ + .x = std::min(dst_region[0].x, dst_region[1].x), + .y = std::min(dst_region[0].y, dst_region[1].y), + }; + const VkExtent2D extent{ + .width = static_cast(std::abs(dst_region[1].x - dst_region[0].x)), + .height = static_cast(std::abs(dst_region[1].y - dst_region[0].y)), + }; + const VkViewport viewport{ + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + // TODO: Support scissored blits + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const float scale_x = static_cast(src_region[1].x - src_region[0].x); + const float scale_y = static_cast(src_region[1].y - src_region[0].y); + const PushConstants push_constants{ + .tex_scale = {scale_x, scale_y}, + .tex_offset = {static_cast(src_region[0].x), static_cast(src_region[0].y)}, + }; + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); +} + +} // Anonymous namespace + +BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_, + StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, + one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( + ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), + two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( + TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), + one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), + two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + 
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( + PipelineLayoutCreateInfo(one_texture_set_layout.address()))), + two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( + PipelineLayoutCreateInfo(two_textures_set_layout.address()))), + full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), + blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), + convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), + linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), + nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { + if (device.IsExtShaderStencilExportSupported()) { + blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV); + } +} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = operation, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkSampler sampler = is_linear ? 
*linear_sampler : *nearest_sampler; + const VkPipeline pipeline = FindOrEmplacePipeline(key); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, + &device = device](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, + VkImageView src_depth_view, VkImageView src_stencil_view, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); + ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); + + const VkPipelineLayout layout = *two_textures_pipeline_layout; + const VkSampler sampler = *nearest_sampler; + const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, + src_stencil_view, descriptor_set, + &device = device](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, + src_stencil_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, 
layout, dst_region, src_region); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + + ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkSampler sampler = *nearest_sampler; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + const VkExtent2D extent{ + .width = src_image_view.size.width, + .height = src_image_view.size.height, + }; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, + &device = device](vk::CommandBuffer cmdbuf) { + const VkOffset2D offset{ + .x = 0, + .y = 0, + }; + const VkViewport viewport{ + .x = 
0.0f, + .y = 0.0f, + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 0.0f, + }; + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const PushConstants push_constants{ + .tex_scale = {viewport.width, viewport.height}, + .tex_offset = {0.0f, 0.0f}, + }; + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + + // TODO: Barriers + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(blit_color_keys, key); + if (it != blit_color_keys.end()) { + return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; + } + blit_color_keys.push_back(key); + + const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag); + const VkPipelineColorBlendAttachmentState blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + // TODO: programmable blending + const VkPipelineColorBlendStateCreateInfo color_blend_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = 
&blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_create_info, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *blit_color_pipelines.back(); +} + +VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { + if (blit_depth_stencil_pipeline) { + return *blit_depth_stencil_pipeline; + } + const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); + blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + 
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *two_textures_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); + return *blit_depth_stencil_pipeline; +} + +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + 
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h new file mode 100644 index 000000000..2c2790bf9 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/texture_cache/types.h" + +namespace Vulkan { + +using VideoCommon::Offset2D; + +class VKDevice; +class VKScheduler; +class StateTracker; + +class Framebuffer; +class ImageView; + +struct BlitImagePipelineKey { + constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; + + VkRenderPass renderpass; + Tegra::Engines::Fermi2D::Operation operation; +}; + +class BlitImageHelper { +public: + explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler, + StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + ~BlitImageHelper(); + + void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void BlitDepthStencil(const Framebuffer* 
dst_framebuffer, VkImageView src_depth_view, + VkImageView src_stencil_view, const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + +private: + void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + const ImageView& src_image_view); + + [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); + + [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); + + void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + + void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + + const VKDevice& device; + VKScheduler& scheduler; + StateTracker& state_tracker; + + vk::DescriptorSetLayout one_texture_set_layout; + vk::DescriptorSetLayout two_textures_set_layout; + DescriptorAllocator one_texture_descriptor_allocator; + DescriptorAllocator two_textures_descriptor_allocator; + vk::PipelineLayout one_texture_pipeline_layout; + vk::PipelineLayout two_textures_pipeline_layout; + vk::ShaderModule full_screen_vert; + vk::ShaderModule blit_color_to_color_frag; + vk::ShaderModule blit_depth_stencil_frag; + vk::ShaderModule convert_depth_to_float_frag; + vk::ShaderModule convert_float_to_depth_frag; + vk::Sampler linear_sampler; + vk::Sampler nearest_sampler; + + std::vector blit_color_keys; + std::vector blit_color_pipelines; + vk::Pipeline blit_depth_stencil_pipeline; + vk::Pipeline convert_d32_to_r32_pipeline; + vk::Pipeline convert_r32_to_d32_pipeline; + vk::Pipeline convert_d16_to_r16_pipeline; 
+ vk::Pipeline convert_r16_to_d16_pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5ec43db11..67dd10500 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta logic_op.Assign(PackLogicOp(regs.logic_op.operation)); rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); + msaa_mode.Assign(regs.multisample_mode); raw2 = 0; const auto test_func = diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index c26b77790..7e95e6fce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -186,6 +186,7 @@ struct FixedPipelineState { BitField<19, 4, u32> logic_op; BitField<23, 1, u32> rasterize_enable; BitField<24, 4, Maxwell::PrimitiveTopology> topology; + BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 58e117eb3..40501e7fa 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -122,7 +122,7 @@ struct FormatTuple { {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM - {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM + {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM 
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT @@ -163,7 +163,7 @@ struct FormatTuple { {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT {VK_FORMAT_UNDEFINED}, // R16G16_UINT - {VK_FORMAT_UNDEFINED}, // R16G16_SINT + {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB @@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { - tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) - ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 - : VK_FORMAT_A8B8G8R8_UNORM_PACK32; + const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format); + tuple.format = is_srgb ? 
VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32; } const bool attachable = tuple.usage & Attachable; const bool storage = tuple.usage & Storage; VkFormatFeatureFlags usage; - if (format_type == FormatType::Buffer) { + switch (format_type) { + case FormatType::Buffer: usage = VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - } else { + break; + case FormatType::Linear: + case FormatType::Optimal: usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; if (attachable) { @@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo if (storage) { usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } + break; } return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } @@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) return {}; } +VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) { + switch (reduction) { + case Tegra::Texture::SamplerReduction::WeightedAverage: + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; + case Tegra::Texture::SamplerReduction::Min: + return VK_SAMPLER_REDUCTION_MODE_MIN_EXT; + case Tegra::Texture::SamplerReduction::Max: + return VK_SAMPLER_REDUCTION_MODE_MAX_EXT; + } + UNREACHABLE_MSG("Invalid sampler mode={}", static_cast(reduction)); + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7e213452f..1a90f192e 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); +VkSamplerReductionMode 
SamplerReduction(Tegra::Texture::SamplerReduction reduction); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ea4b7c1e6..7f521cb9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() { return library; } -std::pair CreateInstance( - Common::DynamicLibrary& library, vk::InstanceDispatch& dld, - WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { +std::pair CreateInstance(Common::DynamicLibrary& library, + vk::InstanceDispatch& dld, WindowSystemType window_type, + bool enable_debug_utils, bool enable_layers) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); return {}; @@ -133,7 +133,7 @@ std::pair CreateInstance( if (window_type != Core::Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } - if (enable_layers) { + if (enable_debug_utils) { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); @@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { bool RendererVulkan::Init() { library = OpenVulkanLibrary(); std::tie(instance, instance_version) = CreateInstance( - library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); + library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { return false; } @@ -447,7 +447,8 @@ void RendererVulkan::Report() const { std::vector RendererVulkan::EnumerateDevices() { vk::InstanceDispatch dld; Common::DynamicLibrary library = OpenVulkanLibrary(); - vk::Instance instance = CreateInstance(library, dld).first; + vk::Instance 
instance = + CreateInstance(library, dld, WindowSystemType::Headless, false, false).first; if (!instance) { return {}; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 977b86003..74642fba4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -33,10 +33,9 @@ class VKDevice; class VKMemoryManager; class VKSwapchain; class VKScheduler; -class VKImage; struct VKScreenInfo { - VKImage* image{}; + VkImageView image_view{}; u32 width{}; u32 height{}; bool is_srgb{}; diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/renderer_vulkan/shaders/blit.frag deleted file mode 100644 index a06ecd24a..000000000 --- a/src/video_core/renderer_vulkan/shaders/blit.frag +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (location = 0) in vec2 frag_tex_coord; - -layout (location = 0) out vec4 color; - -layout (binding = 1) uniform sampler2D color_texture; - -void main() { - color = texture(color_texture, frag_tex_coord); -} diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/renderer_vulkan/shaders/blit.vert deleted file mode 100644 index c64d9235a..000000000 --- a/src/video_core/renderer_vulkan/shaders/blit.vert +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec2 vert_tex_coord; - -layout (location = 0) out vec2 frag_tex_coord; - -layout (set = 0, binding = 0) uniform MatrixBlock { - mat4 modelview_matrix; -}; - -void main() { - gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/renderer_vulkan/shaders/quad_array.comp deleted file mode 100644 index 5a5703308..000000000 --- a/src/video_core/renderer_vulkan/shaders/quad_array.comp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (local_size_x = 1024) in; - -layout (std430, set = 0, binding = 0) buffer OutputBuffer { - uint output_indexes[]; -}; - -layout (push_constant) uniform PushConstants { - uint first; -}; - -void main() { - uint primitive = gl_GlobalInvocationID.x; - if (primitive * 6 >= output_indexes.length()) { - return; - } - - const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); - for (uint vertex = 0; vertex < 6; ++vertex) { - uint index = first + primitive * 4 + quad_map[vertex]; - output_indexes[primitive * 6 + vertex] = index; - } -} diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp deleted file mode 100644 index 5a472ba9b..000000000 --- 
a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -/* - * Build instructions: - * $ glslangValidator -V quad_indexed.comp -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core - -layout (local_size_x = 1024) in; - -layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { - uint input_indexes[]; -}; - -layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { - uint output_indexes[]; -}; - -layout (push_constant) uniform PushConstants { - uint base_vertex; - int index_shift; // 0: uint8, 1: uint16, 2: uint32 -}; - -void main() { - int primitive = int(gl_GlobalInvocationID.x); - if (primitive * 6 >= output_indexes.length()) { - return; - } - - int index_size = 8 << index_shift; - int flipped_shift = 2 - index_shift; - int mask = (1 << flipped_shift) - 1; - - const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); - for (uint vertex = 0; vertex < 6; ++vertex) { - int offset = primitive * 4 + quad_swizzle[vertex]; - int int_offset = offset >> flipped_shift; - int bit_offset = (offset & mask) * index_size; - uint packed_input = input_indexes[int_offset]; - uint index = bitfieldExtract(packed_input, bit_offset, index_size); - output_indexes[primitive * 6 + vertex] = index + base_vertex; - } -} diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/renderer_vulkan/shaders/uint8.comp deleted file mode 100644 index a320f3ae0..000000000 --- a/src/video_core/renderer_vulkan/shaders/uint8.comp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - -#version 460 core -#extension GL_EXT_shader_16bit_storage : require -#extension GL_EXT_shader_8bit_storage : require - -layout (local_size_x = 1024) in; - -layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { - uint8_t input_indexes[]; -}; - -layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { - uint16_t output_indexes[]; -}; - -void main() { - uint id = gl_GlobalInvocationID.x; - if (id < input_indexes.length()) { - output_indexes[id] = uint16_t(input_indexes[id]); - } -} diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b5b60309e..d3a83f22f 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -16,12 +16,12 @@ #include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/gpu.h" -#include "video_core/morton.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -29,108 +29,12 @@ #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" +#include "video_core/textures/decoders.h" namespace Vulkan { namespace { -// Generated from the "shaders/" directory, read the instructions there. 
-constexpr u8 blit_vertex_code[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 
0xfd, 0x00, 0x01, 0x00, - 0x38, 0x00, 0x01, 0x00}; - -constexpr u8 blit_fragment_code[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 
0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; - struct ScreenRectVertex { ScreenRectVertex() = default; explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} @@ -173,9 +77,9 @@ constexpr std::array MakeOrthographicMatrix(f32 width, f32 height) { // clang-format on } -std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { +u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { using namespace VideoCore::Surface; - return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); + return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); } std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { @@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); - VKImage* blit_image = use_accelerated ? 
screen_info.image : raw_images[image_index].get(); - - UpdateDescriptorSet(image_index, blit_image->GetPresentView()); + UpdateDescriptorSet(image_index, + use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); BufferData data; SetUniformData(data, framebuffer); SetVertexData(data, framebuffer); auto map = buffer_commit->Map(); - std::memcpy(map.GetAddress(), &data, sizeof(data)); + std::memcpy(map.Address(), &data, sizeof(data)); if (!use_accelerated) { const u64 image_offset = GetRawImageOffset(framebuffer, image_index); - const auto pixel_format = - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); - rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); + const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); + const size_t size_bytes = GetSizeInBytes(framebuffer); + rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; - VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, - framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, - map.GetAddress() + image_offset, host_ptr); - - blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + Tegra::Texture::UnswizzleTexture( + std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes), + bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); const VkBufferImageCopy copy{ .bufferOffset = image_offset, @@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool }, }; scheduler.Record( - [buffer = *buffer, image = *blit_image->GetHandle(), 
copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); + [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); }); } map.Release(); - blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, @@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkClearValue clear_color{ .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, }; - - 
VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = size; - renderpass_bi.clearValueCount = 1; - renderpass_bi.pClearValues = &clear_color; - - VkViewport viewport; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = static_cast(size.width); - viewport.height = static_cast(size.height); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - - VkRect2D scissor; - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent = size; - + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {0, 0}, + .extent = size, + }, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(size.width), + .height = static_cast(size.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const VkRect2D scissor{ + .offset = {0, 0}, + .extent = size, + }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.SetViewport(0, viewport); @@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) } void VKBlitScreen::CreateShaders() { - vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); - fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); + vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); + fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); } void VKBlitScreen::CreateSemaphores() { @@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() { const 
VkAttachmentReference color_attachment_ref{ .attachment = 0, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .layout = VK_IMAGE_LAYOUT_GENERAL, }; const VkSubpassDescription subpass_description{ @@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images.resize(image_count); + raw_image_views.resize(image_count); raw_buffer_commits.resize(image_count); - const VkImageCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = GetFormat(framebuffer), - .extent = - { - .width = framebuffer.width, - .height = framebuffer.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }; - - for (std::size_t i = 0; i < image_count; ++i) { - raw_images[i] = std::make_unique(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); - raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); + for (size_t i = 0; i < image_count; ++i) { + raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = GetFormat(framebuffer), + .extent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + 
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }); + raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false); + raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *raw_images[i], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = GetFormat(framebuffer), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } } @@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag const VkDescriptorImageInfo image_info{ .sampler = *sampler, .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; const VkWriteDescriptorSet sampler_write{ diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 8f2839214..2ee374247 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -35,7 +35,6 @@ struct ScreenInfo; class RasterizerVulkan; class VKDevice; -class VKImage; class VKScheduler; class VKSwapchain; @@ -110,7 +109,8 @@ private: std::vector resource_ticks; std::vector semaphores; - std::vector> raw_images; + std::vector raw_images; + std::vector raw_image_views; std::vector raw_buffer_commits; u32 raw_width = 0; u32 raw_height = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 444d3fb93..10d296c2f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -31,15 +31,19 @@ constexpr VkAccessFlags 
UPLOAD_ACCESS_BARRIERS = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; +constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = + VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + std::unique_ptr CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { - return std::make_unique(device, scheduler, BUFFER_USAGE); + return std::make_unique(device, scheduler); } } // Anonymous namespace -Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, +Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) - : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} { + : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} { const VkBufferCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer handle = Handle(); - scheduler.Record( - [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); - - const VkBufferMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = UPLOAD_ACCESS_BARRIERS, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = handle, - .offset = offset, - .size = data_size, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, - barrier, {}); - }); + scheduler.Record([staging = 
*staging.handle, handle, offset, data_size, + &device = device](vk::CommandBuffer cmdbuf) { + const VkBufferMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = + VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | + VK_ACCESS_HOST_WRITE_BIT | + (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = data_size, + }; + const VkBufferMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = data_size, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, + write_barrier); + }); } void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { @@ -150,8 +169,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) - : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)}, + VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, + VKStagingBufferPool& staging_pool_) + : VideoCommon::BufferCache{rasterizer_, gpu_memory_, + cpu_memory_, 
stream_buffer_}, device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ staging_pool_} {} diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 6008b8373..daf498222 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -41,6 +41,7 @@ public: } private: + const VKDevice& device; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; @@ -49,10 +50,11 @@ private: class VKBufferCache final : public VideoCommon::BufferCache { public: - explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_); + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStreamBuffer& stream_buffer, + VKStagingBufferPool& staging_pool); ~VKBufferCache(); BufferInfo GetEmptyBuffer(std::size_t size) override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 1ac7e2a30..2c030e910 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,6 +10,9 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" +#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" +#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -22,99 +25,6 @@ namespace Vulkan { 
namespace { -// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there. -constexpr u8 quad_array[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 
0x00, - 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 
0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 
0x00, 0x1a, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, - 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x3d, 
0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { return { .binding = 0, @@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { }; } -// Uint8 SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 uint8_pass[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, - 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, - 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f, - 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, - 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 
0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 
0x00, 0x05, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - -// Quad indexed SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 QUAD_INDEXED_SPV[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 
0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 
0x71, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, - 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, - 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3d, 0x00, 
0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, - 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, - 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, - 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, - 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, - 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 
0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, - 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, - 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, - 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, - 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - std::array BuildInputOutputDescriptorSetBindings() { return {{ { @@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { VKComputePass::VKComputePass(const VKDevice& device, 
VKDescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, - vk::Span push_constants, std::size_t code_size, - const u8* code) { + vk::Span push_constants, + std::span code) { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto .bindingCount = bindings.size(), .pBindings = bindings.data(), }); - layout = device.GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto .pushConstantRangeCount = push_constants.size(), .pPushConstantRanges = push_constants.data(), }); - if (!templates.empty()) { descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, @@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } - - auto code_copy = std::make_unique(code_size / sizeof(u32) + 1); - std::memcpy(code_copy.get(), code, code_size); - module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .codeSize = code_size, - .pCode = code_copy.get(), + .codeSize = static_cast(code.size_bytes()), + .pCode = code.data(), }); - pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -467,7 +168,7 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), BuildQuadArrayPassDescriptorUpdateTemplateEntry(), - 
BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), + BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -510,12 +211,11 @@ std::pair QuadArrayPass::Assemble(u32 num_vertices, u32 return {*buffer.handle, 0}; } -Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), - uint8_pass), + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -561,8 +261,7 @@ QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), - QUAD_INDEXED_SPV), + BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 2dc87902c..abdf61e2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ 
b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include "common/common_types.h" @@ -24,8 +25,7 @@ public: explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, - vk::Span push_constants, std::size_t code_size, - const u8* code); + vk::Span push_constants, std::span code); ~VKComputePass(); protected: diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index ce3846195..370a63f74 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, + VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -122,6 +123,7 @@ std::unordered_map GetFormatProperties( VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, @@ -161,18 +163,32 @@ std::unordered_map GetFormatProperties( VK_FORMAT_BC2_SRGB_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, - VK_FORMAT_ASTC_8x8_SRGB_BLOCK, - VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK, - VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + 
VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK, VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, VK_FORMAT_ASTC_8x6_UNORM_BLOCK, @@ -192,7 +208,7 @@ std::unordered_map GetFormatProperties( VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) - : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, + : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { SetupFamilies(surface); SetupFeatures(); @@ -214,7 +230,7 @@ bool VKDevice::Create() { features2.features = { .robustBufferAccess = false, .fullDrawIndexUint32 = false, - .imageCubeArray = false, + .imageCubeArray = true, .independentBlend = true, .geometryShader = true, .tessellationShader = true, @@ -242,7 +258,7 @@ bool VKDevice::Create() { .shaderTessellationAndGeometryPointSize = false, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = false, + .shaderStorageImageMultisample = true, .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, .shaderStorageImageWriteWithoutFormat = true, .shaderUniformBufferArrayDynamicIndexing = false, @@ -268,7 +284,6 @@ bool VKDevice::Create() { .variableMultisampleRate = false, .inheritedQueries = false, }; - VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, 
.pNext = nullptr, @@ -380,6 +395,20 @@ bool VKDevice::Create() { LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + if (ext_robustness2) { + robustness2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, + .pNext = nullptr, + .robustBufferAccess2 = false, + .robustImageAccess2 = true, + .nullDescriptor = true, + }; + SetNext(next, robustness2); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -405,7 +434,14 @@ bool VKDevice::Create() { } CollectTelemetryParameters(); + CollectToolingInfo(); + if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { + LOG_WARNING( + Render_Vulkan, + "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); + ext_extended_dynamic_state = false; + } if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it // seems to cause stability issues @@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const { LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); // Wait for the log to flush and for Nsight Aftermath to dump the results - std::this_thread::sleep_for(std::chrono::seconds{3}); + std::this_thread::sleep_for(std::chrono::seconds{15}); } void VKDevice::SaveShader(const std::vector& spirv) const { @@ -499,6 +535,16 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) return true; } +bool VKDevice::TestDepthStencilBlits() const { + static constexpr VkFormatFeatureFlags required_features = + VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const auto test_features = [](VkFormatProperties props) { + return (props.optimalTilingFeatures & required_features) == required_features; + }; + return 
test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && + test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); +} + bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); @@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { const auto features{physical.GetFeatures()}; const std::array feature_report = { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), + std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), @@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), + std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), }; @@ -608,6 +656,7 @@ std::vector VKDevice::LoadExtensions() { bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; + bool has_ext_robustness2{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -627,11 +676,15 @@ std::vector VKDevice::LoadExtensions() { test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, 
VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); + test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); + test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); if (instance_version >= VK_API_VERSION_1_1) { test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); } @@ -733,6 +786,18 @@ std::vector VKDevice::LoadExtensions() { } } + if (has_ext_robustness2) { + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = nullptr; + features.pNext = &robustness2; + physical.GetFeatures2KHR(features); + if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { + extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); + ext_robustness2 = true; + } + } + return extensions; } @@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) { void VKDevice::SetupFeatures() { const auto supported_features{physical.GetFeatures()}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; + is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } @@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() { } } +void VKDevice::CollectToolingInfo() { + if (!ext_tooling_info) { + return; + } + const auto vkGetPhysicalDeviceToolPropertiesEXT = + reinterpret_cast( + 
dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); + if (!vkGetPhysicalDeviceToolPropertiesEXT) { + return; + } + u32 tool_count = 0; + if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { + return; + } + std::vector tools(tool_count); + if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { + return; + } + for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { + const std::string_view name = tool.name; + LOG_INFO(Render_Vulkan, "{}", name); + has_renderdoc = has_renderdoc || name == "RenderDoc"; + has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; + } +} + std::vector VKDevice::GetDeviceQueueCreateInfos() const { static constexpr float QUEUE_PRIORITY = 1.0f; diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 4286673d9..995dcfc0f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -157,6 +157,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true when blitting from and to depth stencil images is supported. + bool IsBlitDepthStencilSupported() const { + return is_blit_depth_stencil_supported; + } + /// Returns true if the device supports VK_NV_viewport_swizzle. bool IsNvViewportSwizzleSupported() const { return nv_viewport_swizzle; @@ -172,6 +177,11 @@ public: return ext_index_type_uint8; } + /// Returns true if the device supports VK_EXT_sampler_filter_minmax. + bool IsExtSamplerFilterMinmaxSupported() const { + return ext_sampler_filter_minmax; + } + /// Returns true if the device supports VK_EXT_depth_range_unrestricted. bool IsExtDepthRangeUnrestrictedSupported() const { return ext_depth_range_unrestricted; @@ -197,6 +207,16 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_shader_stencil_export. 
+ bool IsExtShaderStencilExportSupported() const { + return ext_shader_stencil_export; + } + + /// Returns true when a known debugging tool is attached. + bool HasDebuggingToolAttached() const { + return has_renderdoc || has_nsight_graphics; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return vendor_name; @@ -228,16 +248,23 @@ private: /// Collects telemetry information from the device. void CollectTelemetryParameters(); + /// Collects information about attached tools. + void CollectToolingInfo(); + /// Returns a list of queue initialization descriptors. std::vector GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; + /// Returns true if the device natively supports blitting depth stencil images. + bool TestDepthStencilBlits() const; + /// Returns true if a format is supported. bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; + VkInstance instance; ///< Vulkan instance. vk::DeviceDispatch dld; ///< Device function pointers. vk::PhysicalDevice physical; ///< Physical device. VkPhysicalDeviceProperties properties; ///< Device properties. @@ -253,15 +280,22 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Asynchronous Graphics Pipeline setting bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 0bcaee714..774a12a53 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const { } VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - Tegra::MemoryManager& memory_manager_, - VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, - VKQueryCache& query_cache_, const VKDevice& device_, - VKScheduler& scheduler_) + Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, + VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, + const VKDevice& device_, VKScheduler& scheduler_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, device{device_}, scheduler{scheduler_} {} diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h 
b/src/video_core/renderer_vulkan/vk_fence_manager.h index c8547cc24..c2869e8e3 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -8,6 +8,7 @@ #include "video_core/fence_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Core { @@ -24,7 +25,6 @@ class VKBufferCache; class VKDevice; class VKQueryCache; class VKScheduler; -class VKTextureCache; class InnerFence : public VideoCommon::FenceBase { public: @@ -51,12 +51,12 @@ private: using Fence = std::shared_ptr; using GenericFenceManager = - VideoCommon::FenceManager; + VideoCommon::FenceManager; class VKFenceManager final : public GenericFenceManager { public: explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, + Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, const VKDevice& device_, VKScheduler& scheduler_); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 970979fa1..7979df3a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -15,7 +15,6 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { }; } +VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode 
msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // Anonymous namespace VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_, - const GraphicsPipelineCacheKey& key_, - vk::Span bindings_, - const SPIRVProgram& program_) - : device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings_)}, + const GraphicsPipelineCacheKey& key, + vk::Span bindings, + const SPIRVProgram& program, u32 num_color_buffers) + : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, + descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( - program_)}, - renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, - pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {} + 
descriptor_template{CreateDescriptorUpdateTemplate(program)}, + modules(CreateShaderModules(program)), + pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -179,8 +200,9 @@ std::vector VKGraphicsPipeline::CreateShaderModules( return shader_modules; } -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const { +vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, + VkRenderPass renderpass, + u32 num_color_buffers) const { const auto& state = cache_key.fixed_state; const auto& viewport_swizzles = state.viewport_swizzles; @@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa }; std::array swizzles; - std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), - UnpackViewportSwizzle); + std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa }; std::array cb_attachments; - const auto num_attachments = static_cast(renderpass_params.num_color_attachments); - for (std::size_t index = 0; index < num_attachments; ++index) { + for (std::size_t index = 0; index < num_color_buffers; ++index) { static constexpr std::array COMPONENT_TABLE{ VK_COLOR_COMPONENT_R_BIT, 
VK_COLOR_COMPONENT_G_BIT, @@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .flags = 0, .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = static_cast(num_attachments), + .attachmentCount = num_color_buffers, .pAttachments = cb_attachments.data(), .blendConstants = {}, }; @@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa stage_ci.pNext = &subgroup_size_ci; } } - - const VkGraphicsPipelineCreateInfo ci{ + return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .subpass = 0, .basePipelineHandle = nullptr, .basePipelineIndex = 0, - }; - return device.GetLogical().CreateGraphicsPipeline(ci); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3fb31d55a..214d06b4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -8,10 +8,10 @@ #include #include +#include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -20,8 +20,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsPipelineCacheKey { - RenderPassParams renderpass_params; - u32 padding; + VkRenderPass renderpass; std::array shaders; FixedPipelineState fixed_state; @@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey { } std::size_t Size() const noexcept { - return 
sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); } }; static_assert(std::has_unique_object_representations_v); @@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v); class VKDescriptorPool; class VKDevice; -class VKRenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; @@ -52,12 +50,11 @@ using SPIRVProgram = std::array, Maxwell::MaxShaderSt class VKGraphicsPipeline final { public: explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_, - const GraphicsPipelineCacheKey& key_, - vk::Span bindings_, - const SPIRVProgram& program_); + const GraphicsPipelineCacheKey& key, + vk::Span bindings, + const SPIRVProgram& program, u32 num_color_buffers); ~VKGraphicsPipeline(); VkDescriptorSet CommitDescriptorSet(); @@ -70,10 +67,6 @@ public: return *layout; } - VkRenderPass GetRenderPass() const { - return renderpass; - } - GraphicsPipelineCacheKey GetCacheKey() const { return cache_key; } @@ -89,8 +82,8 @@ private: std::vector CreateShaderModules(const SPIRVProgram& program) const; - vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const; + vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, + u32 num_color_buffers) const; const VKDevice& device; VKScheduler& scheduler; @@ -104,7 +97,6 @@ private: vk::DescriptorUpdateTemplateKHR descriptor_template; std::vector modules; - VkRenderPass renderpass; vk::Pipeline pipeline; }; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp deleted file mode 100644 index 072d14e3b..000000000 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2018 yuzu 
Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/assert.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_, - const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_) - : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_}, - image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} { - UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0, - "Queue family tracking is not implemented"); - - image = device_.GetLogical().CreateImage(image_ci_); - - const u32 num_ranges = image_num_layers * image_num_levels; - barriers.resize(num_ranges); - subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout}); -} - -VKImage::~VKImage() = default; - -void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { - return; - } - - std::size_t cursor = 0; - for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { - for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) { - const u32 layer = base_layer + layer_it; - const u32 level = base_level + level_it; - auto& state = GetSubrangeState(layer, level); - auto& barrier = barriers[cursor]; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = state.access; - barrier.dstAccessMask = new_access; - barrier.oldLayout = state.layout; - barrier.newLayout = new_layout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - 
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = *image; - barrier.subresourceRange.aspectMask = aspect_mask; - barrier.subresourceRange.baseMipLevel = level; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.baseArrayLayer = layer; - barrier.subresourceRange.layerCount = 1; - state.access = new_access; - state.layout = new_layout; - } - } - - scheduler.RequestOutsideRenderPassOperationContext(); - - scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { - // TODO(Rodrigo): Implement a way to use the latest stage across subresources. - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, - vk::Span(barriers.data(), cursor)); - }); -} - -bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkAccessFlags new_access, VkImageLayout new_layout) noexcept { - const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && - base_level == 0 && num_levels == image_num_levels; - if (!is_full_range) { - state_diverged = true; - } - - if (!state_diverged) { - auto& state = GetSubrangeState(0, 0); - if (state.access != new_access || state.layout != new_layout) { - return true; - } - } - - for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { - for (u32 level_it = 0; level_it < num_levels; ++level_it) { - const u32 layer = base_layer + layer_it; - const u32 level = base_level + level_it; - auto& state = GetSubrangeState(layer, level); - if (state.access != new_access || state.layout != new_layout) { - return true; - } - } - } - return false; -} - -void VKImage::CreatePresentView() { - // Image type has to be 2D to be presented. 
- present_view = device.GetLogical().CreateImageView({ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = format, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); -} - -VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { - return subrange_states[static_cast(layer * image_num_levels) + - static_cast(level)]; -} - -} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h deleted file mode 100644 index 287ab90ca..000000000 --- a/src/video_core/renderer_vulkan/vk_image.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -class VKDevice; -class VKScheduler; - -class VKImage { -public: - explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_, - const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_); - ~VKImage(); - - /// Records in the passed command buffer an image transition and updates the state of the image. - void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout); - - /// Returns a view compatible with presentation, the image has to be 2D. 
- VkImageView GetPresentView() { - if (!present_view) { - CreatePresentView(); - } - return *present_view; - } - - /// Returns the Vulkan image handler. - const vk::Image& GetHandle() const { - return image; - } - - /// Returns the Vulkan format for this image. - VkFormat GetFormat() const { - return format; - } - - /// Returns the Vulkan aspect mask. - VkImageAspectFlags GetAspectMask() const { - return aspect_mask; - } - -private: - struct SubrangeState final { - VkAccessFlags access = 0; ///< Current access bits. - VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. - }; - - bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkAccessFlags new_access, VkImageLayout new_layout) noexcept; - - /// Creates a presentation view. - void CreatePresentView(); - - /// Returns the subrange state for a layer and layer. - SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept; - - const VKDevice& device; ///< Device handler. - VKScheduler& scheduler; ///< Device scheduler. - - const VkFormat format; ///< Vulkan format. - const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. - const u32 image_num_layers; ///< Number of layers. - const u32 image_num_levels; ///< Number of mipmap levels. - - vk::Image image; ///< Image handle. - vk::ImageView present_view; ///< Image view compatible with presentation. - - std::vector barriers; ///< Pool of barriers. - std::vector subrange_states; ///< Current subrange state. - - bool state_diverged = false; ///< True when subresources mismatch in layout. 
-}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index be53d450f..56b24b70f 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() { } MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { - return MemoryMap{this, memory.Map(interval.first + offset_, size)}; + return MemoryMap(this, std::span(memory.Map(interval.first + offset_, size), size)); } void VKMemoryCommitImpl::Unmap() const { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 39f903ec8..318f8b43e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include "common/common_types.h" @@ -93,8 +94,8 @@ private: /// Holds ownership of a memory map. class MemoryMap final { public: - explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) - : commit{commit_}, address{address_} {} + explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span span_) + : commit{commit_}, span{span_} {} ~MemoryMap() { if (commit) { @@ -108,19 +109,24 @@ public: commit = nullptr; } + /// Returns a span to the memory map. + [[nodiscard]] std::span Span() const noexcept { + return span; + } + /// Returns the address of the memory map. - u8* GetAddress() const { - return address; + [[nodiscard]] u8* Address() const noexcept { + return span.data(); } /// Returns the address of the memory map; - operator u8*() const { - return address; + [[nodiscard]] operator u8*() const noexcept { + return span.data(); } private: const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. - u8* address{}; ///< Address to the mapped memory. + std::span span; ///< Address to the mapped memory. 
}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3fb264d03..083796d05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -8,6 +8,7 @@ #include #include "common/bit_cast.h" +#include "common/cityhash.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -22,7 +23,6 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - VideoCommon::Shader::CompileDepth::FullDecompile}; + .depth = VideoCommon::Shader::CompileDepth::FullDecompile, + .disable_else_derivation = true, +}; constexpr std::size_t GetStageFromProgram(std::size_t program) { return program == 0 ? 
0 : program - 1; @@ -149,12 +151,11 @@ VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_ Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_) - : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, - descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, - renderpass_cache{renderpass_cache_} {} + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ + update_descriptor_queue_} {} VKPipelineCache::~VKPipelineCache() = default; @@ -199,7 +200,8 @@ std::array VKPipelineCache::GetShaders() { } VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { + const GraphicsPipelineCacheKey& key, u32 num_color_buffers, + VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { @@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, renderpass_cache, bindings, - program, key); + update_descriptor_queue, bindings, program, key, + num_color_buffers); } last_graphics_pipeline = pair->second.get(); return last_graphics_pipeline; @@ -229,8 +231,8 @@ 
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, renderpass_cache, key, - bindings, program); + update_descriptor_queue, key, bindings, + program, num_color_buffers); gpu.ShaderNotify().MarkShaderComplete(); } last_graphics_pipeline = entry.get(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 9e1f8fcbb..fbaa8257c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,7 +19,6 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/async_shaders.h" @@ -119,18 +118,18 @@ private: class VKPipelineCache final : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_); + explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const VKDevice& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); ~VKPipelineCache() override; std::array 
GetShaders(); VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + u32 num_color_buffers, VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); @@ -153,7 +152,6 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - VKRenderPassCache& renderpass_cache; std::unique_ptr null_shader; std::unique_ptr null_kernel; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f93986aab..04c5c859c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -19,6 +19,7 @@ #include "core/settings.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -30,8 +31,6 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -39,10 +38,13 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h" namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for 
worker", MP_RGB(255, 192, 192)); MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); @@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192 namespace { -constexpr auto ComputeShaderIndex = static_cast(Tegra::Engines::ShaderType::Compute); +constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); -VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { +VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; @@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si return viewport; } -VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { +VkRect2D GetScissorState(const Maxwell& regs, size_t index) { const auto& src = regs.scissor_test[index]; VkRect2D scissor; if (src.enable) { @@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { std::array GetShaderAddresses( const std::array& shaders) { std::array addresses; - for (std::size_t i = 0; i < std::size(addresses); ++i) { + for (size_t i = 0; i < std::size(addresses); ++i) { addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; } return addresses; } -void TransitionImages(const std::vector& views, VkPipelineStageFlags pipeline_stage, - VkAccessFlags access) { - for (auto& [view, layout] : views) { - view->Transition(*layout, pipeline_stage, access); +struct TextureHandle { + constexpr TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
image : handle.tsc_id.Value(); } -} + + u32 image; + u32 sampler; +}; template -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - std::size_t stage, std::size_t index = 0) { - const auto stage_type = static_cast(stage); +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, + size_t stage, size_t index = 0) { + const auto shader_type = static_cast(stage); if constexpr (std::is_same_v) { if (entry.is_separated) { const u32 buffer_1 = entry.buffer; const u32 buffer_2 = entry.secondary_buffer; const u32 offset_1 = entry.offset; const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); - return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); + const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); + const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); + return TextureHandle(handle_1 | handle_2, via_header_index); } } if (entry.is_bindless) { - const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); - } - const auto& gpu_profile = engine.AccessGuestDriverProfile(); - const u32 entry_offset = static_cast(index * gpu_profile.GetTextureHandlerSize()); - const u32 offset = entry.offset + entry_offset; - if constexpr (std::is_same_v) { - return engine.GetStageTexture(stage_type, offset); - } else { - return engine.GetTexture(offset); + const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return TextureHandle(raw, via_header_index); } + const u32 buffer = engine.GetBoundBuffer(); + const u64 offset = (entry.offset + index) * sizeof(u32); + return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), 
via_header_index); } -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { - if (!is_clear) { - return true; - } - // First we have to make sure all clear masks are enabled. - if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || - !regs.clear_buffers.A) { - return true; - } - // If scissors are disabled, the whole screen is cleared - if (!regs.clear_flags.scissor) { - return false; - } - // Then we have to confirm scissor testing clears the whole image - const std::size_t index = regs.clear_buffers.RT; - const auto& scissor = regs.scissor_test[0]; - return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || - scissor.max_y < regs.rt[index].height; -} - -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { - // If we are not clearing, the contents have to be preserved - if (!is_clear) { - return true; - } - // For depth stencil clears we only have to confirm scissor test covers the whole image - if (!regs.clear_flags.scissor) { - return false; - } - // Make sure the clear cover the whole image - const auto& scissor = regs.scissor_test[0]; - return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || - scissor.max_y < regs.zeta_height; -} - -template +template std::array ExpandStrides(const std::array& strides) { std::array expanded; std::copy(strides.begin(), strides.end(), expanded.begin()); return expanded; } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { + if (entry.is_buffer) { + return 
ImageViewType::e2D; + } + switch (entry.type) { + case Tegra::Shader::TextureType::Texture1D: + return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; + case Tegra::Shader::TextureType::Texture2D: + return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; + case Tegra::Shader::TextureType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::TextureType::TextureCube: + return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { + switch (entry.type) { + case Tegra::Shader::ImageType::Texture1D: + return ImageViewType::e1D; + case Tegra::Shader::ImageType::Texture1DArray: + return ImageViewType::e1DArray; + case Tegra::Shader::ImageType::Texture2D: + return ImageViewType::e2D; + case Tegra::Shader::ImageType::Texture2DArray: + return ImageViewType::e2DArray; + case Tegra::Shader::ImageType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::ImageType::TextureBuffer: + return ImageViewType::Buffer; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, + ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { + for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + } + for (const auto& entry : entries.samplers) { + for (size_t i = 0; i < entry.size; ++i) { + const VkSampler sampler = *sampler_ptr++; + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); + 
update_descriptor_queue.AddSampledImage(handle, sampler); + } + } + for ([[maybe_unused]] const auto& entry : entries.storage_texels) { + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + } + for (const auto& entry : entries.images) { + // TODO: Mark as modified + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); + update_descriptor_queue.AddImage(handle); + } +} + } // Anonymous namespace class BufferBindings final { @@ -290,7 +316,7 @@ public: private: // Some of these fields are intentionally left uninitialized to avoid initializing them twice. struct { - std::size_t num_buffers = 0; + size_t num_buffers = 0; std::array buffers; std::array offsets; std::array sizes; @@ -303,7 +329,7 @@ private: VkIndexType type; } index; - template + template void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { if (device.IsExtExtendedDynamicStateSupported()) { if (index.buffer) { @@ -320,7 +346,7 @@ private: } } - template + template void BindStatic(VKScheduler& scheduler) const { static_assert(N <= Maxwell::NumVertexArrays); if constexpr (N == 0) { @@ -385,20 +411,23 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, const VKDevice& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, VKScheduler& scheduler_) - : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), - maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), - device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), - scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), - 
descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), - renderpass_cache(device), + : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, + gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, + screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_}, + state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), + staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler), + update_descriptor_queue(device, scheduler), + blit_image(device, scheduler, state_tracker, descriptor_pool), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), + texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image}, + texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue, renderpass_cache), - buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), - sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), + descriptor_pool, update_descriptor_queue), + buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, + staging_pool), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { @@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const DrawParameters 
draw_params = SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); - update_descriptor_queue.Acquire(); - sampled_views.clear(); - image_views.clear(); + auto lock = texture_cache.AcquireLock(); + texture_cache.SynchronizeGraphicsDescriptors(); + + texture_cache.UpdateRenderTargets(false); const auto shaders = pipeline_cache.GetShaders(); key.shaders = GetShaderAddresses(shaders); @@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Unmap(); - const Texceptions texceptions = UpdateAttachments(false); - SetupImageTransitions(texceptions, color_attachments, zeta_attachment); + const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); + key.renderpass = framebuffer->RenderPass(); - key.renderpass_params = GetRenderPassParams(texceptions); - key.padding = 0; - - auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + auto* const pipeline = + pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { // Async graphics pipeline was not ready. 
return; } - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - - const auto renderpass = pipeline->GetRenderPass(); - const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - - UpdateDynamicStates(); - buffer_bindings.Bind(device, scheduler); BeginTransformFeedback(); + scheduler.RequestRenderpass(framebuffer); + scheduler.BindGraphicsPipeline(pipeline->GetHandle()); + UpdateDynamicStates(); + const auto pipeline_layout = pipeline->GetLayout(); const auto descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { @@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() { return; } - sampled_views.clear(); - image_views.clear(); - query_cache.UpdateCounters(); const auto& regs = maxwell3d.regs; @@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() { return; } - [[maybe_unused]] const auto texceptions = UpdateAttachments(true); - DEBUG_ASSERT(texceptions.none()); - SetupImageTransitions(0, color_attachments, zeta_attachment); + auto lock = texture_cache.AcquireLock(); + texture_cache.UpdateRenderTargets(true); + const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); + const VkExtent2D render_area = framebuffer->RenderArea(); + scheduler.RequestRenderpass(framebuffer); - const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); - const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - - VkClearRect clear_rect; - clear_rect.baseArrayLayer = regs.clear_buffers.layer; - clear_rect.layerCount = 1; - clear_rect.rect = GetScissorState(regs, 0); - clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); - clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); + VkClearRect clear_rect{ + .rect = 
GetScissorState(regs, 0), + .baseArrayLayer = regs.clear_buffers.layer, + .layerCount = 1, + }; + if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) { + return; + } + clear_rect.rect.extent = VkExtent2D{ + .width = std::min(clear_rect.rect.extent.width, render_area.width), + .height = std::min(clear_rect.rect.extent.height, render_area.height), + }; if (use_color) { VkClearValue clear_value; @@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() { void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { MICROPROFILE_SCOPE(Vulkan_Compute); - update_descriptor_queue.Acquire(); - sampled_views.clear(); - image_views.clear(); query_cache.UpdateCounters(); @@ -570,29 +592,43 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); - buffer_cache.Map(CalculateComputeStreamBufferSize()); + image_view_indices.clear(); + sampler_handles.clear(); + + auto lock = texture_cache.AcquireLock(); + texture_cache.SynchronizeComputeDescriptors(); const auto& entries = pipeline.GetEntries(); - SetupComputeConstBuffers(entries); - SetupComputeGlobalBuffers(entries); SetupComputeUniformTexels(entries); SetupComputeTextures(entries); SetupComputeStorageTexels(entries); SetupComputeImages(entries); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.Map(CalculateComputeStreamBufferSize()); + + update_descriptor_queue.Acquire(); + + SetupComputeConstBuffers(entries); + SetupComputeGlobalBuffers(entries); + + ImageViewId* image_view_id_ptr = image_view_ids.data(); + VkSampler* sampler_ptr = sampler_handles.data(); + PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, + sampler_ptr); + buffer_cache.Unmap(); - TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 
VK_ACCESS_SHADER_READ_BIT); - TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); - + const VkPipeline pipeline_handle = pipeline.GetHandle(); + const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); + const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), - layout = pipeline.GetLayout(), - descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { + grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, + descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET, descriptor_set, {}); cmdbuf.Dispatch(grid_x, grid_y, grid_z); }); @@ -613,7 +649,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.FlushRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.DownloadMemory(addr, size); + } buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } @@ -622,14 +661,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { if (!Settings::IsGPULevelHigh()) { return buffer_cache.MustFlushRegion(addr, size); } - return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); + return texture_cache.IsRegionGpuModified(addr, size) || + buffer_cache.MustFlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.InvalidateRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + 
} pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); @@ -639,17 +682,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.OnCPUWrite(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } pipeline_cache.OnCPUWrite(addr, size); buffer_cache.OnCPUWrite(addr, size); } void RasterizerVulkan::SyncGuestHost() { - texture_cache.SyncGuestHost(); buffer_cache.SyncGuestHost(); pipeline_cache.SyncGuestHost(); } +void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UnmapMemory(addr, size); + } + buffer_cache.OnCPUWrite(addr, size); + pipeline_cache.OnCPUWrite(addr, size); +} + void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write(addr, value); @@ -700,6 +754,14 @@ void RasterizerVulkan::WaitForIdle() { }); } +void RasterizerVulkan::FragmentBarrier() { + // We already put barriers when a render pass finishes +} + +void RasterizerVulkan::TiledCacheBarrier() { + // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend +} + void RasterizerVulkan::FlushCommands() { if (draw_counter > 0) { draw_counter = 0; @@ -710,14 +772,20 @@ void RasterizerVulkan::FlushCommands() { void RasterizerVulkan::TickFrame() { draw_counter = 0; update_descriptor_queue.TickFrame(); + fence_manager.TickFrame(); buffer_cache.TickFrame(); staging_pool.TickFrame(); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.TickFrame(); + } } -bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const 
Tegra::Engines::Fermi2D::Config& copy_config) { - texture_cache.DoFermiCopy(src, dst, copy_config); + auto lock = texture_cache.AcquireLock(); + texture_cache.BlitImage(dst, src, copy_config); return true; } @@ -727,20 +795,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return false; } - const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; - if (!surface) { + auto lock = texture_cache.AcquireLock(); + ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); + if (!image_view) { return false; } - // Verify that the cached surface is the same size and format as the requested framebuffer - const auto& params{surface->GetSurfaceParams()}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - - screen_info.image = &surface->GetImage(); - screen_info.width = params.width; - screen_info.height = params.height; - screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; + screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.width = image_view->size.width; + screen_info.height = image_view->size.height; + screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } @@ -765,103 +829,6 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { - MICROPROFILE_SCOPE(Vulkan_RenderTargets); - - const auto& regs = maxwell3d.regs; - auto& dirty = maxwell3d.dirty.flags; - const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; - dirty[VideoCommon::Dirty::RenderTargets] = false; - - texture_cache.GuardRenderTargets(true); - - Texceptions texceptions; - for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - if (update_rendertargets) { - const bool preserve_contents = 
HasToPreserveColorContents(is_clear, regs); - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); - } - if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { - texceptions[rt] = true; - } - } - - if (update_rendertargets) { - const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); - zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); - } - if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { - texceptions[ZETA_TEXCEPTION_INDEX] = true; - } - - texture_cache.GuardRenderTargets(false); - - return texceptions; -} - -bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { - bool overlap = false; - for (auto& [view, layout] : sampled_views) { - if (!attachment.IsSameSurface(*view)) { - continue; - } - overlap = true; - *layout = VK_IMAGE_LAYOUT_GENERAL; - } - return overlap; -} - -std::tuple RasterizerVulkan::ConfigureFramebuffers( - VkRenderPass renderpass) { - FramebufferCacheKey key{ - .renderpass = renderpass, - .width = std::numeric_limits::max(), - .height = std::numeric_limits::max(), - .layers = std::numeric_limits::max(), - .views = {}, - }; - - const auto try_push = [&key](const View& view) { - if (!view) { - return false; - } - key.views.push_back(view->GetAttachment()); - key.width = std::min(key.width, view->GetWidth()); - key.height = std::min(key.height, view->GetHeight()); - key.layers = std::min(key.layers, view->GetNumLayers()); - return true; - }; - - const auto& regs = maxwell3d.regs; - const std::size_t num_attachments = static_cast(regs.rt_control.count); - for (std::size_t index = 0; index < num_attachments; ++index) { - if (try_push(color_attachments[index])) { - texture_cache.MarkColorBufferInUse(index); - } - } - if (try_push(zeta_attachment)) { - texture_cache.MarkDepthBufferInUse(); - } - - const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); - auto& framebuffer = fbentry->second; - 
if (is_cache_miss) { - framebuffer = device.GetLogical().CreateFramebuffer({ - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .renderPass = key.renderpass, - .attachmentCount = static_cast(key.views.size()), - .pAttachments = key.views.data(), - .width = key.width, - .height = key.height, - .layers = key.layers, - }); - } - - return {*framebuffer, VkExtent2D{key.width, key.height}}; -} - RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, bool is_indexed, @@ -885,9 +852,27 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt void RasterizerVulkan::SetupShaderDescriptors( const std::array& shaders) { - texture_cache.GuardSamplers(true); + image_view_indices.clear(); + sampler_handles.clear(); + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + Shader* const shader = shaders[stage + 1]; + if (!shader) { + continue; + } + const auto& entries = shader->GetEntries(); + SetupGraphicsUniformTexels(entries, stage); + SetupGraphicsTextures(entries, stage); + SetupGraphicsStorageTexels(entries, stage); + SetupGraphicsImages(entries, stage); + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + update_descriptor_queue.Acquire(); + + ImageViewId* image_view_id_ptr = image_view_ids.data(); + VkSampler* sampler_ptr = sampler_handles.data(); + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { // Skip VertexA stage Shader* const shader = shaders[stage + 1]; if (!shader) { @@ -896,39 +881,8 @@ void RasterizerVulkan::SetupShaderDescriptors( const auto& entries = shader->GetEntries(); SetupGraphicsConstBuffers(entries, stage); SetupGraphicsGlobalBuffers(entries, stage); - SetupGraphicsUniformTexels(entries, stage); - 
SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - } - texture_cache.GuardSamplers(false); -} - -void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, - const ZetaAttachment& zeta) { - TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); - TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); - - for (std::size_t rt = 0; rt < color.size(); ++rt) { - const auto color_attachment = color[rt]; - if (color_attachment == nullptr) { - continue; - } - const auto image_layout = - texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); - } - - if (zeta != nullptr) { - const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] - ? 
VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, + sampler_ptr); } } @@ -1000,7 +954,7 @@ void RasterizerVulkan::EndTransformFeedback() { void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const auto& regs = maxwell3d.regs; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; if (!vertex_array.IsEnabled()) { continue; @@ -1009,7 +963,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; ASSERT(end >= start); - const std::size_t size = end - start; + const size_t size = end - start; if (size == 0) { buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); continue; @@ -1070,7 +1024,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar } } -void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); const auto& shader_stage = maxwell3d.state.shader_stages[stage]; for (const auto& entry : entries.const_buffers) { @@ -1078,7 +1032,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s } } -void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); const auto& 
cbufs{maxwell3d.state.shader_stages[stage]}; @@ -1088,37 +1042,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, } } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupUniformTexels(image, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); - SetupTexture(texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const TextureHandle handle = + GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); } } } -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = 
regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupStorageTexel(image, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Images); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupImage(tic, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } @@ -1128,11 +1094,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { for (const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); - Tegra::Engines::ConstBufferInfo buffer; - buffer.address = config.Address(); - buffer.size = config.size; - buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(entry, buffer); + const Tegra::Engines::ConstBufferInfo info{ + .address = config.Address(), + .size = config.size, + .enabled = mask[entry.GetIndex()], + }; + SetupConstBuffer(entry, info); } } @@ -1147,35 +1114,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.uniform_texels) { - 
const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupUniformTexels(image, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); - SetupTexture(texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, + COMPUTE_SHADER_INDEX, index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); } } } void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupStorageTexel(image, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Images); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupImage(tic, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, 
COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } @@ -1186,14 +1164,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); return; } - // Align the size to avoid bad std140 interactions - const std::size_t size = - Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); + const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); ASSERT(size <= MaxConstbufferSize); - const auto info = - buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + const u64 alignment = device.GetUniformBufferAlignment(); + const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } @@ -1206,7 +1182,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd // because Vulkan doesn't like empty buffers. // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the // default buffer. 
- static constexpr std::size_t dummy_size = 4; + static constexpr size_t dummy_size = 4; const auto info = buffer_cache.GetEmptyBuffer(dummy_size); update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); return; @@ -1217,55 +1193,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } -void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, - const UniformTexelEntry& entry) { - const auto view = texture_cache.GetTextureSurface(tic, entry); - ASSERT(view->IsBufferView()); - - update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - -void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry) { - auto view = texture_cache.GetTextureSurface(texture.tic, entry); - ASSERT(!view->IsBufferView()); - - const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); - const auto sampler = sampler_cache.GetSampler(texture.tsc); - update_descriptor_queue.AddSampledImage(sampler, image_view); - - VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); - *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - sampled_views.push_back(ImageView{std::move(view), image_layout}); -} - -void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, - const StorageTexelEntry& entry) { - const auto view = texture_cache.GetImageSurface(tic, entry); - ASSERT(view->IsBufferView()); - - update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - -void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { - auto view = texture_cache.GetImageSurface(tic, entry); - - if (entry.is_written) { - view->MarkAsModified(texture_cache.Tick()); - } - - UNIMPLEMENTED_IF(tic.IsBuffer()); - - const VkImageView image_view = - 
view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - update_descriptor_queue.AddImage(image_view); - - VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); - *image_layout = VK_IMAGE_LAYOUT_GENERAL; - image_views.push_back(ImageView{std::move(view), image_layout}); -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; @@ -1457,8 +1384,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } -std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { - std::size_t size = CalculateVertexArraysSize(); +size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { + size_t size = CalculateVertexArraysSize(); if (is_indexed) { size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); } @@ -1466,15 +1393,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) return size; } -std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { +size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { return Tegra::Engines::KeplerCompute::NumConstBuffers * (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); } -std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { +size_t RasterizerVulkan::CalculateVertexArraysSize() const { const auto& regs = maxwell3d.regs; - std::size_t size = 0; + size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { // This implementation assumes that all attributes are used in the shader. 
const GPUVAddr start{regs.vertex_array[index].StartAddress()}; @@ -1486,12 +1413,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { return size; } -std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { - return static_cast(maxwell3d.regs.index_array.count) * - static_cast(maxwell3d.regs.index_array.FormatSizeInBytes()); +size_t RasterizerVulkan::CalculateIndexBufferSize() const { + return static_cast(maxwell3d.regs.index_array.count) * + static_cast(maxwell3d.regs.index_array.FormatSizeInBytes()); } -std::size_t RasterizerVulkan::CalculateConstBufferSize( +size_t RasterizerVulkan::CalculateConstBufferSize( const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { if (entry.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing @@ -1502,37 +1429,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( } } -RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { - const auto& regs = maxwell3d.regs; - const std::size_t num_attachments = static_cast(regs.rt_control.count); - - RenderPassParams params; - params.color_formats = {}; - std::size_t color_texceptions = 0; - - std::size_t index = 0; - for (std::size_t rt = 0; rt < num_attachments; ++rt) { - const auto& rendertarget = regs.rt[rt]; - if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { - continue; - } - params.color_formats[index] = static_cast(rendertarget.format); - color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index; - ++index; - } - params.num_color_attachments = static_cast(index); - params.texceptions = static_cast(color_texceptions); - - params.zeta_format = regs.zeta_enable ? 
static_cast(regs.zeta.format) : 0; - params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; - return params; -} - VkBuffer RasterizerVulkan::DefaultBuffer() { if (default_buffer) { return *default_buffer; } - default_buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 30ec58eb4..990f9e031 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -11,11 +11,11 @@ #include #include -#include #include "common/common_types.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -24,10 +24,9 @@ #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -49,60 +48,9 @@ namespace Vulkan { struct VKScreenInfo; -using ImageViewsPack = boost::container::static_vector; - -struct FramebufferCacheKey { - VkRenderPass renderpass{}; - u32 width = 0; - u32 height = 0; - u32 layers = 0; - ImageViewsPack views; - - std::size_t Hash() const noexcept { - std::size_t hash = 0; - boost::hash_combine(hash, static_cast(renderpass)); 
- for (const auto& view : views) { - boost::hash_combine(hash, static_cast(view)); - } - boost::hash_combine(hash, width); - boost::hash_combine(hash, height); - boost::hash_combine(hash, layers); - return hash; - } - - bool operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(renderpass, views, width, height, layers) == - std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); - } - - bool operator!=(const FramebufferCacheKey& rhs) const noexcept { - return !operator==(rhs); - } -}; - -} // namespace Vulkan - -namespace std { - -template <> -struct hash { - std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - class StateTracker; class BufferBindings; -struct ImageView { - View view; - VkImageLayout* layout = nullptr; -}; - class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -123,15 +71,18 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void UnmapMemory(VAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; + void FragmentBarrier() override; + void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; - bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr 
framebuffer_addr, u32 pixel_stride) override; @@ -145,11 +96,17 @@ public: } /// Maximum supported size that a constbuffer can have in bytes. - static constexpr std::size_t MaxConstbufferSize = 0x10000; + static constexpr size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); private: + static constexpr size_t MAX_TEXTURES = 192; + static constexpr size_t MAX_IMAGES = 48; + static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + + static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); + struct DrawParameters { void Draw(vk::CommandBuffer cmdbuf) const; @@ -160,23 +117,8 @@ private: bool is_indexed = 0; }; - using ColorAttachments = std::array; - using ZetaAttachment = View; - - using Texceptions = std::bitset; - - static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; - static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); - void FlushWork(); - /// @brief Updates the currently bound attachments - /// @param is_clear True when the framebuffer is updated as a clear - /// @return Bitfield of attachments being used as sampled textures - Texceptions UpdateAttachments(bool is_clear); - - std::tuple ConfigureFramebuffers(VkRenderPass renderpass); - /// Setups geometry buffers and state. DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, bool is_indexed, bool is_instanced); @@ -184,17 +126,12 @@ private: /// Setup descriptors in the graphics pipeline. 
void SetupShaderDescriptors(const std::array& shaders); - void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, - const ZetaAttachment& zeta); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); - void SetupVertexArrays(BufferBindings& buffer_bindings); void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); @@ -240,14 +177,6 @@ private: void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); - void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); - - void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); - - void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); - - void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -264,18 +193,16 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); - std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; + size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; - std::size_t CalculateComputeStreamBufferSize() const; + size_t CalculateComputeStreamBufferSize() const; - std::size_t CalculateVertexArraysSize() const; + size_t CalculateVertexArraysSize() const; - std::size_t CalculateIndexBufferSize() const; + size_t CalculateIndexBufferSize() const; - std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, - const Tegra::Engines::ConstBufferInfo& buffer) const; - - RenderPassParams GetRenderPassParams(Texceptions texceptions) const; + size_t CalculateConstBufferSize(const 
ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer) const; VkBuffer DefaultBuffer(); @@ -290,18 +217,19 @@ private: StateTracker& state_tracker; VKScheduler& scheduler; + VKStreamBuffer stream_buffer; VKStagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; - VKRenderPassCache renderpass_cache; + BlitImageHelper blit_image; QuadArrayPass quad_array_pass; QuadIndexedPass quad_indexed_pass; Uint8Pass uint8_pass; - VKTextureCache texture_cache; + TextureCacheRuntime texture_cache_runtime; + TextureCache texture_cache; VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; - VKSamplerCache sampler_cache; VKQueryCache query_cache; VKFenceManager fence_manager; @@ -310,16 +238,11 @@ private: vk::Event wfi_event; VideoCommon::Shader::AsyncShaders async_shaders; - ColorAttachments color_attachments; - ZetaAttachment zeta_attachment; - - std::vector sampled_views; - std::vector image_views; + boost::container::static_vector image_view_indices; + std::array image_view_ids; + boost::container::static_vector sampler_handles; u32 draw_counter = 0; - - // TODO(Rodrigo): Invalidate on image destruction - std::unordered_map framebuffer_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp deleted file mode 100644 index e812c7dd6..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/cityhash.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -std::size_t RenderPassParams::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); - return static_cast(hash); -} - -bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { - return std::memcmp(&rhs, this, sizeof *this) == 0; -} - -VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {} - -VKRenderPassCache::~VKRenderPassCache() = default; - -VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { - const auto [pair, is_cache_miss] = cache.try_emplace(params); - auto& entry = pair->second; - if (is_cache_miss) { - entry = CreateRenderPass(params); - } - return *entry; -} - -vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { - using namespace VideoCore::Surface; - const std::size_t num_attachments = static_cast(params.num_color_attachments); - - std::vector descriptors; - descriptors.reserve(num_attachments); - - std::vector color_references; - color_references.reserve(num_attachments); - - for (std::size_t rt = 0; rt < num_attachments; ++rt) { - const auto guest_format = static_cast(params.color_formats[rt]); - const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); - const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); - ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast(pixel_format)); - - // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed. - const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 - ? 
VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - descriptors.push_back({ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = format.format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = color_layout, - .finalLayout = color_layout, - }); - - color_references.push_back({ - .attachment = static_cast(rt), - .layout = color_layout, - }); - } - - VkAttachmentReference zeta_attachment_ref; - const bool has_zeta = params.zeta_format != 0; - if (has_zeta) { - const auto guest_format = static_cast(params.zeta_format); - const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format); - const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); - ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast(pixel_format)); - - const VkImageLayout zeta_layout = params.zeta_texception != 0 - ? 
VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - descriptors.push_back({ - .flags = 0, - .format = format.format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = zeta_layout, - .finalLayout = zeta_layout, - }); - - zeta_attachment_ref = { - .attachment = static_cast(num_attachments), - .layout = zeta_layout, - }; - } - - const VkSubpassDescription subpass_description{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast(color_references.size()), - .pColorAttachments = color_references.data(), - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - - VkAccessFlags access = 0; - VkPipelineStageFlags stage = 0; - if (!color_references.empty()) { - access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - } - - if (has_zeta) { - access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - } - - const VkSubpassDependency subpass_dependency{ - .srcSubpass = VK_SUBPASS_EXTERNAL, - .dstSubpass = 0, - .srcStageMask = stage, - .dstStageMask = stage, - .srcAccessMask = 0, - .dstAccessMask = access, - .dependencyFlags = 0, - }; - - return device.GetLogical().CreateRenderPass({ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast(descriptors.size()), - .pAttachments = descriptors.data(), - .subpassCount = 1, - .pSubpasses = &subpass_description, - .dependencyCount = 1, - 
.pDependencies = &subpass_dependency, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h deleted file mode 100644 index 652ecef7b..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include -#include - -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/surface.h" - -namespace Vulkan { - -class VKDevice; - -struct RenderPassParams { - std::array color_formats; - u8 num_color_attachments; - u8 texceptions; - - u8 zeta_format; - u8 zeta_texception; - - std::size_t Hash() const noexcept; - - bool operator==(const RenderPassParams& rhs) const noexcept; - - bool operator!=(const RenderPassParams& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -} // namespace Vulkan - -namespace std { - -template <> -struct hash { - std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - -class VKRenderPassCache final { -public: - explicit VKRenderPassCache(const VKDevice& device_); - ~VKRenderPassCache(); - - VkRenderPass GetRenderPass(const RenderPassParams& params); - -private: - vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; - - const VKDevice& device; - std::unordered_map cache; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp deleted file mode 100644 index b859691fa..000000000 --- 
a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/textures/texture.h" - -using Tegra::Texture::TextureMipmapFilter; - -namespace Vulkan { - -namespace { - -VkBorderColor ConvertBorderColor(std::array color) { - // TODO(Rodrigo): Manage integer border colors - if (color == std::array{0, 0, 0, 0}) { - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - } else if (color == std::array{0, 0, 0, 1}) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - } else if (color == std::array{1, 1, 1, 1}) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - } - if (color[0] + color[1] + color[2] > 1.35f) { - // If color elements are brighter than roughly 0.5 average, use white border - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - } else if (color[3] > 0.5f) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - } else { - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - } -} - -} // Anonymous namespace - -VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {} - -VKSamplerCache::~VKSamplerCache() = default; - -vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); - const std::array color = tsc.GetBorderColor(); - - VkSamplerCustomBorderColorCreateInfoEXT border{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, - .pNext = nullptr, - .customBorderColor = {}, - .format = VK_FORMAT_UNDEFINED, - }; - std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - - return device.GetLogical().CreateSampler({ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = arbitrary_borders ? 
&border : nullptr, - .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), - .mipLodBias = tsc.GetLodBias(), - .anisotropyEnable = - static_cast(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), - .maxAnisotropy = tsc.GetMaxAnisotropy(), - .compareEnable = tsc.depth_compare_enabled, - .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), - .borderColor = - arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), - .unnormalizedCoordinates = VK_FALSE, - }); -} - -VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { - return *sampler; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h deleted file mode 100644 index 3f22c4610..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/sampler_cache.h" -#include "video_core/textures/texture.h" - -namespace Vulkan { - -class VKDevice; - -class VKSamplerCache final : public VideoCommon::SamplerCache { -public: - explicit VKSamplerCache(const VKDevice& device_); - ~VKSamplerCache(); - -protected: - vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - - VkSampler ToSamplerType(const vk::Sampler& sampler) const override; - -private: - const VKDevice& device; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 1a483dc71..c104c6fe3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -16,6 +16,7 @@ #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() { AcquireNewChunk(); } -void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, - VkExtent2D render_area) { - if (renderpass == state.renderpass && framebuffer == state.framebuffer && +void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) { + const VkRenderPass renderpass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer && render_area.width == state.render_area.width && render_area.height == state.render_area.height) { return; } - const bool end_renderpass = state.renderpass != nullptr; + EndRenderPass(); state.renderpass = renderpass; - state.framebuffer = framebuffer; + 
state.framebuffer = framebuffer_handle; state.render_area = render_area; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, - .renderArea = - { - .offset = {.x = 0, .y = 0}, - .extent = render_area, - }, - .clearValueCount = 0, - .pClearValues = nullptr, - }; - - Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { - if (end_renderpass) { - cmdbuf.EndRenderPass(); - } + Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) { + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer_handle, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); }); + num_renderpass_images = framebuffer->NumImages(); + renderpass_images = framebuffer->Images(); + renderpass_image_ranges = framebuffer->ImageRanges(); } void VKScheduler::RequestOutsideRenderPassOperationContext() { @@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() { if (!state.renderpass) { return; } + Record([num_images = num_renderpass_images, images = renderpass_images, + ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { + std::array barriers; + for (size_t i = 0; i < num_images; ++i) { + barriers[i] = VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .oldLayout = 
VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange = ranges[i], + }; + } + cmdbuf.EndRenderPass(); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, + vk::Span(barriers.data(), num_images)); + }); state.renderpass = nullptr; - Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); + num_renderpass_images = 0; } void VKScheduler::AcquireNewChunk() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6d3a5da0b..0a36c8fad 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -17,6 +17,7 @@ namespace Vulkan { class CommandPool; +class Framebuffer; class MasterSemaphore; class StateTracker; class VKDevice; @@ -52,8 +53,7 @@ public: void DispatchWork(); /// Requests to begin a renderpass. - void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, - VkExtent2D render_area); + void RequestRenderpass(const Framebuffer* framebuffer); /// Requests the current executino context to be able to execute operations only allowed outside /// of a renderpass. @@ -62,6 +62,9 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(VkPipeline pipeline); + /// Invalidates current command buffer state except for render passes + void InvalidateState(); + /// Assigns the query cache. 
void SetQueryCache(VKQueryCache& query_cache_) { query_cache = &query_cache_; @@ -170,8 +173,6 @@ private: void AllocateNewContext(); - void InvalidateState(); - void EndPendingOperations(); void EndRenderPass(); @@ -192,6 +193,11 @@ private: std::thread worker_thread; State state; + + u32 num_renderpass_images = 0; + std::array renderpass_images{}; + std::array renderpass_image_ranges{}; + Common::SPSCQueue> chunk_queue; Common::SPSCQueue> chunk_reserve; std::mutex mutex; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 72954d0e3..09d6f9f35 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -102,7 +102,7 @@ struct GenericVaryingDescription { bool is_scalar = false; }; -spv::Dim GetSamplerDim(const Sampler& sampler) { +spv::Dim GetSamplerDim(const SamplerEntry& sampler) { ASSERT(!sampler.is_buffer); switch (sampler.type) { case Tegra::Shader::TextureType::Texture1D: @@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) { } } -std::pair GetImageDim(const Image& image) { +std::pair GetImageDim(const ImageEntry& image) { switch (image.type) { case Tegra::Shader::ImageType::Texture1D: return {spv::Dim::Dim1D, false}; @@ -980,7 +980,7 @@ private: return binding; } - void DeclareImage(const Image& image, u32& binding) { + void DeclareImage(const ImageEntry& image, u32& binding) { const auto [dim, arrayed] = GetImageDim(image); constexpr int depth = 0; constexpr bool ms = false; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index df1812514..ad91ad5de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -21,10 +21,10 @@ class VKDevice; namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = 
VideoCommon::Shader::Sampler; -using SamplerEntry = VideoCommon::Shader::Sampler; -using StorageTexelEntry = VideoCommon::Shader::Image; -using ImageEntry = VideoCommon::Shader::Image; +using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using StorageTexelEntry = VideoCommon::Shader::ImageEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry; constexpr u32 DESCRIPTOR_SET = 0; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index c1a218d76..38a0be7f2 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -13,18 +13,13 @@ namespace Vulkan { -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { - // Avoid undefined behavior by copying to a staging allocation - ASSERT(code_size % sizeof(u32) == 0); - const auto data = std::make_unique(code_size / sizeof(u32)); - std::memcpy(data.get(), code_data, code_size); - +vk::ShaderModule BuildShader(const VKDevice& device, std::span code) { return device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .codeSize = code_size, - .pCode = data.get(), + .codeSize = static_cast(code.size_bytes()), + .pCode = code.data(), }); } diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index d1d3f3cae..dce34a140 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "common/common_types.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -11,6 +13,6 @@ namespace Vulkan { class VKDevice; -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); +vk::ShaderModule BuildShader(const VKDevice& device, std::span code); } 
// namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 50164cc08..1779a2e30 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include @@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table; using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { + static constexpr std::array INVALIDATION_FLAGS{ + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, + DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + }; Flags flags{}; - flags[Viewports] = true; - flags[Scissors] = true; - flags[DepthBias] = true; - flags[BlendConstants] = true; - flags[DepthBounds] = true; - flags[StencilProperties] = true; - flags[CullMode] = true; - flags[DepthBoundsEnable] = true; - flags[DepthTestEnable] = true; - flags[DepthWriteEnable] = true; - flags[DepthCompareOp] = true; - flags[FrontFace] = true; - flags[StencilOp] = true; - flags[StencilTestEnable] = true; + for (const int flag : INVALIDATION_FLAGS) { + flags[flag] = true; + } return flags; } diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1de789e57..c335d2bdf 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -52,6 +52,14 @@ public: current_topology = INVALID_TOPOLOGY; } + void InvalidateViewports() { + flags[Dirty::Viewports] = true; + } + + void InvalidateScissors() { + flags[Dirty::Scissors] = true; + } + bool TouchViewports() { return Exchange(Dirty::Viewports, false); } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 1b59612b9..419cb154d 
100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -19,6 +19,10 @@ namespace Vulkan { namespace { +constexpr VkBufferUsageFlags BUFFER_USAGE = + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; @@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, - VkBufferUsageFlags usage) +VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_) : device{device_}, scheduler{scheduler_} { - CreateBuffers(usage); + CreateBuffers(); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { +std::pair VKStreamBuffer::Map(u64 size, u64 alignment) { ASSERT(size <= stream_buffer_size); mapped_size = size; @@ -76,7 +79,6 @@ std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { WaitPendingOperations(offset); - bool invalidated = false; if (offset + size > stream_buffer_size) { // The buffer would overflow, save the amount of used watches and reset the state. invalidation_mark = current_watch_cursor; @@ -90,11 +92,9 @@ std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { // Ensure that we don't wait for uncommitted fences. 
scheduler.Flush(); - - invalidated = true; } - return {memory.Map(offset, size), offset, invalidated}; + return std::make_pair(memory.Map(offset, size), offset); } void VKStreamBuffer::Unmap(u64 size) { @@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) { watch.tick = scheduler.CurrentTick(); } -void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { +void VKStreamBuffer::CreateBuffers() { const auto memory_properties = device.GetPhysical().GetMemoryProperties(); const u32 preferred_type = GetMemoryType(memory_properties); const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; @@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { .pNext = nullptr, .flags = 0, .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), - .usage = usage, + .usage = BUFFER_USAGE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 5e15ad78f..1428f77bf 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -5,7 +5,7 @@ #pragma once #include -#include +#include #include #include "common/common_types.h" @@ -19,17 +19,15 @@ class VKScheduler; class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, - VkBufferUsageFlags usage); + explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler); ~VKStreamBuffer(); /** * Reserves a region of memory from the stream buffer. * @param size Size to reserve. - * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer - * offset and a boolean that's true when buffer has been invalidated. 
+ * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::pair Map(u64 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); @@ -49,7 +47,7 @@ private: }; /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(VkBufferUsageFlags usage); + void CreateBuffers(); /// Increases the amount of watches available. void ReserveWatches(std::vector& watches, std::size_t grow_size); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae2e3322c..261808391 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -4,614 +4,1103 @@ #include #include -#include -#include -#include -#include +#include #include -#include "common/assert.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/morton.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/surface.h" namespace Vulkan { -using VideoCore::MortonSwizzle; -using VideoCore::MortonSwizzleMode; - +using Tegra::Engines::Fermi2D; using Tegra::Texture::SwizzleSource; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; +using Tegra::Texture::TextureMipmapFilter; +using 
VideoCommon::BufferImageCopy; +using VideoCommon::ImageInfo; +using VideoCommon::ImageType; +using VideoCommon::SubresourceRange; +using VideoCore::Surface::IsPixelFormatASTC; namespace { -VkImageType SurfaceTargetToImage(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture1DArray: - return VK_IMAGE_TYPE_1D; - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - return VK_IMAGE_TYPE_2D; - case SurfaceTarget::Texture3D: - return VK_IMAGE_TYPE_3D; - case SurfaceTarget::TextureBuffer: - UNREACHABLE(); - return {}; - } - UNREACHABLE_MSG("Unknown texture target={}", target); - return {}; -} +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; -VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { - if (pixel_format < PixelFormat::MaxColorFormat) { - return VK_IMAGE_ASPECT_COLOR_BIT; - } else if (pixel_format < PixelFormat::MaxDepthFormat) { - return VK_IMAGE_ASPECT_DEPTH_BIT; - } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { - return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; +constexpr VkBorderColor ConvertBorderColor(const std::array& color) { + if (color == std::array{0, 0, 0, 0}) { + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } else if (color == std::array{0, 0, 0, 1}) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + } else if (color == std::array{1, 1, 1, 1}) { + return 
VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } else if (color[3] > 0.5f) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; } else { - UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); - return VK_IMAGE_ASPECT_COLOR_BIT; + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } } -VkImageViewType GetImageViewType(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return VK_IMAGE_VIEW_TYPE_1D; - case SurfaceTarget::Texture2D: - return VK_IMAGE_VIEW_TYPE_2D; - case SurfaceTarget::Texture3D: - return VK_IMAGE_VIEW_TYPE_3D; - case SurfaceTarget::Texture1DArray: - return VK_IMAGE_VIEW_TYPE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return VK_IMAGE_VIEW_TYPE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return VK_IMAGE_VIEW_TYPE_CUBE; - case SurfaceTarget::TextureCubeArray: - return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; - case SurfaceTarget::TextureBuffer: +[[nodiscard]] VkImageType ConvertImageType(const ImageType type) { + switch (type) { + case ImageType::e1D: + return VK_IMAGE_TYPE_1D; + case ImageType::e2D: + case ImageType::Linear: + return VK_IMAGE_TYPE_2D; + case ImageType::e3D: + return VK_IMAGE_TYPE_3D; + case ImageType::Buffer: break; } - UNREACHABLE(); + UNREACHABLE_MSG("Invalid image type={}", type); return {}; } -vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, - std::size_t host_memory_size) { - // TODO(Rodrigo): Move texture buffer creation to the buffer cache - return device.GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = static_cast(host_memory_size), - .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = 
VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); +[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) { + switch (num_samples) { + case 1: + return VK_SAMPLE_COUNT_1_BIT; + case 2: + return VK_SAMPLE_COUNT_2_BIT; + case 4: + return VK_SAMPLE_COUNT_4_BIT; + case 8: + return VK_SAMPLE_COUNT_8_BIT; + case 16: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return VK_SAMPLE_COUNT_1_BIT; + } } -VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, - const SurfaceParams& params, VkBuffer buffer, - std::size_t host_memory_size) { - ASSERT(params.IsBuffer()); - - return { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = buffer, - .format = - MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, - .offset = 0, - .range = static_cast(host_memory_size), - }; -} - -VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { - ASSERT(!params.IsBuffer()); - - const auto [format, attachable, storage] = - MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - - VkImageCreateInfo ci{ +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) { + const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); + VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + if (info.type == ImageType::e2D && info.resources.layers >= 6 && + info.size.width == info.size.height) { + flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + } + if (info.type == ImageType::e3D) { + flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; + } + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + if (format_info.attachable) { + switch 
(VideoCore::Surface::GetFormatType(info.format)) { + case VideoCore::Surface::SurfaceType::ColorTexture: + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + break; + case VideoCore::Surface::SurfaceType::Depth: + case VideoCore::Surface::SurfaceType::DepthStencil: + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + break; + default: + UNREACHABLE_MSG("Invalid surface type"); + } + } + if (format_info.storage) { + usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } + const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); + return VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, - .flags = 0, - .imageType = SurfaceTargetToImage(params.target), - .format = format, - .extent = {}, - .mipLevels = params.num_levels, - .arrayLayers = static_cast(params.GetNumLayers()), - .samples = VK_SAMPLE_COUNT_1_BIT, + .flags = flags, + .imageType = ConvertImageType(info.type), + .format = format_info.format, + .extent = + { + .width = info.size.width >> samples_x, + .height = info.size.height >> samples_y, + .depth = info.size.depth, + }, + .mipLevels = static_cast(info.resources.levels), + .arrayLayers = static_cast(info.resources.layers), + .samples = ConvertSampleCount(info.num_samples), .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; - if (attachable) { - ci.usage |= params.IsPixelFormatZeta() ? 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT - : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - } - if (storage) { - ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; - } - - switch (params.target) { - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - [[fallthrough]]; - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture2DArray: - ci.extent = {params.width, params.height, 1}; - break; - case SurfaceTarget::Texture3D: - ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; - ci.extent = {params.width, params.height, params.depth}; - break; - case SurfaceTarget::TextureBuffer: - UNREACHABLE(); - } - - return ci; } -u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | - (static_cast(z_source) << 8) | static_cast(w_source); -} - -} // Anonymous namespace - -CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, - GPUVAddr gpu_addr_, const SurfaceParams& params_) - : SurfaceBase{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_}, - memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} { - if (params.IsBuffer()) { - buffer = CreateBuffer(device, params, host_memory_size); - commit = memory_manager.Commit(buffer, false); - - const auto buffer_view_ci = - GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); - format = buffer_view_ci.format; - - buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); - } else { - const auto image_ci = GenerateImageCreateInfo(device, params); - format = image_ci.format; - - image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); - commit = 
memory_manager.Commit(image->GetHandle(), false); +[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) { + if (info.type == ImageType::Buffer) { + return vk::Image{}; } + return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); +} - // TODO(Rodrigo): Move this to a virtual function. - u32 num_layers = 1; - if (params.is_layered || params.target == SurfaceTarget::Texture3D) { - num_layers = params.depth; +[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) { + if (info.type != ImageType::Buffer) { + return vk::Buffer{}; } - main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); -} - -CachedSurface::~CachedSurface() = default; - -void CachedSurface::UploadTexture(const std::vector& staging_buffer) { - // To upload data we have to be outside of a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - if (params.IsBuffer()) { - UploadBuffer(staging_buffer); - } else { - UploadImage(staging_buffer); - } -} - -void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - UNIMPLEMENTED_IF(params.IsBuffer()); - - if (params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { - LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); - } - - // We can't copy images to buffers inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - - const auto& unused_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - // TODO(Rodrigo): Do this in a single copy - for (u32 level = 0; level < params.num_levels; ++level) { - scheduler.Record([image = *image->GetHandle(), buffer = *unused_buffer.handle, - copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); - }); - } - scheduler.Finish(); - - // TODO(Rodrigo): Use an 
intern buffer for staging buffers and avoid this unnecessary memcpy. - std::memcpy(staging_buffer.data(), unused_buffer.commit->Map(host_memory_size), - host_memory_size); -} - -void CachedSurface::DecorateSurfaceName() { - // TODO(Rodrigo): Add name decorations -} - -View CachedSurface::CreateView(const ViewParams& view_params) { - // TODO(Rodrigo): Add name decorations - return views[view_params] = std::make_shared(device, *this, view_params); -} - -void CachedSurface::UploadBuffer(const std::vector& staging_buffer) { - const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); - - scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, - size = host_memory_size](vk::CommandBuffer cmdbuf) { - VkBufferCopy copy; - copy.srcOffset = 0; - copy.dstOffset = 0; - copy.size = size; - cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = dst_buffer; - barrier.offset = 0; - barrier.size = size; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - 0, {}, barrier, {}); + const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); + return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = info.size.width * bytes_per_block, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, + .sharingMode = 
VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, }); } -void CachedSurface::UploadImage(const std::vector& staging_buffer) { - const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); - - FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - for (u32 level = 0; level < params.num_levels; ++level) { - const VkBufferImageCopy copy = GetBufferImageCopy(level); - if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), - copy](vk::CommandBuffer cmdbuf) { - std::array copies = {copy, copy}; - copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - copies); - }); - } else { - scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), - copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); - }); - } +[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { + switch (VideoCore::Surface::GetFormatType(format)) { + case VideoCore::Surface::SurfaceType::ColorTexture: + return VK_IMAGE_ASPECT_COLOR_BIT; + case VideoCore::Surface::SurfaceType::Depth: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case VideoCore::Surface::SurfaceType::DepthStencil: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + default: + UNREACHABLE_MSG("Invalid surface type"); + return VkImageAspectFlags{}; } } -VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - return { - .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), - .bufferRowLength = 0, - 
.bufferImageHeight = 0, - .imageSubresource = - { - .aspectMask = image->GetAspectMask(), - .mipLevel = level, - .baseArrayLayer = 0, - .layerCount = static_cast(params.GetNumLayers()), - }, - .imageOffset = {.x = 0, .y = 0, .z = 0}, - .imageExtent = - { - .width = params.GetMipWidth(level), - .height = params.GetMipHeight(level), - .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, - }, +[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) { + if (info.IsRenderTarget()) { + return ImageAspectMask(info.format); + } + const bool is_first = info.Swizzle()[0] == SwizzleSource::R; + switch (info.format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; + case PixelFormat::S8_UINT_D24_UNORM: + return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; + case PixelFormat::D16_UNORM: + case PixelFormat::D32_FLOAT: + return VK_IMAGE_ASPECT_DEPTH_BIT; + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + } +} + +[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device, + const ImageView* image_view) { + const auto pixel_format = image_view->format; + return VkAttachmentDescription{ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format, + .samples = image_view->Samples(), + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, }; } -VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { - return {image->GetAspectMask(), 0, params.num_levels, 0, - static_cast(params.GetNumLayers())}; +[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource 
swizzle) { + switch (swizzle) { + case SwizzleSource::Zero: + return VK_COMPONENT_SWIZZLE_ZERO; + case SwizzleSource::R: + return VK_COMPONENT_SWIZZLE_R; + case SwizzleSource::G: + return VK_COMPONENT_SWIZZLE_G; + case SwizzleSource::B: + return VK_COMPONENT_SWIZZLE_B; + case SwizzleSource::A: + return VK_COMPONENT_SWIZZLE_A; + case SwizzleSource::OneFloat: + case SwizzleSource::OneInt: + return VK_COMPONENT_SWIZZLE_ONE; + } + UNREACHABLE_MSG("Invalid swizzle={}", swizzle); + return VK_COMPONENT_SWIZZLE_ZERO; } -CachedSurfaceView::CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, - const ViewParams& view_params_) - : ViewBase{view_params_}, surface_params{surface_.GetSurfaceParams()}, - image{surface_.GetImageHandle()}, buffer_view{surface_.GetBufferViewHandle()}, - aspect_mask{surface_.GetAspectMask()}, device{device_}, surface{surface_}, - base_level{view_params_.base_level}, num_levels{view_params_.num_levels}, - image_view_type{image ? GetImageViewType(view_params_.target) : VK_IMAGE_VIEW_TYPE_1D} { - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - base_layer = 0; - num_layers = 1; - base_slice = view_params_.base_layer; - num_slices = view_params_.num_layers; - } else { - base_layer = view_params_.base_layer; - num_layers = view_params_.num_layers; +[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { + switch (type) { + case VideoCommon::ImageViewType::e1D: + return VK_IMAGE_VIEW_TYPE_1D; + case VideoCommon::ImageViewType::e2D: + return VK_IMAGE_VIEW_TYPE_2D; + case VideoCommon::ImageViewType::Cube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case VideoCommon::ImageViewType::e3D: + return VK_IMAGE_VIEW_TYPE_3D; + case VideoCommon::ImageViewType::e1DArray: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case VideoCommon::ImageViewType::e2DArray: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case VideoCommon::ImageViewType::CubeArray: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case VideoCommon::ImageViewType::Rect: + 
LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + return VK_IMAGE_VIEW_TYPE_2D; + case VideoCommon::ImageViewType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; } -CachedSurfaceView::~CachedSurfaceView() = default; +[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers( + VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) { + return VkImageSubresourceLayers{ + .aspectMask = aspect_mask, + .mipLevel = static_cast(subresource.base_level), + .baseArrayLayer = static_cast(subresource.base_layer), + .layerCount = static_cast(subresource.num_layers), + }; +} -VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { - const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (last_image_view && last_swizzle == new_swizzle) { - return last_image_view; - } - last_swizzle = new_swizzle; +[[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) { + return VkOffset3D{ + .x = offset3d.x, + .y = offset3d.y, + .z = offset3d.z, + }; +} - const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); - auto& image_view = entry->second; - if (!is_cache_miss) { - return last_image_view = *image_view; - } +[[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) { + return VkExtent3D{ + .width = static_cast(extent3d.width), + .height = static_cast(extent3d.height), + .depth = static_cast(extent3d.depth), + }; +} - std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), - MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (surface_params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { - // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. 
- std::swap(swizzle[0], swizzle[2]); - } +[[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy, + VkImageAspectFlags aspect_mask) noexcept { + return VkImageCopy{ + .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask), + .srcOffset = MakeOffset3D(copy.src_offset), + .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask), + .dstOffset = MakeOffset3D(copy.dst_offset), + .extent = MakeExtent3D(copy.extent), + }; +} - // Games can sample depth or stencil values on textures. This is decided by the swizzle value on - // hardware. To emulate this on Vulkan we specify it in the aspect. - VkImageAspectFlags aspect = aspect_mask; - if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); - const bool is_first = x_source == SwizzleSource::R; - switch (surface_params.pixel_format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; - break; - case PixelFormat::S8_UINT_D24_UNORM: - aspect = is_first ? 
VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - break; - default: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - UNIMPLEMENTED(); +[[nodiscard]] std::vector TransformBufferCopies( + std::span copies, size_t buffer_offset) { + std::vector result(copies.size()); + std::ranges::transform( + copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { + return VkBufferCopy{ + .srcOffset = static_cast(copy.src_offset + buffer_offset), + .dstOffset = static_cast(copy.dst_offset), + .size = static_cast(copy.size), + }; + }); + return result; +} + +[[nodiscard]] std::vector TransformBufferImageCopies( + std::span copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { + struct Maker { + VkBufferImageCopy operator()(const BufferImageCopy& copy) const { + return VkBufferImageCopy{ + .bufferOffset = copy.buffer_offset + buffer_offset, + .bufferRowLength = copy.buffer_row_length, + .bufferImageHeight = copy.buffer_image_height, + .imageSubresource = + { + .aspectMask = aspect_mask, + .mipLevel = static_cast(copy.image_subresource.base_level), + .baseArrayLayer = static_cast(copy.image_subresource.base_layer), + .layerCount = static_cast(copy.image_subresource.num_layers), + }, + .imageOffset = + { + .x = copy.image_offset.x, + .y = copy.image_offset.y, + .z = copy.image_offset.z, + }, + .imageExtent = + { + .width = copy.image_extent.width, + .height = copy.image_extent.height, + .depth = copy.image_extent.depth, + }, + }; } - - // Make sure we sample the first component - std::transform( - swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { - return component == VK_COMPONENT_SWIZZLE_G ? 
VK_COMPONENT_SWIZZLE_R : component; - }); + size_t buffer_offset; + VkImageAspectFlags aspect_mask; + }; + if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + std::vector result(copies.size() * 2); + std::ranges::transform(copies, result.begin(), + Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); + std::ranges::transform(copies, result.begin() + copies.size(), + Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); + return result; + } else { + std::vector result(copies.size()); + std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); + return result; } - - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - ASSERT(base_slice == 0); - ASSERT(num_slices == surface_params.depth); - } - - image_view = device.GetLogical().CreateImageView({ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = surface.GetImageHandle(), - .viewType = image_view_type, - .format = surface.GetImage().GetFormat(), - .components = - { - .r = swizzle[0], - .g = swizzle[1], - .b = swizzle[2], - .a = swizzle[3], - }, - .subresourceRange = - { - .aspectMask = aspect, - .baseMipLevel = base_level, - .levelCount = num_levels, - .baseArrayLayer = base_layer, - .layerCount = num_layers, - }, - }); - - return last_image_view = *image_view; } -VkImageView CachedSurfaceView::GetAttachment() { - if (render_target) { - return *render_target; - } +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask, + const SubresourceRange& range) { + return VkImageSubresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = static_cast(range.base.level), + .levelCount = static_cast(range.extent.levels), + .baseArrayLayer = static_cast(range.base.layer), + .layerCount = static_cast(range.extent.layers), + }; +} - VkImageViewCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) { + 
SubresourceRange range = image_view->range; + if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { + // Slice image views always affect a single layer, but their subresource range corresponds + // to the slice. Override the value to affect a single layer. + range.base.layer = 0; + range.extent.layers = 1; + } + return MakeSubresourceRange(ImageAspectMask(image_view->format), range); +} + +[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) { + return VkImageSubresourceLayers{ + .aspectMask = ImageAspectMask(image_view->format), + .mipLevel = static_cast(image_view->range.base.level), + .baseArrayLayer = static_cast(image_view->range.base.layer), + .layerCount = static_cast(image_view->range.extent.layers), + }; +} + +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { + switch (value) { + case SwizzleSource::G: + return SwizzleSource::R; + default: + return value; + } +} + +void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, + VkImageAspectFlags aspect_mask, bool is_initialized, + std::span copies) { + static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + const VkImageMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, - .flags = 0, - .image = surface.GetImageHandle(), - .viewType = VK_IMAGE_VIEW_TYPE_1D, - .format = surface.GetImage().GetFormat(), - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, + .srcAccessMask = ACCESS_FLAGS, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = is_initialized ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, .subresourceRange = { .aspectMask = aspect_mask, - .baseMipLevel = base_level, - .levelCount = num_levels, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, .baseArrayLayer = 0, - .layerCount = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; - ci.subresourceRange.baseArrayLayer = base_slice; - ci.subresourceRange.layerCount = num_slices; - } else { - ci.viewType = image_view_type; - ci.subresourceRange.baseArrayLayer = base_layer; - ci.subresourceRange.layerCount = num_layers; - } - render_target = device.GetLogical().CreateImageView(ci); - return *render_target; -} - -VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, - VKMemoryManager& memory_manager_, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_) - : TextureCache(rasterizer_, maxwell3d_, gpu_memory_, device_.IsOptimalAstcSupported()), - device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ - staging_pool_} {} - -VKTextureCache::~VKTextureCache() = default; - -Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared(device, memory_manager, scheduler, staging_pool, - gpu_addr, params); -} - -void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) { - const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; - const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; - 
UNIMPLEMENTED_IF(src_3d); - - // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and - // dimension respectively. - const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; - const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; - - const u32 extent_z = dst_3d ? copy_params.depth : 1; - const u32 num_layers = dst_3d ? 1 : copy_params.depth; - - // We can't copy inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - const VkImageCopy copy{ - .srcSubresource = + const VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = ACCESS_FLAGS, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { - .aspectMask = src_surface->GetAspectMask(), - .mipLevel = copy_params.source_level, - .baseArrayLayer = copy_params.source_z, - .layerCount = num_layers, + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies); + // TODO: Move this to another API + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, 
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, + write_barrier); +} + +[[nodiscard]] VkImageBlit MakeImageBlit(const std::array& dst_region, + const std::array& src_region, + const VkImageSubresourceLayers& dst_layers, + const VkImageSubresourceLayers& src_layers) { + return VkImageBlit{ + .srcSubresource = src_layers, + .srcOffsets = + { + { + .x = src_region[0].x, + .y = src_region[0].y, + .z = 0, + }, + { + .x = src_region[1].x, + .y = src_region[1].y, + .z = 1, + }, + }, + .dstSubresource = dst_layers, + .dstOffsets = + { + { + .x = dst_region[0].x, + .y = dst_region[0].y, + .z = 0, + }, + { + .x = dst_region[1].x, + .y = dst_region[1].y, + .z = 1, + }, + }, + }; +} + +[[nodiscard]] VkImageResolve MakeImageResolve(const std::array& dst_region, + const std::array& src_region, + const VkImageSubresourceLayers& dst_layers, + const VkImageSubresourceLayers& src_layers) { + return VkImageResolve{ + .srcSubresource = src_layers, .srcOffset = { - .x = static_cast(copy_params.source_x), - .y = static_cast(copy_params.source_y), + .x = src_region[0].x, + .y = src_region[0].y, .z = 0, }, - .dstSubresource = - { - .aspectMask = dst_surface->GetAspectMask(), - .mipLevel = copy_params.dest_level, - .baseArrayLayer = dst_base_layer, - .layerCount = num_layers, - }, + .dstSubresource = dst_layers, .dstOffset = { - .x = static_cast(copy_params.dest_x), - .y = static_cast(copy_params.dest_y), - .z = static_cast(dst_offset_z), + .x = dst_region[0].x, + .y = dst_region[0].y, + .z = 0, }, .extent = { - .width = copy_params.width, - .height = copy_params.height, - .depth = extent_z, + .width = static_cast(dst_region[1].x - dst_region[0].x), + .height = static_cast(dst_region[1].y - dst_region[0].y), + .depth = 1, }, }; - - const VkImage src_image = src_surface->GetImageHandle(); - const VkImage dst_image = dst_surface->GetImageHandle(); - scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, 
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); - }); } -void VKTextureCache::ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) { - // We can't blit inside a renderpass +struct RangedBarrierRange { + u32 min_mip = std::numeric_limits::max(); + u32 max_mip = std::numeric_limits::min(); + u32 min_layer = std::numeric_limits::max(); + u32 max_layer = std::numeric_limits::min(); + + void AddLayers(const VkImageSubresourceLayers& layers) { + min_mip = std::min(min_mip, layers.mipLevel); + max_mip = std::max(max_mip, layers.mipLevel + 1); + min_layer = std::min(min_layer, layers.baseArrayLayer); + max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); + } + + VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { + return VkImageSubresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = min_mip, + .levelCount = max_mip - min_mip, + .baseArrayLayer = min_layer, + .layerCount = max_layer - min_layer, + }; + } +}; + +} // Anonymous namespace + +void TextureCacheRuntime::Finish() { + scheduler.Finish(); +} + +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { + const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true); + return ImageBufferMap{ + .handle = *buffer.handle, + .map = buffer.commit->Map(size), + }; +} + +void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); + const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT; + const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT; + ASSERT(aspect_mask == ImageAspectMask(dst.format)); + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { + 
blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, + operation); + return; + } + if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!device.IsBlitDepthStencilSupported()) { + UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); + blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), + dst_region, src_region, filter, operation); + return; + } + } + ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(!(is_dst_msaa && !is_src_msaa)); + ASSERT(operation == Fermi2D::Operation::SrcCopy); + + const VkImage dst_image = dst.ImageHandle(); + const VkImage src_image = src.ImageHandle(); + const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst); + const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src); + const bool is_resolve = is_src_msaa && !is_dst_msaa; scheduler.RequestOutsideRenderPassOperationContext(); - - src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_READ_BIT); - dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT); - - VkImageBlit blit; - blit.srcSubresource = src_view->GetImageSubresourceLayers(); - blit.srcOffsets[0].x = copy_config.src_rect.left; - blit.srcOffsets[0].y = copy_config.src_rect.top; - blit.srcOffsets[0].z = 0; - blit.srcOffsets[1].x = copy_config.src_rect.right; - blit.srcOffsets[1].y = copy_config.src_rect.bottom; - blit.srcOffsets[1].z = 1; - blit.dstSubresource = dst_view->GetImageSubresourceLayers(); - blit.dstOffsets[0].x = copy_config.dst_rect.left; - blit.dstOffsets[0].y = copy_config.dst_rect.top; - blit.dstOffsets[0].z = 0; - blit.dstOffsets[1].x = copy_config.dst_rect.right; - blit.dstOffsets[1].y = copy_config.dst_rect.bottom; - blit.dstOffsets[1].z = 1; - - const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - - scheduler.Record([src_image = 
src_view->GetImage(), dst_image = dst_view->GetImage(), blit, - is_linear](vk::CommandBuffer cmdbuf) { - cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, - is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers, + aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) { + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = 
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, read_barriers); + if (is_resolve) { + cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); + } else { + const bool is_linear = filter == Fermi2D::Filter::Bilinear; + const VkFilter vk_filter = is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + cmdbuf.BlitImage( + src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter); + } + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, write_barrier); }); } -void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { - // Currently unimplemented. PBO copies should be dropped and we should use a render pass to - // convert from color to depth and viceversa. 
- LOG_WARNING(Render_Vulkan, "Unimplemented"); +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + switch (dst_view.format) { + case PixelFormat::R16_UNORM: + if (src_view.format == PixelFormat::D16_UNORM) { + return blit_image_helper.ConvertD16ToR16(dst, src_view); + } + break; + case PixelFormat::R32_FLOAT: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32ToR32(dst, src_view); + } + break; + case PixelFormat::D16_UNORM: + if (src_view.format == PixelFormat::R16_UNORM) { + return blit_image_helper.ConvertR16ToD16(dst, src_view); + } + break; + case PixelFormat::D32_FLOAT: + if (src_view.format == PixelFormat::R32_FLOAT) { + return blit_image_helper.ConvertR32ToD32(dst, src_view); + } + break; + default: + break; + } + UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); +} + +void TextureCacheRuntime::CopyImage(Image& dst, Image& src, + std::span copies) { + std::vector vk_copies(copies.size()); + const VkImageAspectFlags aspect_mask = dst.AspectMask(); + ASSERT(aspect_mask == src.AspectMask()); + + std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { + return MakeImageCopy(copy, aspect_mask); + }); + const VkImage dst_image = dst.Handle(); + const VkImage src_image = src.Handle(); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { + RangedBarrierRange dst_range; + RangedBarrierRange src_range; + for (const VkImageCopy& copy : vk_copies) { + dst_range.AddLayers(copy.dstSubresource); + src_range.AddLayers(copy.srcSubresource); + } + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + 
VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(aspect_mask), + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(aspect_mask), + }, + }; + const VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(aspect_mask), + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, read_barriers); + 
cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, write_barrier); + }); +} + +Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, + VAddr cpu_addr_) + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, + image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + aspect_mask(ImageAspectMask(info.format)) { + if (image) { + commit = runtime.memory_manager.Commit(image, false); + } else { + commit = runtime.memory_manager.Commit(buffer, false); + } + if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { + flags |= VideoCommon::ImageFlagBits::Converted; + } + if (runtime.device.HasDebuggingToolAttached()) { + if (image) { + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + } else { + buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + } + } +} + +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + // TODO: Move this to another API + scheduler->RequestOutsideRenderPassOperationContext(); + std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + const VkBuffer src_buffer = map.handle; + const VkImage vk_image = *image; + const VkImageAspectFlags vk_aspect_mask = aspect_mask; + const bool is_initialized = std::exchange(initialized, true); + scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, + vk_copies](vk::CommandBuffer cmdbuf) { + CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); + }); +} + +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + // TODO: Move this to another API + scheduler->RequestOutsideRenderPassOperationContext(); + std::vector vk_copies = 
TransformBufferCopies(copies, buffer_offset); + const VkBuffer src_buffer = map.handle; + const VkBuffer dst_buffer = *buffer; + scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + }); +} + +void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies) { + std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, + vk_copies](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); + }); +} + +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image) + : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, + image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( + image.info.num_samples)} { + const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); + std::array swizzle{ + SwizzleSource::R, + SwizzleSource::G, + SwizzleSource::B, + SwizzleSource::A, + }; + if (!info.IsRenderTarget()) { + swizzle = info.Swizzle(); + if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { + std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); + } + } + const VkFormat vk_format = + MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format; + const VkImageViewCreateInfo create_info{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = image.Handle(), + .viewType = VkImageViewType{}, + .format = vk_format, + .components{ + .r = ComponentSwizzle(swizzle[0]), + .g = ComponentSwizzle(swizzle[1]), + .b = ComponentSwizzle(swizzle[2]), + .a = ComponentSwizzle(swizzle[3]), + }, + .subresourceRange = MakeSubresourceRange(aspect_mask, 
info.range), + }; + const auto create = [&](VideoCommon::ImageViewType view_type, std::optional num_layers) { + VkImageViewCreateInfo ci{create_info}; + ci.viewType = ImageViewType(view_type); + if (num_layers) { + ci.subresourceRange.layerCount = *num_layers; + } + vk::ImageView handle = device->GetLogical().CreateImageView(ci); + if (device->HasDebuggingToolAttached()) { + handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + } + image_views[static_cast(view_type)] = std::move(handle); + }; + switch (info.type) { + case VideoCommon::ImageViewType::e1D: + case VideoCommon::ImageViewType::e1DArray: + create(VideoCommon::ImageViewType::e1D, 1); + create(VideoCommon::ImageViewType::e1DArray, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e1DArray); + break; + case VideoCommon::ImageViewType::e2D: + case VideoCommon::ImageViewType::e2DArray: + create(VideoCommon::ImageViewType::e2D, 1); + create(VideoCommon::ImageViewType::e2DArray, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e2DArray); + break; + case VideoCommon::ImageViewType::e3D: + create(VideoCommon::ImageViewType::e3D, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e3D); + break; + case VideoCommon::ImageViewType::Cube: + case VideoCommon::ImageViewType::CubeArray: + create(VideoCommon::ImageViewType::Cube, 6); + create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + break; + case VideoCommon::ImageViewType::Rect: + UNIMPLEMENTED(); + break; + case VideoCommon::ImageViewType::Buffer: + buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = image.Buffer(), + .format = vk_format, + .offset = 0, // TODO: Redesign buffer cache to support this + .range = image.guest_size_bytes, + }); + break; + } +} + +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) + : 
VideoCommon::ImageViewBase{params} {} + +VkImageView ImageView::DepthView() { + if (depth_view) { + return *depth_view; + } + depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + return *depth_view; +} + +VkImageView ImageView::StencilView() { + if (stencil_view) { + return *stencil_view; + } + stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + return *stencil_view; +} + +vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { + return device->GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = image_handle, + .viewType = ImageViewType(type), + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format, + .components{ + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = MakeSubresourceRange(aspect_mask, range), + }); +} + +Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) { + const auto& device = runtime.device; + const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported(); + const std::array color = tsc.BorderColor(); + // C++20 bit_cast + VkClearColorValue border_color; + std::memcpy(&border_color, &color, sizeof(color)); + const VkSamplerCustomBorderColorCreateInfoEXT border_ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .customBorderColor = border_color, + .format = VK_FORMAT_UNDEFINED, + }; + const void* pnext = nullptr; + if (arbitrary_borders) { + pnext = &border_ci; + } + const VkSamplerReductionModeCreateInfoEXT reduction_ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT, + .pNext = pnext, + .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter), + }; + if (runtime.device.IsExtSamplerFilterMinmaxSupported()) { + pnext = &reduction_ci; + } else 
if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) { + LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); + } + // Some games have samplers with garbage. Sanitize them here. + const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = pnext, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.LodBias(), + .anisotropyEnable = static_cast(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = max_anisotropy, + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .borderColor = + arbitrary_borders ? 
VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); +} + +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + std::vector descriptions; + std::vector attachments; + RenderPassKey renderpass_key{}; + s32 num_layers = 1; + + for (size_t index = 0; index < NUM_RT; ++index) { + const ImageView* const color_buffer = color_buffers[index]; + if (!color_buffer) { + renderpass_key.color_formats[index] = PixelFormat::Invalid; + continue; + } + descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); + attachments.push_back(color_buffer->RenderTarget()); + renderpass_key.color_formats[index] = color_buffer->format; + num_layers = std::max(num_layers, color_buffer->range.extent.layers); + images[num_images] = color_buffer->ImageHandle(); + image_ranges[num_images] = MakeSubresourceRange(color_buffer); + samples = color_buffer->Samples(); + ++num_images; + } + const size_t num_colors = attachments.size(); + const VkAttachmentReference* depth_attachment = + depth_buffer ? 
&ATTACHMENT_REFERENCES[num_colors] : nullptr; + if (depth_buffer) { + descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); + attachments.push_back(depth_buffer->RenderTarget()); + renderpass_key.depth_format = depth_buffer->format; + num_layers = std::max(num_layers, depth_buffer->range.extent.layers); + images[num_images] = depth_buffer->ImageHandle(); + image_ranges[num_images] = MakeSubresourceRange(depth_buffer); + samples = depth_buffer->Samples(); + ++num_images; + } else { + renderpass_key.depth_format = PixelFormat::Invalid; + } + renderpass_key.samples = samples; + + const auto& device = runtime.device.GetLogical(); + const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); + if (is_new) { + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast(num_colors), + .pColorAttachments = num_colors != 0 ? 
ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + } + renderpass = *cache_pair->second; + render_area = VkExtent2D{ + .width = key.size.width, + .height = key.size.height, + }; + num_color_buffers = static_cast(num_colors); + framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = renderpass, + .attachmentCount = static_cast(attachments.size()), + .pAttachments = attachments.data(), + .width = key.size.width, + .height = key.size.height, + .layers = static_cast(num_layers), + }); + if (runtime.device.HasDebuggingToolAttached()) { + framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b0be4cb0f..edc3d80c0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -4,217 +4,265 @@ #pragma once -#include -#include +#include +#include -#include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" -namespace VideoCore { -class RasterizerInterface; -} - namespace Vulkan { 
-class RasterizerVulkan; +using VideoCommon::ImageId; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +using VideoCore::Surface::PixelFormat; + class VKDevice; class VKScheduler; class VKStagingBufferPool; -class CachedSurfaceView; -class CachedSurface; +class BlitImageHelper; +class Image; +class ImageView; +class Framebuffer; -using Surface = std::shared_ptr; -using View = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCache; +struct RenderPassKey { + constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; - -class CachedSurface final : public VideoCommon::SurfaceBase { - friend CachedSurfaceView; - -public: - explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, - GPUVAddr gpu_addr_, const SurfaceParams& params_); - ~CachedSurface(); - - void UploadTexture(const std::vector& staging_buffer) override; - void DownloadTexture(std::vector& staging_buffer) override; - - void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - image->Transition(0, static_cast(params.GetNumLayers()), 0, params.num_levels, - new_stage_mask, new_access, new_layout); - } - - void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, - new_access, new_layout); - } - - VKImage& GetImage() { - return *image; - } - - const VKImage& GetImage() const { - return *image; - } - - VkImage GetImageHandle() const { - return *image->GetHandle(); - } - - VkImageAspectFlags GetAspectMask() const { - return image->GetAspectMask(); - } - - VkBufferView GetBufferViewHandle() const { - return *buffer_view; - } - 
-protected: - void DecorateSurfaceName() override; - - View CreateView(const ViewParams& view_params) override; - -private: - void UploadBuffer(const std::vector& staging_buffer); - - void UploadImage(const std::vector& staging_buffer); - - VkBufferImageCopy GetBufferImageCopy(u32 level) const; - - VkImageSubresourceRange GetImageSubresourceRange() const; - - const VKDevice& device; - VKMemoryManager& memory_manager; - VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; - - std::optional image; - vk::Buffer buffer; - vk::BufferView buffer_view; - VKMemoryCommit commit; - - VkFormat format = VK_FORMAT_UNDEFINED; -}; - -class CachedSurfaceView final : public VideoCommon::ViewBase { -public: - explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, - const ViewParams& view_params_); - ~CachedSurfaceView(); - - VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - VkImageView GetAttachment(); - - bool IsSameSurface(const CachedSurfaceView& rhs) const { - return &surface == &rhs.surface; - } - - u32 GetWidth() const { - return surface_params.GetMipWidth(base_level); - } - - u32 GetHeight() const { - return surface_params.GetMipHeight(base_level); - } - - u32 GetNumLayers() const { - return num_layers; - } - - bool IsBufferView() const { - return buffer_view; - } - - VkImage GetImage() const { - return image; - } - - VkBufferView GetBufferView() const { - return buffer_view; - } - - VkImageSubresourceRange GetImageSubresourceRange() const { - return {aspect_mask, base_level, num_levels, base_layer, num_layers}; - } - - VkImageSubresourceLayers GetImageSubresourceLayers() const { - return {surface.GetAspectMask(), base_level, base_layer, num_layers}; - } - - void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, - VkAccessFlags new_access) const { - surface.Transition(base_layer, 
num_layers, base_level, num_levels, new_stage_mask, - new_access, new_layout); - } - - void MarkAsModified(u64 tick) { - surface.MarkAsModified(true, tick); - } - -private: - // Store a copy of these values to avoid double dereference when reading them - const SurfaceParams surface_params; - const VkImage image; - const VkBufferView buffer_view; - const VkImageAspectFlags aspect_mask; - - const VKDevice& device; - CachedSurface& surface; - const u32 base_level; - const u32 num_levels; - const VkImageViewType image_view_type; - u32 base_layer = 0; - u32 num_layers = 0; - u32 base_slice = 0; - u32 num_slices = 0; - - VkImageView last_image_view = nullptr; - u32 last_swizzle = 0; - - vk::ImageView render_target; - std::unordered_map view_cache; -}; - -class VKTextureCache final : public TextureCacheBase { -public: - explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, - VKMemoryManager& memory_manager_, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_); - ~VKTextureCache(); - -private: - Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - - void ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) override; - - void ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) override; - - void BufferCopy(Surface& src_surface, Surface& dst_surface) override; - - const VKDevice& device; - VKMemoryManager& memory_manager; - VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; + std::array color_formats; + PixelFormat depth_format; + VkSampleCountFlagBits samples; }; } // namespace Vulkan + +namespace std { +template <> +struct hash { + [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast(key.depth_format) << 48; + value ^= static_cast(key.samples) << 52; 
+ for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + +struct ImageBufferMap { + [[nodiscard]] VkBuffer Handle() const noexcept { + return handle; + } + + [[nodiscard]] std::span Span() const noexcept { + return map.Span(); + } + + VkBuffer handle; + MemoryMap map; +}; + +struct TextureCacheRuntime { + const VKDevice& device; + VKScheduler& scheduler; + VKMemoryManager& memory_manager; + VKStagingBufferPool& staging_buffer_pool; + BlitImageHelper& blit_image_helper; + std::unordered_map renderpass_cache; + + void Finish(); + + [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); + + [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) { + // TODO: Have a special function for this + return MapUploadBuffer(size); + } + + void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, + const std::array& dst_region, + const std::array& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void CopyImage(Image& dst, Image& src, std::span copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); + + [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + return false; + } + + void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, + std::span) { + UNREACHABLE(); + } + + void InsertUploadMemoryBarrier() {} +}; + +class Image : public VideoCommon::ImageBase { +public: + explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, + VAddr cpu_addr); + + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); + + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); + + void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span copies); + + [[nodiscard]] VkImage Handle() const noexcept { + 
return *image; + } + + [[nodiscard]] VkBuffer Buffer() const noexcept { + return *buffer; + } + + [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { + return aspect_mask; + } + +private: + VKScheduler* scheduler; + vk::Image image; + vk::Buffer buffer; + VKMemoryCommit commit; + VkImageAspectFlags aspect_mask = 0; + bool initialized = false; +}; + +class ImageView : public VideoCommon::ImageViewBase { +public: + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + + [[nodiscard]] VkImageView DepthView(); + + [[nodiscard]] VkImageView StencilView(); + + [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { + return *image_views[static_cast(query_type)]; + } + + [[nodiscard]] VkBufferView BufferView() const noexcept { + return *buffer_view; + } + + [[nodiscard]] VkImage ImageHandle() const noexcept { + return image_handle; + } + + [[nodiscard]] VkImageView RenderTarget() const noexcept { + return render_target; + } + + [[nodiscard]] PixelFormat ImageFormat() const noexcept { + return image_format; + } + + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; + } + +private: + [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); + + const VKDevice* device = nullptr; + std::array image_views; + vk::ImageView depth_view; + vk::ImageView stencil_view; + vk::BufferView buffer_view; + VkImage image_handle = VK_NULL_HANDLE; + VkImageView render_target = VK_NULL_HANDLE; + PixelFormat image_format = PixelFormat::Invalid; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; +}; + +class ImageAlloc : public VideoCommon::ImageAllocBase {}; + +class Sampler { +public: + explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); + + [[nodiscard]] VkSampler Handle() const noexcept { + return *sampler; + } + +private: + vk::Sampler sampler; 
+}; + +class Framebuffer { +public: + explicit Framebuffer(TextureCacheRuntime&, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + + [[nodiscard]] VkFramebuffer Handle() const noexcept { + return *framebuffer; + } + + [[nodiscard]] VkRenderPass RenderPass() const noexcept { + return renderpass; + } + + [[nodiscard]] VkExtent2D RenderArea() const noexcept { + return render_area; + } + + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; + } + + [[nodiscard]] u32 NumColorBuffers() const noexcept { + return num_color_buffers; + } + + [[nodiscard]] u32 NumImages() const noexcept { + return num_images; + } + + [[nodiscard]] const std::array& Images() const noexcept { + return images; + } + + [[nodiscard]] const std::array& ImageRanges() const noexcept { + return image_ranges; + } + +private: + vk::Framebuffer framebuffer; + VkRenderPass renderpass{}; + VkExtent2D render_area{}; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + u32 num_color_buffers = 0; + u32 num_images = 0; + std::array images{}; + std::array image_ranges{}; +}; + +struct TextureCacheParams { + static constexpr bool ENABLE_VALIDATION = true; + static constexpr bool FRAMEBUFFER_BLITS = false; + static constexpr bool HAS_EMULATED_COPIES = false; + + using Runtime = Vulkan::TextureCacheRuntime; + using Image = Vulkan::Image; + using ImageAlloc = Vulkan::ImageAlloc; + using ImageView = Vulkan::ImageView; + using Sampler = Vulkan::Sampler; + using Framebuffer = Vulkan::Framebuffer; +}; + +using TextureCache = VideoCommon::TextureCache; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index f7e3c9821..f098a8540 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -40,30 +40,34 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, 
VkDescriptorSet set); - void AddSampledImage(VkSampler sampler, VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); + void AddSampledImage(VkImageView image_view, VkSampler sampler) { + payload.emplace_back(VkDescriptorImageInfo{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }); } void AddImage(VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); + payload.emplace_back(VkDescriptorImageInfo{ + .sampler = VK_NULL_HANDLE, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }); } - void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { - payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); + void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { + payload.emplace_back(VkDescriptorBufferInfo{ + .buffer = buffer, + .offset = offset, + .range = size, + }); } void AddTexelBuffer(VkBufferView texel_buffer) { payload.emplace_back(texel_buffer); } - VkImageLayout* LastImageLayout() { - return &payload.back().image.imageLayout; - } - - const VkImageLayout* LastImageLayout() const { - return &payload.back().image.imageLayout; - } - private: const VKDevice& device; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 1eced809e..2a21e850d 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdBeginQuery); X(vkCmdBeginRenderPass); X(vkCmdBeginTransformFeedbackEXT); + X(vkCmdBeginDebugUtilsLabelEXT); X(vkCmdBindDescriptorSets); X(vkCmdBindIndexBuffer); X(vkCmdBindPipeline); @@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdEndQuery); X(vkCmdEndRenderPass); X(vkCmdEndTransformFeedbackEXT); + X(vkCmdEndDebugUtilsLabelEXT); X(vkCmdFillBuffer); 
X(vkCmdPipelineBarrier); X(vkCmdPushConstants); @@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); X(vkCreateCommandPool); @@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkQueueSubmit); X(vkResetFences); X(vkResetQueryPoolEXT); + X(vkSetDebugUtilsObjectNameEXT); + X(vkSetDebugUtilsObjectTagEXT); X(vkUnmapMemory); X(vkUpdateDescriptorSetWithTemplateKHR); X(vkUpdateDescriptorSets); @@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { #undef X } +template +void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, + const char* name) { + const VkDebugUtilsObjectNameInfoEXT name_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .pNext = nullptr, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = reinterpret_cast(handle), + .pObjectName = name, + }; + Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); +} + } // Anonymous namespace bool Load(InstanceDispatch& dld) noexcept { @@ -476,8 +494,7 @@ DebugCallback Instance::TryCreateDebugCallback( VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, .pfnUserCallback = callback, .pUserData = nullptr, }; @@ -493,10 +510,38 @@ void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); } +void Buffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); +} + +void BufferView::SetObjectNameEXT(const char* name) const { + 
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); +} + void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { Check(dld->vkBindImageMemory(owner, handle, memory, offset)); } +void Image::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); +} + +void ImageView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); +} + +void DeviceMemory::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); +} + +void Fence::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); +} + +void Framebuffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); +} + DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { const std::size_t num = ai.descriptorSetCount; std::unique_ptr sets = std::make_unique(num); @@ -510,6 +555,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } } +void DescriptorPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); +} + CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { const VkCommandBufferAllocateInfo ai{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, @@ -530,6 +579,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev } } +void CommandPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); +} + std::vector SwapchainKHR::GetImages() const { u32 num; Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); @@ -538,6 +591,18 @@ std::vector SwapchainKHR::GetImages() const { return images; } +void Event::SetObjectNameEXT(const char* 
name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); +} + +void ShaderModule::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); +} + +void Semaphore::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); +} + Device Device::Create(VkPhysicalDevice physical_device, Span queues_ci, Span enabled_extensions, const void* next, DeviceDispatch& dispatch) noexcept { diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 76f790eab..f9a184e00 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,10 @@ #include "common/common_types.h" +#ifdef _MSC_VER +#pragma warning(disable : 26812) // Disable prefer enum class over enum +#endif + namespace Vulkan::vk { /** @@ -41,6 +46,9 @@ public: /// Construct an empty span. constexpr Span() noexcept = default; + /// Construct an empty span + constexpr Span(std::nullptr_t) noexcept {} + /// Construct a span from a single element. 
constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} @@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdBeginQuery vkCmdBeginQuery; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; PFN_vkCmdBindPipeline vkCmdBindPipeline; @@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdEndQuery vkCmdEndQuery; PFN_vkCmdEndRenderPass vkCmdEndRenderPass; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; PFN_vkCmdFillBuffer vkCmdFillBuffer; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; PFN_vkCmdPushConstants vkCmdPushConstants; @@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; + PFN_vkCmdResolveImage vkCmdResolveImage; PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateBufferView vkCreateBufferView; PFN_vkCreateCommandPool vkCreateCommandPool; @@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkQueueSubmit vkQueueSubmit; PFN_vkResetFences vkResetFences; PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; + PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; + PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; PFN_vkUnmapMemory vkUnmapMemory; PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; @@ -542,18 +555,14 @@ private: const DeviceDispatch* dld = nullptr; }; -using BufferView = Handle; using DebugCallback = Handle; using DescriptorSetLayout = Handle; using DescriptorUpdateTemplateKHR = Handle; -using Framebuffer = 
Handle; -using ImageView = Handle; using Pipeline = Handle; using PipelineLayout = Handle; using QueryPool = Handle; using RenderPass = Handle; using Sampler = Handle; -using ShaderModule = Handle; using SurfaceKHR = Handle; using DescriptorSets = PoolAllocations; @@ -605,6 +614,17 @@ class Buffer : public Handle { public: /// Attaches a memory allocation. void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class BufferView : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class Image : public Handle { @@ -613,12 +633,26 @@ class Image : public Handle { public: /// Attaches a memory allocation. void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class ImageView : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class DeviceMemory : public Handle { using Handle::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + u8* Map(VkDeviceSize offset, VkDeviceSize size) const { void* data; Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); @@ -634,6 +668,9 @@ class Fence : public Handle { using Handle::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + VkResult Wait(u64 timeout = std::numeric_limits::max()) const noexcept { return dld->vkWaitForFences(owner, 1, &handle, true, timeout); } @@ -647,11 +684,22 @@ public: } }; +class Framebuffer : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + class DescriptorPool : public Handle { using Handle::Handle; public: DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; + + /// Set object name. 
+ void SetObjectNameEXT(const char* name) const; }; class CommandPool : public Handle { @@ -660,6 +708,9 @@ class CommandPool : public Handle { public: CommandBuffers Allocate(std::size_t num_buffers, VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class SwapchainKHR : public Handle { @@ -673,15 +724,29 @@ class Event : public Handle { using Handle::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + VkResult GetStatus() const noexcept { return dld->vkGetEventStatus(owner, handle); } }; +class ShaderModule : public Handle { + using Handle::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + class Semaphore : public Handle { using Handle::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + [[nodiscard]] u64 GetCounter() const { u64 value; Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); @@ -932,6 +997,12 @@ public: regions.data(), filter); } + void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span regions) { + dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data()); + } + void Dispatch(u32 x, u32 y, u32 z) const noexcept { dld->vkCmdDispatch(handle, x, y, z); } @@ -946,6 +1017,23 @@ public: image_barriers.size(), image_barriers.data()); } + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags = 0) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkBufferMemoryBarrier& buffer_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, 
dependency_flags, {}, buffer_barrier, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkImageMemoryBarrier& image_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); + } + void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, Span regions) const noexcept { dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), @@ -979,6 +1067,13 @@ public: dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); } + template + void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, + const T& data) const noexcept { + static_assert(std::is_trivially_copyable_v, " is not trivially copyable"); + dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast(sizeof(T)), &data); + } + void SetViewport(u32 first, Span viewports) const noexcept { dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); } @@ -1088,6 +1183,20 @@ public: counter_buffers, counter_buffer_offsets); } + void BeginDebugUtilsLabelEXT(const char* label, std::span color) const noexcept { + const VkDebugUtilsLabelEXT label_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = label, + .color{color[0], color[1], color[2], color[3]}, + }; + dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); + } + + void EndDebugUtilsLabelEXT() const noexcept { + dld->vkCmdEndDebugUtilsLabelEXT(handle); + } + private: VkCommandBuffer handle; const DeviceDispatch* dld; diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp deleted file mode 100644 index 53c7ef12d..000000000 --- a/src/video_core/sampler_cache.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/sampler_cache.h" - -namespace VideoCommon { - -std::size_t SamplerCacheKey::Hash() const { - static_assert(sizeof(raw) % sizeof(u64) == 0); - return static_cast( - Common::CityHash64(reinterpret_cast(raw.data()), sizeof(raw) / sizeof(u64))); -} - -bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { - return raw == rhs.raw; -} - -} // namespace VideoCommon diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h deleted file mode 100644 index cbe3ad071..000000000 --- a/src/video_core/sampler_cache.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "video_core/textures/texture.h" - -namespace VideoCommon { - -struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { - std::size_t Hash() const; - - bool operator==(const SamplerCacheKey& rhs) const; - - bool operator!=(const SamplerCacheKey& rhs) const { - return !operator==(rhs); - } -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace VideoCommon { - -template -class SamplerCache { -public: - SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { - const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); - auto& sampler = entry->second; - if (is_cache_miss) { - sampler = CreateSampler(tsc); - } - return ToSamplerType(sampler); - } - -protected: - virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; - - virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; - -private: - std::unordered_map cache; -}; - -} // namespace VideoCommon \ No newline at end of file diff --git 
a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 78245473c..09f93463b 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -137,10 +137,9 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - Vulkan::VKRenderPassCache& renderpass_cache, std::vector bindings, Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key) { + Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { std::unique_lock lock(queue_mutex); pending_queue.push({ .backend = Backend::Vulkan, @@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, .scheduler = &scheduler, .descriptor_pool = &descriptor_pool, .update_descriptor_queue = &update_descriptor_queue, - .renderpass_cache = &renderpass_cache, .bindings = std::move(bindings), .program = std::move(program), .key = key, + .num_color_buffers = num_color_buffers, }); cv.notify_one(); } @@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context } else if (work.backend == Backend::Vulkan) { auto pipeline = std::make_unique( *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, - work.program); + *work.update_descriptor_queue, work.key, work.bindings, work.program, + work.num_color_buffers); work.pp_cache->EmplacePipeline(std::move(pipeline)); } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 5a7216019..004e214a8 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -98,9 +98,9 @@ public: Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& 
update_descriptor_queue, - Vulkan::VKRenderPassCache& renderpass_cache, std::vector bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, + u32 num_color_buffers); private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -127,10 +127,10 @@ private: Vulkan::VKScheduler* scheduler; Vulkan::VKDescriptorPool* descriptor_pool; Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - Vulkan::VKRenderPassCache* renderpass_cache; std::vector bindings; Vulkan::SPIRVProgram program; Vulkan::GraphicsPipelineCacheKey key; + u32 num_color_buffers; }; std::condition_variable cv; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index ab14c1aa3..6576d1208 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -25,7 +25,7 @@ using Tegra::Shader::OpCode; namespace { void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { + const std::list& used_samplers) { if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { return; } @@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, } } -std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, +std::optional TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { + const std::list& used_samplers) { const u32 base_offset = sampler_to_deduce.offset; u32 max_offset{std::numeric_limits::max()}; for (const auto& sampler : used_samplers) { diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 532f66d27..5470e8cf4 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { return pc; } -Image& 
ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { +ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { const auto offset = static_cast(image.index.Value()); - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [offset](const Image& entry) { return entry.offset == offset; }); + const auto it = + std::find_if(std::begin(used_images), std::end(used_images), + [offset](const ImageEntry& entry) { return entry.offset == offset; }); if (it != std::end(used_images)) { ASSERT(!it->is_bindless && it->type == type); return *it; @@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t return used_images.emplace_back(next_index, offset, type); } -Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { +ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); const auto result = TrackCbuf(image_register, global_code, static_cast(global_code.size())); @@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im const auto offset = std::get<2>(result); const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const Image& entry) { + [buffer, offset](const ImageEntry& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != std::end(used_images)) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index fb18f631f..833fa2a39 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { SamplerInfo info; info.is_shadow = is_depth_compare; - const std::optional sampler = GetSampler(instr.sampler, info); + const std::optional sampler = GetSampler(instr.sampler, 
info); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { SamplerInfo info; info.type = texture_type; info.is_array = is_array; - const std::optional sampler = is_bindless - ? GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); + const std::optional sampler = + is_bindless ? GetBindlessSampler(base_reg, info, index_var) + : GetSampler(instr.sampler, info); Node4 values; if (!sampler) { std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); @@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { [[fallthrough]]; case OpCode::Id::TXQ: { Node index_var; - const std::optional sampler = is_bindless - ? GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); + const std::optional sampler = + is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) + : GetSampler(instr.sampler, {}); if (!sampler) { u32 indexer = 0; @@ -272,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { info.type = texture_type; info.is_array = is_array; Node index_var; - const std::optional sampler = + const std::optional sampler = is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) : GetSampler(instr.sampler, info); @@ -379,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( return info; } -std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { +std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, + SamplerInfo sampler_info) { const u32 offset = static_cast(sampler.index.Value()); const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const Sampler& entry) { return entry.offset == offset; }); + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [offset](const SamplerEntry& entry) { return entry.offset == offset; }); if (it != used_samplers.end()) { ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -399,8 +400,8 @@ std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, *info.is_shadow, *info.is_buffer, false); } -std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var) { +std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, + SamplerInfo info, Node& index_var) { const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); @@ -416,7 +417,7 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, // If this sampler has already been used, return the existing mapping. 
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const Sampler& entry) { + [buffer, offset](const SamplerEntry& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != used_samplers.end()) { @@ -436,11 +437,12 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); // Try to use an already created sampler if it exists - const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [indices, offsets](const SamplerEntry& entry) { + return offsets == std::pair{entry.offset, entry.secondary_offset} && + indices == std::pair{entry.buffer, entry.secondary_buffer}; + }); if (it != used_samplers.end()) { ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -460,7 +462,7 @@ std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, // If this sampler has already been used, return the existing mapping. const auto it = std::find_if( used_samplers.begin(), used_samplers.end(), - [base_offset](const Sampler& entry) { return entry.offset == base_offset; }); + [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); if (it != used_samplers.end()) { ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && @@ -565,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, info.is_buffer = false; Node index_var; - const std::optional sampler = is_bindless - ? 
GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); + const std::optional sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) + : GetSampler(instr.sampler, info); if (!sampler) { return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; } @@ -724,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de info.is_shadow = depth_compare; Node index_var; - const std::optional sampler = + const std::optional sampler = is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) : GetSampler(instr.sampler, info); Node4 values; @@ -783,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const std::optional sampler = GetSampler(instr.sampler, {}); + const std::optional sampler = GetSampler(instr.sampler, {}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -800,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is info.type = texture_type; info.is_array = is_array; info.is_shadow = false; - const std::optional sampler = GetSampler(instr.sampler, info); + const std::optional sampler = GetSampler(instr.sampler, info); const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8db9e1de7..b54d33763 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -282,25 +282,24 @@ struct SeparateSamplerNode; using TrackSamplerData = std::variant; using TrackSampler = std::shared_ptr; -struct Sampler { +struct SamplerEntry { /// Bound samplers constructor - constexpr explicit Sampler(u32 index_, u32 offset_, 
Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) + explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, + bool is_shadow_, bool is_buffer_, bool is_indexed_) : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_indexed{is_indexed_} {} /// Separate sampler constructor - constexpr explicit Sampler(u32 index_, std::pair offsets_, - std::pair buffers_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_) - : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second}, - buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_}, - is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} + explicit SamplerEntry(u32 index_, std::pair offsets, std::pair buffers, + Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, + bool is_buffer_) + : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, + buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, + is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} /// Bindless samplers constructor - constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_, bool is_indexed_) + explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, + bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { } @@ -340,14 +339,14 @@ struct BindlessSamplerNode { u32 offset; }; -struct Image { +struct ImageEntry { public: /// Bound images constructor - constexpr explicit Image(u32 index_, u32 offset_, 
Tegra::Shader::ImageType type_) + explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, type{type_} {} /// Bindless samplers constructor - constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) + explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} void MarkWrite() { @@ -391,7 +390,7 @@ struct MetaArithmetic { /// Parameters describing a texture sampler struct MetaTexture { - Sampler sampler; + SamplerEntry sampler; Node array; Node depth_compare; std::vector aoffi; @@ -405,7 +404,7 @@ struct MetaTexture { }; struct MetaImage { - const Image& image; + const ImageEntry& image; std::vector values; u32 element{}; }; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6aae14e34..0c6ab0f07 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -94,11 +94,11 @@ public: return used_cbufs; } - const std::list& GetSamplers() const { + const std::list& GetSamplers() const { return used_samplers; } - const std::list& GetImages() const { + const std::list& GetImages() const { return used_images; } @@ -334,17 +334,17 @@ private: std::optional sampler); /// Accesses a texture sampler. - std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); + std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); /// Accesses a texture sampler for a bindless texture. - std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); + std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, + Node& index_var); /// Accesses an image. 
- Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); /// Access a bindless image sampler. - Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -454,8 +454,8 @@ private: std::set used_input_attributes; std::set used_output_attributes; std::map used_cbufs; - std::list used_samplers; - std::list used_images; + std::list used_samplers; + std::list used_images; std::array used_clip_distances{}; std::map used_global_memory; bool uses_layer{}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 937e29d1e..6308aef94 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) { } std::pair GetASTCBlockSize(PixelFormat format) { - return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; + return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; } } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index cfd12fa61..c40ab89d0 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -120,7 +120,7 @@ enum class PixelFormat { Max = MaxDepthStencilFormat, Invalid = 255, }; -static constexpr std::size_t MaxPixelFormat = static_cast(PixelFormat::Max); +constexpr std::size_t MaxPixelFormat = static_cast(PixelFormat::Max); enum class SurfaceType { ColorTexture = 0, @@ -140,117 +140,7 @@ enum class SurfaceTarget { TextureCubeArray, }; -constexpr std::array compression_factor_shift_table = {{ - 0, // A8B8G8R8_UNORM - 0, // A8B8G8R8_SNORM - 0, // A8B8G8R8_SINT - 0, // A8B8G8R8_UINT - 0, // R5G6B5_UNORM - 0, // B5G6R5_UNORM - 0, // A1R5G5B5_UNORM - 0, // A2B10G10R10_UNORM - 0, // 
A2B10G10R10_UINT - 0, // A1B5G5R5_UNORM - 0, // R8_UNORM - 0, // R8_SNORM - 0, // R8_SINT - 0, // R8_UINT - 0, // R16G16B16A16_FLOAT - 0, // R16G16B16A16_UNORM - 0, // R16G16B16A16_SNORM - 0, // R16G16B16A16_SINT - 0, // R16G16B16A16_UINT - 0, // B10G11R11_FLOAT - 0, // R32G32B32A32_UINT - 2, // BC1_RGBA_UNORM - 2, // BC2_UNORM - 2, // BC3_UNORM - 2, // BC4_UNORM - 2, // BC4_SNORM - 2, // BC5_UNORM - 2, // BC5_SNORM - 2, // BC7_UNORM - 2, // BC6H_UFLOAT - 2, // BC6H_SFLOAT - 2, // ASTC_2D_4X4_UNORM - 0, // B8G8R8A8_UNORM - 0, // R32G32B32A32_FLOAT - 0, // R32G32B32A32_SINT - 0, // R32G32_FLOAT - 0, // R32G32_SINT - 0, // R32_FLOAT - 0, // R16_FLOAT - 0, // R16_UNORM - 0, // R16_SNORM - 0, // R16_UINT - 0, // R16_SINT - 0, // R16G16_UNORM - 0, // R16G16_FLOAT - 0, // R16G16_UINT - 0, // R16G16_SINT - 0, // R16G16_SNORM - 0, // R32G32B32_FLOAT - 0, // A8B8G8R8_SRGB - 0, // R8G8_UNORM - 0, // R8G8_SNORM - 0, // R8G8_SINT - 0, // R8G8_UINT - 0, // R32G32_UINT - 0, // R16G16B16X16_FLOAT - 0, // R32_UINT - 0, // R32_SINT - 2, // ASTC_2D_8X8_UNORM - 2, // ASTC_2D_8X5_UNORM - 2, // ASTC_2D_5X4_UNORM - 0, // B8G8R8A8_SRGB - 2, // BC1_RGBA_SRGB - 2, // BC2_SRGB - 2, // BC3_SRGB - 2, // BC7_SRGB - 0, // A4B4G4R4_UNORM - 2, // ASTC_2D_4X4_SRGB - 2, // ASTC_2D_8X8_SRGB - 2, // ASTC_2D_8X5_SRGB - 2, // ASTC_2D_5X4_SRGB - 2, // ASTC_2D_5X5_UNORM - 2, // ASTC_2D_5X5_SRGB - 2, // ASTC_2D_10X8_UNORM - 2, // ASTC_2D_10X8_SRGB - 2, // ASTC_2D_6X6_UNORM - 2, // ASTC_2D_6X6_SRGB - 2, // ASTC_2D_10X10_UNORM - 2, // ASTC_2D_10X10_SRGB - 2, // ASTC_2D_12X12_UNORM - 2, // ASTC_2D_12X12_SRGB - 2, // ASTC_2D_8X6_UNORM - 2, // ASTC_2D_8X6_SRGB - 2, // ASTC_2D_6X5_UNORM - 2, // ASTC_2D_6X5_SRGB - 0, // E5B9G9R9_FLOAT - 0, // D32_FLOAT - 0, // D16_UNORM - 0, // D24_UNORM_S8_UINT - 0, // S8_UINT_D24_UNORM - 0, // D32_FLOAT_S8_UINT -}}; - -/** - * Gets the compression factor for the specified PixelFormat. 
This applies to just the - * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed - * texture formats. - */ -inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { - DEBUG_ASSERT(format != PixelFormat::Invalid); - DEBUG_ASSERT(static_cast(format) < compression_factor_shift_table.size()); - return compression_factor_shift_table[static_cast(format)]; -} - -inline constexpr u32 GetCompressionFactor(PixelFormat format) { - return 1U << GetCompressionFactorShift(format); -} - -constexpr std::array block_width_table = {{ +constexpr std::array BLOCK_WIDTH_TABLE = {{ 1, // A8B8G8R8_UNORM 1, // A8B8G8R8_SNORM 1, // A8B8G8R8_SINT @@ -344,15 +234,12 @@ constexpr std::array block_width_table = {{ 1, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast(format) < block_width_table.size()); - return block_width_table[static_cast(format)]; +constexpr u32 DefaultBlockWidth(PixelFormat format) { + ASSERT(static_cast(format) < BLOCK_WIDTH_TABLE.size()); + return BLOCK_WIDTH_TABLE[static_cast(format)]; } -constexpr std::array block_height_table = {{ +constexpr std::array BLOCK_HEIGHT_TABLE = {{ 1, // A8B8G8R8_UNORM 1, // A8B8G8R8_SNORM 1, // A8B8G8R8_SINT @@ -446,15 +333,12 @@ constexpr std::array block_height_table = {{ 1, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast(format) < block_height_table.size()); - return block_height_table[static_cast(format)]; +constexpr u32 DefaultBlockHeight(PixelFormat format) { + ASSERT(static_cast(format) < BLOCK_HEIGHT_TABLE.size()); + return BLOCK_HEIGHT_TABLE[static_cast(format)]; } -constexpr std::array bpp_table = {{ +constexpr std::array BITS_PER_BLOCK_TABLE = {{ 32, // A8B8G8R8_UNORM 32, // 
A8B8G8R8_SNORM 32, // A8B8G8R8_SINT @@ -548,20 +432,14 @@ constexpr std::array bpp_table = {{ 64, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetFormatBpp(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast(format) < bpp_table.size()); - return bpp_table[static_cast(format)]; +constexpr u32 BitsPerBlock(PixelFormat format) { + ASSERT(static_cast(format) < BITS_PER_BLOCK_TABLE.size()); + return BITS_PER_BLOCK_TABLE[static_cast(format)]; } /// Returns the sizer in bytes of the specified pixel format -static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { - if (pixel_format == PixelFormat::Invalid) { - return 0; - } - return GetFormatBpp(pixel_format) / CHAR_BIT; +constexpr u32 BytesPerBlock(PixelFormat pixel_format) { + return BitsPerBlock(pixel_format) / CHAR_BIT; } SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100644 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon::Accelerated { + +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using VideoCore::Surface::BytesPerBlock; + +BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + return BlockLinearSwizzle2DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast(std::countr_zero(bytes_per_block)), + .layer_stride = info.layer_stride, + .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + }; +} + +BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; + const u32 
block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); + const u32 slice_size = + Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; + return BlockLinearSwizzle3DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast(std::countr_zero(bytes_per_block)), + .slice_size = slice_size, + .block_size = block_size, + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + .block_depth = block.depth, + .block_depth_mask = (1U << block.depth) - 1, + }; +} + +} // namespace VideoCommon::Accelerated \ No newline at end of file diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100644 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon::Accelerated { + +struct BlockLinearSwizzle2DParams { + std::array origin; + std::array destination; + u32 bytes_per_block_log2; + u32 layer_stride; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; +}; + +struct BlockLinearSwizzle3DParams { + std::array origin; + std::array destination; + u32 bytes_per_block_log2; + u32 slice_size; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; + u32 block_depth; + u32 block_depth_mask; +}; + +[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +} // namespace VideoCommon::Accelerated diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h deleted file mode 100644 index 5b475fe06..000000000 --- a/src/video_core/texture_cache/copy_params.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCommon { - -struct CopyParams { - constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_, - u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_, - u32 depth_) - : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_}, - dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_}, - dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {} - - constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_) - : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_}, - dest_level{level_}, width{width_}, height{height_}, depth{depth_} {} - - u32 source_x; - u32 source_y; - u32 source_z; - u32 dest_x; - u32 dest_y; - u32 dest_z; - u32 source_level; - u32 dest_level; - u32 width; - u32 height; - u32 depth; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp new file mode 100644 index 000000000..017327975 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.cpp @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt +[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { + const u32 code_offset = 16 + 3 * (4 * y + x); + const u32 code = (bits >> code_offset) & 7; + const u32 red0 = (bits >> 0) & 0xff; + const u32 red1 = (bits >> 8) & 0xff; + if (red0 > red1) { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (6 * red0 + 1 * red1) / 7; + case 3: + return (5 * red0 + 2 * red1) / 7; + case 4: + return (4 * red0 + 3 * red1) / 7; + case 5: + return (3 * red0 + 4 * red1) / 7; + case 6: + return (2 * red0 + 5 * red1) / 7; + case 7: + return (1 * red0 + 6 * red1) / 7; + } + } else { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (4 * red0 + 1 * red1) / 5; + case 3: + return (3 * red0 + 2 * red1) / 5; + case 4: + return (2 * red0 + 3 * red1) / 5; + case 5: + return (1 * red0 + 4 * red1) / 5; + case 6: + return 0; + case 7: + return 0xff; + } + } + return 0; +} + +void DecompressBC4(std::span input, Extent3D extent, std::span output) { + UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); + UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); + static constexpr u32 BLOCK_SIZE = 4; + size_t input_offset = 0; + for (u32 slice = 0; slice < extent.depth; ++slice) { + for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { + for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { + u64 bits; + std::memcpy(&bits, &input[input_offset], sizeof(bits)); + input_offset += sizeof(bits); + + for (u32 y = 0; y < BLOCK_SIZE; ++y) { + for (u32 x = 0; x < BLOCK_SIZE; ++x) { + const u32 linear_z = slice; + const u32 linear_y = 
block_y * BLOCK_SIZE + y; + const u32 linear_x = block_x * BLOCK_SIZE + x; + const u32 offset_z = linear_z * extent.width * extent.height; + const u32 offset_y = linear_y * extent.width; + const u32 offset_x = linear_x; + const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; + const u32 color = DecompressBlock(bits, x, y); + output[output_offset + 0] = static_cast(color); + output[output_offset + 1] = 0; + output[output_offset + 2] = 0; + output[output_offset + 3] = 0xff; + } + } + } + } + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h new file mode 100644 index 000000000..63fb23508 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.h @@ -0,0 +1,16 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +void DecompressBC4(std::span data, Extent3D extent, std::span output); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h new file mode 100644 index 000000000..3a03b786f --- /dev/null +++ b/src/video_core/texture_cache/descriptor_table.h @@ -0,0 +1,82 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/logging/log.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCommon { + +template +class DescriptorTable { +public: + explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} + + [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) { + [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { + return false; + } + Refresh(gpu_addr, limit); + return true; + } + + void Invalidate() noexcept { + std::ranges::fill(read_descriptors, 0); + } + + [[nodiscard]] std::pair Read(u32 index) { + DEBUG_ASSERT(index <= current_limit); + const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); + std::pair result; + gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); + if (IsDescriptorRead(index)) { + result.second = result.first != descriptors[index]; + } else { + MarkDescriptorAsRead(index); + result.second = true; + } + if (result.second) { + descriptors[index] = result.first; + } + return result; + } + + [[nodiscard]] u32 Limit() const noexcept { + return current_limit; + } + +private: + void Refresh(GPUVAddr gpu_addr, u32 limit) { + current_gpu_addr = gpu_addr; + current_limit = limit; + + const size_t num_descriptors = static_cast(limit) + 1; + read_descriptors.clear(); + read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); + descriptors.resize(num_descriptors); + } + + void MarkDescriptorAsRead(u32 index) noexcept { + read_descriptors[index / 64] |= 1ULL << (index % 64); + } + + [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { + return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; + } + + Tegra::MemoryManager& gpu_memory; + GPUVAddr current_gpu_addr{}; + u32 current_limit{}; + std::vector read_descriptors; + std::vector descriptors; +}; + +} // namespace 
VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7938d71eb..ddfb726fe 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/texture_cache/format_lookup_table.h" @@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM; constexpr auto SINT = ComponentType::SINT; constexpr auto UINT = ComponentType::UINT; constexpr auto FLOAT = ComponentType::FLOAT; -constexpr bool C = false; // Normal color -constexpr bool S = true; // Srgb +constexpr bool LINEAR = false; +constexpr bool SRGB = true; -struct Table { - constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, - ComponentType green_component_, ComponentType blue_component_, - ComponentType alpha_component_, PixelFormat pixel_format_) - : texture_format{texture_format_}, pixel_format{pixel_format_}, - red_component{red_component_}, green_component{green_component_}, - blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} +constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component, + ComponentType blue_component, ComponentType alpha_component, bool is_srgb) { + u32 hash = is_srgb ? 
1 : 0; + hash |= static_cast(red_component) << 1; + hash |= static_cast(green_component) << 4; + hash |= static_cast(blue_component) << 7; + hash |= static_cast(alpha_component) << 10; + hash |= static_cast(format) << 13; + return hash; +} - TextureFormat texture_format; - PixelFormat pixel_format; - ComponentType red_component; - ComponentType green_component; - ComponentType blue_component; - ComponentType alpha_component; - bool is_srgb; -}; -constexpr std::array DefinitionTable = {{ - {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, - {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, - {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, - {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, - {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, - - {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, - - {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, - {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, - - {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, - - {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, - - {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, - {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, - {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, - {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, - - {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, - {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, - {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, - {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, - 
- {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, - {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, - {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, - {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, - {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, - - {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, - {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, - {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, - {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, - {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, - - {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, - {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, - {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, - {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, - {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, - - {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, - - {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, - {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, - {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, - - {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, - - {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, - {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, - {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, - - 
{TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, - {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, - {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, - - {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, - - {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, - {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, - {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, - {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, - {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, - - {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, - {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, - - {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, - {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, - - {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, - {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, - - {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, - {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, - - {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, - {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, - - {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, - {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, - - {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, - {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, - - {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, - 
{TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, - - {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, - {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, - - {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, - {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, - - {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, - {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, - - {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, - {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, - - {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, - {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, - - {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, - {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, - - {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, - {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, - - {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, - {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, - - {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, - {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, - - {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, - {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, 
PixelFormat::ASTC_2D_6X5_SRGB}, -}}; +constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) { + return Hash(format, component, component, component, component, is_srgb); +} } // Anonymous namespace -FormatLookupTable::FormatLookupTable() { - table.fill(static_cast(PixelFormat::Invalid)); - - for (const auto& entry : DefinitionTable) { - table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, - entry.green_component, entry.blue_component, entry.alpha_component)] = - static_cast(entry.pixel_format); - } -} - -PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, - ComponentType red_component, - ComponentType green_component, - ComponentType blue_component, - ComponentType alpha_component) const noexcept { - const auto pixel_format = static_cast(table[CalculateIndex( - format, is_srgb, red_component, green_component, blue_component, alpha_component)]); - // [[likely]] - if (pixel_format != PixelFormat::Invalid) { - return pixel_format; +PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green, + ComponentType blue, ComponentType alpha, + bool is_srgb) noexcept { + switch (Hash(format, red, green, blue, alpha, is_srgb)) { + case Hash(TextureFormat::A8R8G8B8, UNORM): + return PixelFormat::A8B8G8R8_UNORM; + case Hash(TextureFormat::A8R8G8B8, SNORM): + return PixelFormat::A8B8G8R8_SNORM; + case Hash(TextureFormat::A8R8G8B8, UINT): + return PixelFormat::A8B8G8R8_UINT; + case Hash(TextureFormat::A8R8G8B8, SINT): + return PixelFormat::A8B8G8R8_SINT; + case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB): + return PixelFormat::A8B8G8R8_SRGB; + case Hash(TextureFormat::B5G6R5, UNORM): + return PixelFormat::B5G6R5_UNORM; + case Hash(TextureFormat::A2B10G10R10, UNORM): + return PixelFormat::A2B10G10R10_UNORM; + case Hash(TextureFormat::A2B10G10R10, UINT): + return PixelFormat::A2B10G10R10_UINT; + case Hash(TextureFormat::A1B5G5R5, UNORM): + return 
PixelFormat::A1B5G5R5_UNORM; + case Hash(TextureFormat::A4B4G4R4, UNORM): + return PixelFormat::A4B4G4R4_UNORM; + case Hash(TextureFormat::R8, UNORM): + return PixelFormat::R8_UNORM; + case Hash(TextureFormat::R8, SNORM): + return PixelFormat::R8_SNORM; + case Hash(TextureFormat::R8, UINT): + return PixelFormat::R8_UINT; + case Hash(TextureFormat::R8, SINT): + return PixelFormat::R8_SINT; + case Hash(TextureFormat::R8G8, UNORM): + return PixelFormat::R8G8_UNORM; + case Hash(TextureFormat::R8G8, SNORM): + return PixelFormat::R8G8_SNORM; + case Hash(TextureFormat::R8G8, UINT): + return PixelFormat::R8G8_UINT; + case Hash(TextureFormat::R8G8, SINT): + return PixelFormat::R8G8_SINT; + case Hash(TextureFormat::R16G16B16A16, FLOAT): + return PixelFormat::R16G16B16A16_FLOAT; + case Hash(TextureFormat::R16G16B16A16, UNORM): + return PixelFormat::R16G16B16A16_UNORM; + case Hash(TextureFormat::R16G16B16A16, SNORM): + return PixelFormat::R16G16B16A16_SNORM; + case Hash(TextureFormat::R16G16B16A16, UINT): + return PixelFormat::R16G16B16A16_UINT; + case Hash(TextureFormat::R16G16B16A16, SINT): + return PixelFormat::R16G16B16A16_SINT; + case Hash(TextureFormat::R16G16, FLOAT): + return PixelFormat::R16G16_FLOAT; + case Hash(TextureFormat::R16G16, UNORM): + return PixelFormat::R16G16_UNORM; + case Hash(TextureFormat::R16G16, SNORM): + return PixelFormat::R16G16_SNORM; + case Hash(TextureFormat::R16G16, UINT): + return PixelFormat::R16G16_UINT; + case Hash(TextureFormat::R16G16, SINT): + return PixelFormat::R16G16_SINT; + case Hash(TextureFormat::R16, FLOAT): + return PixelFormat::R16_FLOAT; + case Hash(TextureFormat::R16, UNORM): + return PixelFormat::R16_UNORM; + case Hash(TextureFormat::R16, SNORM): + return PixelFormat::R16_SNORM; + case Hash(TextureFormat::R16, UINT): + return PixelFormat::R16_UINT; + case Hash(TextureFormat::R16, SINT): + return PixelFormat::R16_SINT; + case Hash(TextureFormat::B10G11R11, FLOAT): + return PixelFormat::B10G11R11_FLOAT; + case 
Hash(TextureFormat::R32G32B32A32, FLOAT): + return PixelFormat::R32G32B32A32_FLOAT; + case Hash(TextureFormat::R32G32B32A32, UINT): + return PixelFormat::R32G32B32A32_UINT; + case Hash(TextureFormat::R32G32B32A32, SINT): + return PixelFormat::R32G32B32A32_SINT; + case Hash(TextureFormat::R32G32B32, FLOAT): + return PixelFormat::R32G32B32_FLOAT; + case Hash(TextureFormat::R32G32, FLOAT): + return PixelFormat::R32G32_FLOAT; + case Hash(TextureFormat::R32G32, UINT): + return PixelFormat::R32G32_UINT; + case Hash(TextureFormat::R32G32, SINT): + return PixelFormat::R32G32_SINT; + case Hash(TextureFormat::R32, FLOAT): + return PixelFormat::R32_FLOAT; + case Hash(TextureFormat::R32, UINT): + return PixelFormat::R32_UINT; + case Hash(TextureFormat::R32, SINT): + return PixelFormat::R32_SINT; + case Hash(TextureFormat::E5B9G9R9, FLOAT): + return PixelFormat::E5B9G9R9_FLOAT; + case Hash(TextureFormat::D32, FLOAT): + return PixelFormat::D32_FLOAT; + case Hash(TextureFormat::D16, UNORM): + return PixelFormat::D16_UNORM; + case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): + return PixelFormat::D32_FLOAT_S8_UINT; + case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): + return PixelFormat::BC1_RGBA_UNORM; + case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): + return PixelFormat::BC1_RGBA_SRGB; + case Hash(TextureFormat::BC2, UNORM, LINEAR): + return PixelFormat::BC2_UNORM; + case Hash(TextureFormat::BC2, UNORM, SRGB): + return PixelFormat::BC2_SRGB; + case Hash(TextureFormat::BC3, UNORM, LINEAR): + return PixelFormat::BC3_UNORM; + case Hash(TextureFormat::BC3, UNORM, SRGB): + return PixelFormat::BC3_SRGB; + case Hash(TextureFormat::BC4, UNORM): + return PixelFormat::BC4_UNORM; + case Hash(TextureFormat::BC4, SNORM): + return PixelFormat::BC4_SNORM; 
+ case Hash(TextureFormat::BC5, UNORM): + return PixelFormat::BC5_UNORM; + case Hash(TextureFormat::BC5, SNORM): + return PixelFormat::BC5_SNORM; + case Hash(TextureFormat::BC7, UNORM, LINEAR): + return PixelFormat::BC7_UNORM; + case Hash(TextureFormat::BC7, UNORM, SRGB): + return PixelFormat::BC7_SRGB; + case Hash(TextureFormat::BC6H_SFLOAT, FLOAT): + return PixelFormat::BC6H_SFLOAT; + case Hash(TextureFormat::BC6H_UFLOAT, FLOAT): + return PixelFormat::BC6H_UFLOAT; + case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR): + return PixelFormat::ASTC_2D_4X4_UNORM; + case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB): + return PixelFormat::ASTC_2D_4X4_SRGB; + case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR): + return PixelFormat::ASTC_2D_5X4_UNORM; + case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB): + return PixelFormat::ASTC_2D_5X4_SRGB; + case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_5X5_UNORM; + case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_5X5_SRGB; + case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X8_UNORM; + case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X8_SRGB; + case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X5_UNORM; + case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X5_SRGB; + case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR): + return PixelFormat::ASTC_2D_10X8_UNORM; + case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB): + return PixelFormat::ASTC_2D_10X8_SRGB; + case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR): + return PixelFormat::ASTC_2D_6X6_UNORM; + case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB): + return PixelFormat::ASTC_2D_6X6_SRGB; + case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR): + return PixelFormat::ASTC_2D_10X10_UNORM; + case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): + return 
PixelFormat::ASTC_2D_10X10_SRGB; + case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): + return PixelFormat::ASTC_2D_12X12_UNORM; + case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): + return PixelFormat::ASTC_2D_12X12_SRGB; + case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X6_UNORM; + case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X6_SRGB; + case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_6X5_UNORM; + case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_6X5_SRGB; } UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", - static_cast(format), is_srgb, static_cast(red_component), - static_cast(green_component), static_cast(blue_component), - static_cast(alpha_component)); + static_cast(format), is_srgb, static_cast(red), + static_cast(green), static_cast(blue), static_cast(alpha)); return PixelFormat::A8B8G8R8_UNORM; } -void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, - ComponentType green_component, ComponentType blue_component, - ComponentType alpha_component, PixelFormat pixel_format) {} - -std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, - ComponentType red_component, - ComponentType green_component, - ComponentType blue_component, - ComponentType alpha_component) noexcept { - const auto format_index = static_cast(format); - const auto red_index = static_cast(red_component); - const auto green_index = static_cast(green_component); - const auto blue_index = static_cast(blue_component); - const auto alpha_index = static_cast(alpha_component); - const std::size_t srgb_index = is_srgb ? 
1 : 0; - - return format_index * PerFormat + - srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + - alpha_index * PerComponent * PerComponent * PerComponent + - blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; -} - } // namespace VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h index aa77e0a5a..729533999 100644 --- a/src/video_core/texture_cache/format_lookup_table.h +++ b/src/video_core/texture_cache/format_lookup_table.h @@ -4,48 +4,14 @@ #pragma once -#include -#include #include "video_core/surface.h" #include "video_core/textures/texture.h" namespace VideoCommon { -class FormatLookupTable { -public: - explicit FormatLookupTable(); - - VideoCore::Surface::PixelFormat GetPixelFormat( - Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component) const noexcept; - -private: - static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits::max()); - - static constexpr std::size_t NumTextureFormats = 128; - - static constexpr std::size_t PerComponent = 8; - static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; - static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; - static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; - static constexpr std::size_t PerFormat = PerComponents4 * 2; - - static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, - Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component) noexcept; - - void Set(Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, 
- Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component, - VideoCore::Surface::PixelFormat pixel_format); - - std::array table; -}; +VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo( + Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component, + Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component, + Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp new file mode 100644 index 000000000..d10ba4ccd --- /dev/null +++ b/src/video_core/texture_cache/formatter.cpp @@ -0,0 +1,95 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/render_targets.h" + +namespace VideoCommon { + +std::string Name(const ImageBase& image) { + const GPUVAddr gpu_addr = image.gpu_addr; + const ImageInfo& info = image.info; + const u32 width = info.size.width; + const u32 height = info.size.height; + const u32 depth = info.size.depth; + const u32 num_layers = image.info.resources.layers; + const u32 num_levels = image.info.resources.levels; + std::string resource; + if (num_layers > 1) { + resource += fmt::format(":L{}", num_layers); + } + if (num_levels > 1) { + resource += fmt::format(":M{}", num_levels); + } + switch (image.info.type) { + case ImageType::e1D: + return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource); + case ImageType::e2D: + return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource); + case 
ImageType::e3D: + return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource); + case ImageType::Linear: + return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height); + case ImageType::Buffer: + return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width); + } + return "Invalid"; +} + +std::string Name(const ImageViewBase& image_view, std::optional type) { + const u32 width = image_view.size.width; + const u32 height = image_view.size.height; + const u32 depth = image_view.size.depth; + const u32 num_levels = image_view.range.extent.levels; + const u32 num_layers = image_view.range.extent.layers; + + const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; + switch (type.value_or(image_view.type)) { + case ImageViewType::e1D: + return fmt::format("ImageView 1D {}{}", width, level); + case ImageViewType::e2D: + return fmt::format("ImageView 2D {}x{}{}", width, height, level); + case ImageViewType::Cube: + return fmt::format("ImageView Cube {}x{}{}", width, height, level); + case ImageViewType::e3D: + return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); + case ImageViewType::e1DArray: + return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); + case ImageViewType::e2DArray: + return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); + case ImageViewType::CubeArray: + return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); + case ImageViewType::Rect: + return fmt::format("ImageView Rect {}x{}{}", width, height, level); + case ImageViewType::Buffer: + return fmt::format("BufferView {}", width); + } + return "Invalid"; +} + +std::string Name(const RenderTargets& render_targets) { + std::string_view debug_prefix; + const auto num_color = std::ranges::count_if( + render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast(id); }); + if (render_targets.depth_buffer_id) { + 
debug_prefix = num_color > 0 ? "R" : "Z"; + } else { + debug_prefix = num_color > 0 ? "C" : "X"; + } + const Extent2D size = render_targets.size; + if (num_color > 0) { + return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width, + size.height); + } else { + return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h new file mode 100644 index 000000000..a48413983 --- /dev/null +++ b/src/video_core/texture_cache/formatter.h @@ -0,0 +1,263 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +template <> +struct fmt::formatter : fmt::formatter { + template + auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) { + using VideoCore::Surface::PixelFormat; + const string_view name = [format] { + switch (format) { + case PixelFormat::A8B8G8R8_UNORM: + return "A8B8G8R8_UNORM"; + case PixelFormat::A8B8G8R8_SNORM: + return "A8B8G8R8_SNORM"; + case PixelFormat::A8B8G8R8_SINT: + return "A8B8G8R8_SINT"; + case PixelFormat::A8B8G8R8_UINT: + return "A8B8G8R8_UINT"; + case PixelFormat::R5G6B5_UNORM: + return "R5G6B5_UNORM"; + case PixelFormat::B5G6R5_UNORM: + return "B5G6R5_UNORM"; + case PixelFormat::A1R5G5B5_UNORM: + return "A1R5G5B5_UNORM"; + case PixelFormat::A2B10G10R10_UNORM: + return "A2B10G10R10_UNORM"; + case PixelFormat::A2B10G10R10_UINT: + return "A2B10G10R10_UINT"; + case PixelFormat::A1B5G5R5_UNORM: + return "A1B5G5R5_UNORM"; + case PixelFormat::R8_UNORM: + return "R8_UNORM"; + case PixelFormat::R8_SNORM: + return "R8_SNORM"; + case PixelFormat::R8_SINT: + return "R8_SINT"; + case PixelFormat::R8_UINT: + return "R8_UINT"; + case PixelFormat::R16G16B16A16_FLOAT: + return 
"R16G16B16A16_FLOAT"; + case PixelFormat::R16G16B16A16_UNORM: + return "R16G16B16A16_UNORM"; + case PixelFormat::R16G16B16A16_SNORM: + return "R16G16B16A16_SNORM"; + case PixelFormat::R16G16B16A16_SINT: + return "R16G16B16A16_SINT"; + case PixelFormat::R16G16B16A16_UINT: + return "R16G16B16A16_UINT"; + case PixelFormat::B10G11R11_FLOAT: + return "B10G11R11_FLOAT"; + case PixelFormat::R32G32B32A32_UINT: + return "R32G32B32A32_UINT"; + case PixelFormat::BC1_RGBA_UNORM: + return "BC1_RGBA_UNORM"; + case PixelFormat::BC2_UNORM: + return "BC2_UNORM"; + case PixelFormat::BC3_UNORM: + return "BC3_UNORM"; + case PixelFormat::BC4_UNORM: + return "BC4_UNORM"; + case PixelFormat::BC4_SNORM: + return "BC4_SNORM"; + case PixelFormat::BC5_UNORM: + return "BC5_UNORM"; + case PixelFormat::BC5_SNORM: + return "BC5_SNORM"; + case PixelFormat::BC7_UNORM: + return "BC7_UNORM"; + case PixelFormat::BC6H_UFLOAT: + return "BC6H_UFLOAT"; + case PixelFormat::BC6H_SFLOAT: + return "BC6H_SFLOAT"; + case PixelFormat::ASTC_2D_4X4_UNORM: + return "ASTC_2D_4X4_UNORM"; + case PixelFormat::B8G8R8A8_UNORM: + return "B8G8R8A8_UNORM"; + case PixelFormat::R32G32B32A32_FLOAT: + return "R32G32B32A32_FLOAT"; + case PixelFormat::R32G32B32A32_SINT: + return "R32G32B32A32_SINT"; + case PixelFormat::R32G32_FLOAT: + return "R32G32_FLOAT"; + case PixelFormat::R32G32_SINT: + return "R32G32_SINT"; + case PixelFormat::R32_FLOAT: + return "R32_FLOAT"; + case PixelFormat::R16_FLOAT: + return "R16_FLOAT"; + case PixelFormat::R16_UNORM: + return "R16_UNORM"; + case PixelFormat::R16_SNORM: + return "R16_SNORM"; + case PixelFormat::R16_UINT: + return "R16_UINT"; + case PixelFormat::R16_SINT: + return "R16_SINT"; + case PixelFormat::R16G16_UNORM: + return "R16G16_UNORM"; + case PixelFormat::R16G16_FLOAT: + return "R16G16_FLOAT"; + case PixelFormat::R16G16_UINT: + return "R16G16_UINT"; + case PixelFormat::R16G16_SINT: + return "R16G16_SINT"; + case PixelFormat::R16G16_SNORM: + return "R16G16_SNORM"; + case 
PixelFormat::R32G32B32_FLOAT: + return "R32G32B32_FLOAT"; + case PixelFormat::A8B8G8R8_SRGB: + return "A8B8G8R8_SRGB"; + case PixelFormat::R8G8_UNORM: + return "R8G8_UNORM"; + case PixelFormat::R8G8_SNORM: + return "R8G8_SNORM"; + case PixelFormat::R8G8_SINT: + return "R8G8_SINT"; + case PixelFormat::R8G8_UINT: + return "R8G8_UINT"; + case PixelFormat::R32G32_UINT: + return "R32G32_UINT"; + case PixelFormat::R16G16B16X16_FLOAT: + return "R16G16B16X16_FLOAT"; + case PixelFormat::R32_UINT: + return "R32_UINT"; + case PixelFormat::R32_SINT: + return "R32_SINT"; + case PixelFormat::ASTC_2D_8X8_UNORM: + return "ASTC_2D_8X8_UNORM"; + case PixelFormat::ASTC_2D_8X5_UNORM: + return "ASTC_2D_8X5_UNORM"; + case PixelFormat::ASTC_2D_5X4_UNORM: + return "ASTC_2D_5X4_UNORM"; + case PixelFormat::B8G8R8A8_SRGB: + return "B8G8R8A8_SRGB"; + case PixelFormat::BC1_RGBA_SRGB: + return "BC1_RGBA_SRGB"; + case PixelFormat::BC2_SRGB: + return "BC2_SRGB"; + case PixelFormat::BC3_SRGB: + return "BC3_SRGB"; + case PixelFormat::BC7_SRGB: + return "BC7_SRGB"; + case PixelFormat::A4B4G4R4_UNORM: + return "A4B4G4R4_UNORM"; + case PixelFormat::ASTC_2D_4X4_SRGB: + return "ASTC_2D_4X4_SRGB"; + case PixelFormat::ASTC_2D_8X8_SRGB: + return "ASTC_2D_8X8_SRGB"; + case PixelFormat::ASTC_2D_8X5_SRGB: + return "ASTC_2D_8X5_SRGB"; + case PixelFormat::ASTC_2D_5X4_SRGB: + return "ASTC_2D_5X4_SRGB"; + case PixelFormat::ASTC_2D_5X5_UNORM: + return "ASTC_2D_5X5_UNORM"; + case PixelFormat::ASTC_2D_5X5_SRGB: + return "ASTC_2D_5X5_SRGB"; + case PixelFormat::ASTC_2D_10X8_UNORM: + return "ASTC_2D_10X8_UNORM"; + case PixelFormat::ASTC_2D_10X8_SRGB: + return "ASTC_2D_10X8_SRGB"; + case PixelFormat::ASTC_2D_6X6_UNORM: + return "ASTC_2D_6X6_UNORM"; + case PixelFormat::ASTC_2D_6X6_SRGB: + return "ASTC_2D_6X6_SRGB"; + case PixelFormat::ASTC_2D_10X10_UNORM: + return "ASTC_2D_10X10_UNORM"; + case PixelFormat::ASTC_2D_10X10_SRGB: + return "ASTC_2D_10X10_SRGB"; + case PixelFormat::ASTC_2D_12X12_UNORM: + return 
"ASTC_2D_12X12_UNORM"; + case PixelFormat::ASTC_2D_12X12_SRGB: + return "ASTC_2D_12X12_SRGB"; + case PixelFormat::ASTC_2D_8X6_UNORM: + return "ASTC_2D_8X6_UNORM"; + case PixelFormat::ASTC_2D_8X6_SRGB: + return "ASTC_2D_8X6_SRGB"; + case PixelFormat::ASTC_2D_6X5_UNORM: + return "ASTC_2D_6X5_UNORM"; + case PixelFormat::ASTC_2D_6X5_SRGB: + return "ASTC_2D_6X5_SRGB"; + case PixelFormat::E5B9G9R9_FLOAT: + return "E5B9G9R9_FLOAT"; + case PixelFormat::D32_FLOAT: + return "D32_FLOAT"; + case PixelFormat::D16_UNORM: + return "D16_UNORM"; + case PixelFormat::D24_UNORM_S8_UINT: + return "D24_UNORM_S8_UINT"; + case PixelFormat::S8_UINT_D24_UNORM: + return "S8_UINT_D24_UNORM"; + case PixelFormat::D32_FLOAT_S8_UINT: + return "D32_FLOAT_S8_UINT"; + case PixelFormat::MaxDepthStencilFormat: + case PixelFormat::Invalid: + return "Invalid"; + } + return "Invalid"; + }(); + return formatter::format(name, ctx); + } +}; + +template <> +struct fmt::formatter : fmt::formatter { + template + auto format(VideoCommon::ImageType type, FormatContext& ctx) { + const string_view name = [type] { + using VideoCommon::ImageType; + switch (type) { + case ImageType::e1D: + return "1D"; + case ImageType::e2D: + return "2D"; + case ImageType::e3D: + return "3D"; + case ImageType::Linear: + return "Linear"; + case ImageType::Buffer: + return "Buffer"; + } + return "Invalid"; + }(); + return formatter::format(name, ctx); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(fmt::format_parse_context& ctx) { + return ctx.begin(); + } + + template + auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height, + extent.depth); + } +}; + +namespace VideoCommon { + +struct ImageBase; +struct ImageViewBase; +struct RenderTargets; + +[[nodiscard]] std::string Name(const ImageBase& image); + +[[nodiscard]] std::string Name(const ImageViewBase& image_view, + std::optional type = std::nullopt); + 
+[[nodiscard]] std::string Name(const RenderTargets& render_targets); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp new file mode 100644 index 000000000..448a05fcc --- /dev/null +++ b/src/video_core/texture_cache/image_base.cpp @@ -0,0 +1,216 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/util.h" + +namespace VideoCommon { + +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; + +namespace { +/// Returns the base layer and mip level offset +[[nodiscard]] std::pair LayerMipOffset(s32 diff, u32 layer_stride) { + if (layer_stride == 0) { + return {0, diff}; + } else { + return {diff / layer_stride, diff % layer_stride}; + } +} + +[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) { + return layers.base_level < info.resources.levels && + layers.base_layer + layers.num_layers <= info.resources.layers; +} + +[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) { + const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level); + const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level); + if (!ValidateLayers(copy.src_subresource, src)) { + return false; + } + if (!ValidateLayers(copy.dst_subresource, dst)) { + return false; + } + if (copy.src_offset.x + copy.extent.width > src_size.width || + copy.src_offset.y + copy.extent.height > src_size.height || + copy.src_offset.z + copy.extent.depth > src_size.depth) { + return false; + } + if 
(copy.dst_offset.x + copy.extent.width > dst_size.width || + copy.dst_offset.y + copy.extent.height > dst_size.height || + copy.dst_offset.z + copy.extent.depth > dst_size.depth) { + return false; + } + return true; +} +} // Anonymous namespace + +ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) + : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, + unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, + converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, + cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, + mip_level_offsets{CalculateMipLevelOffsets(info)} { + if (info.type == ImageType::e3D) { + slice_offsets = CalculateSliceOffsets(info); + slice_subresources = CalculateSliceSubresources(info); + } +} + +std::optional ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { + if (other_addr < gpu_addr) { + // Subresource address can't be lower than the base + return std::nullopt; + } + const u32 diff = static_cast(other_addr - gpu_addr); + if (diff > guest_size_bytes) { + // This can happen when two CPU addresses are used for different GPU addresses + return std::nullopt; + } + if (info.type != ImageType::e3D) { + const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); + const auto end = mip_level_offsets.begin() + info.resources.levels; + const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); + if (layer > info.resources.layers || it == end) { + return std::nullopt; + } + return SubresourceBase{ + .level = static_cast(std::distance(mip_level_offsets.begin(), it)), + .layer = layer, + }; + } else { + // TODO: Consider using binary_search after a threshold + const auto it = std::ranges::find(slice_offsets, diff); + if (it == slice_offsets.cend()) { + return std::nullopt; + } + return slice_subresources[std::distance(slice_offsets.begin(), it)]; + } +} + +ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept { 
+ const auto it = std::ranges::find(image_view_infos, view_info); + if (it == image_view_infos.end()) { + return ImageViewId{}; + } + return image_view_ids[std::distance(image_view_infos.begin(), it)]; +} + +void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) { + image_view_infos.push_back(view_info); + image_view_ids.push_back(image_view_id); +} + +void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { + static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; + ASSERT(lhs.info.type == rhs.info.type); + std::optional base; + if (lhs.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS); + } + if (!base) { + LOG_ERROR(HW_GPU, "Image alias should have been flipped"); + return; + } + const PixelFormat lhs_format = lhs.info.format; + const PixelFormat rhs_format = rhs.info.format; + const Extent2D lhs_block{ + .width = DefaultBlockWidth(lhs_format), + .height = DefaultBlockHeight(lhs_format), + }; + const Extent2D rhs_block{ + .width = DefaultBlockWidth(rhs_format), + .height = DefaultBlockHeight(rhs_format), + }; + const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; + const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; + if (is_lhs_compressed && is_rhs_compressed) { + LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); + return; + } + const s32 lhs_mips = lhs.info.resources.levels; + const s32 rhs_mips = rhs.info.resources.levels; + const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); + AliasedImage lhs_alias; + AliasedImage rhs_alias; + lhs_alias.id = rhs_id; + rhs_alias.id = lhs_id; + lhs_alias.copies.reserve(num_mips); + rhs_alias.copies.reserve(num_mips); + for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) { + Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); + 
Extent3D rhs_size = MipSize(rhs.info.size, mip_level); + if (is_lhs_compressed) { + lhs_size.width /= lhs_block.width; + lhs_size.height /= lhs_block.height; + } + if (is_rhs_compressed) { + rhs_size.width /= rhs_block.width; + rhs_size.height /= rhs_block.height; + } + const Extent3D copy_size{ + .width = std::min(lhs_size.width, rhs_size.width), + .height = std::min(lhs_size.height, rhs_size.height), + .depth = std::min(lhs_size.depth, rhs_size.depth), + }; + if (copy_size.width == 0 || copy_size.height == 0) { + LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); + continue; + } + const bool is_lhs_3d = lhs.info.type == ImageType::e3D; + const bool is_rhs_3d = rhs.info.type == ImageType::e3D; + const Offset3D lhs_offset{0, 0, 0}; + const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; + const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; + const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers; + const s32 num_layers = std::min(lhs_layers, rhs_layers); + const SubresourceLayers lhs_subresource{ + .base_level = mip_level, + .base_layer = 0, + .num_layers = num_layers, + }; + const SubresourceLayers rhs_subresource{ + .base_level = base->level + mip_level, + .base_layer = is_rhs_3d ? 
0 : base->layer, + .num_layers = num_layers, + }; + [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{ + .src_subresource = lhs_subresource, + .dst_subresource = rhs_subresource, + .src_offset = lhs_offset, + .dst_offset = rhs_offset, + .extent = copy_size, + }); + [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{ + .src_subresource = rhs_subresource, + .dst_subresource = lhs_subresource, + .src_offset = rhs_offset, + .dst_offset = lhs_offset, + .extent = copy_size, + }); + ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy"); + ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy"); + } + ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); + if (lhs_alias.copies.empty()) { + return; + } + lhs.aliased_images.push_back(std::move(lhs_alias)); + rhs.aliased_images.push_back(std::move(rhs_alias)); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h new file mode 100644 index 000000000..b7f3b7e43 --- /dev/null +++ b/src/video_core/texture_cache/image_base.h @@ -0,0 +1,83 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +enum class ImageFlagBits : u32 { + AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU + Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted + CpuModified = 1 << 2, ///< Contents have been modified from the CPU + GpuModified = 1 << 3, ///< Contents have been modified from the GPU + Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT + Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted + Registered = 1 << 6, ///< True when the image is registered + Picked = 1 << 7, ///< Temporary flag to mark the image as picked +}; +DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) + +struct ImageViewInfo; + +struct AliasedImage { + std::vector copies; + ImageId id; +}; + +struct ImageBase { + explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + + [[nodiscard]] std::optional TryFindBase(GPUVAddr other_addr) const noexcept; + + [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; + + void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); + + [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_cpu_addr + overlap_size; + return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; + } + + ImageInfo info; + + u32 guest_size_bytes = 0; + u32 unswizzled_size_bytes = 0; + u32 converted_size_bytes = 0; + ImageFlagBits flags = ImageFlagBits::CpuModified; + + GPUVAddr gpu_addr = 0; + VAddr cpu_addr = 0; + VAddr cpu_addr_end = 0; + + u64 modification_tick = 0; + u64 frame_tick = 0; + + std::array mip_level_offsets{}; + + std::vector 
image_view_infos; + std::vector image_view_ids; + + std::vector slice_offsets; + std::vector slice_subresources; + + std::vector aliased_images; +}; + +struct ImageAllocBase { + std::vector images; +}; + +void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 000000000..64fd7010a --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp @@ -0,0 +1,189 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +ImageInfo::ImageInfo(const TICEntry& config) noexcept { + format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, + config.a_type, config.srgb_conversion); + num_samples = NumSamples(config.msaa_mode); + resources.levels = config.max_mip_level + 1; + if (config.IsPitchLinear()) { + pitch = config.Pitch(); + } else if (config.IsBlockLinear()) { + block = Extent3D{ + .width = config.block_width, + .height = config.block_height, + .depth = config.block_depth, + }; + } + tile_width_spacing = config.tile_width_spacing; + if (config.texture_type != TextureType::Texture2D && + config.texture_type != TextureType::Texture2DNoMipmap) { + ASSERT(!config.IsPitchLinear()); + } + switch (config.texture_type) { + case TextureType::Texture1D: + ASSERT(config.BaseLayer() == 0); + type 
= ImageType::e1D; + size.width = config.Width(); + break; + case TextureType::Texture1DArray: + UNIMPLEMENTED_IF(config.BaseLayer() != 0); + type = ImageType::e1D; + size.width = config.Width(); + resources.layers = config.Depth(); + break; + case TextureType::Texture2D: + case TextureType::Texture2DNoMipmap: + ASSERT(config.Depth() == 1); + type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + 1; + break; + case TextureType::Texture2DArray: + type = ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + config.Depth(); + break; + case TextureType::TextureCubemap: + ASSERT(config.Depth() == 1); + type = ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + 6; + break; + case TextureType::TextureCubeArray: + UNIMPLEMENTED_IF(config.load_store_hint != 0); + type = ImageType::e2D; + size.width = config.Width(); + size.height = config.Height(); + resources.layers = config.BaseLayer() + config.Depth() * 6; + break; + case TextureType::Texture3D: + ASSERT(config.BaseLayer() == 0); + type = ImageType::e3D; + size.width = config.Width(); + size.height = config.Height(); + size.depth = config.Depth(); + break; + case TextureType::Texture1DBuffer: + type = ImageType::Buffer; + size.width = config.Width(); + break; + default: + UNREACHABLE_MSG("Invalid texture_type={}", static_cast(config.texture_type.Value())); + break; + } + if (type != ImageType::Linear) { + // FIXME: Call this without passing *this + layer_stride = CalculateLayerStride(*this); + maybe_unaligned_layer_stride = CalculateLayerSize(*this); + } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { + const auto& rt = regs.rt[index]; + format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); + if 
(rt.tile_mode.is_pitch_linear) { + ASSERT(rt.tile_mode.is_3d == 0); + type = ImageType::Linear; + pitch = rt.width; + size = Extent3D{ + .width = pitch / BytesPerBlock(format), + .height = rt.height, + .depth = 1, + }; + return; + } + size.width = rt.width; + size.height = rt.height; + layer_stride = rt.layer_stride * 4; + maybe_unaligned_layer_stride = layer_stride; + num_samples = NumSamples(regs.multisample_mode); + block = Extent3D{ + .width = rt.tile_mode.block_width, + .height = rt.tile_mode.block_height, + .depth = rt.tile_mode.block_depth, + }; + if (rt.tile_mode.is_3d) { + type = ImageType::e3D; + size.depth = rt.depth; + } else { + type = ImageType::e2D; + resources.layers = rt.depth; + } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { + format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); + size.width = regs.zeta_width; + size.height = regs.zeta_height; + resources.levels = 1; + layer_stride = regs.zeta.layer_stride * 4; + maybe_unaligned_layer_stride = layer_stride; + num_samples = NumSamples(regs.multisample_mode); + block = Extent3D{ + .width = regs.zeta.tile_mode.block_width, + .height = regs.zeta.tile_mode.block_height, + .depth = regs.zeta.tile_mode.block_depth, + }; + if (regs.zeta.tile_mode.is_pitch_linear) { + ASSERT(regs.zeta.tile_mode.is_3d == 0); + type = ImageType::Linear; + pitch = size.width * BytesPerBlock(format); + } else if (regs.zeta.tile_mode.is_3d) { + ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0); + type = ImageType::e3D; + size.depth = regs.zeta_depth; + } else { + type = ImageType::e2D; + resources.layers = regs.zeta_depth; + } +} + +ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { + UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); + format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); + if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { + type = ImageType::Linear; + size = 
Extent3D{ + .width = config.pitch / VideoCore::Surface::BytesPerBlock(format), + .height = config.height, + .depth = 1, + }; + pitch = config.pitch; + } else { + type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; + block = Extent3D{ + .width = config.block_width, + .height = config.block_height, + .depth = config.block_depth, + }; + // 3D blits with more than once slice are not implemented for now + // Render to individual slices + size = Extent3D{ + .width = config.width, + .height = config.height, + .depth = 1, + }; + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 000000000..5049fc36e --- /dev/null +++ b/src/video_core/texture_cache/image_info.h @@ -0,0 +1,38 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +struct ImageInfo { + explicit ImageInfo() = default; + explicit ImageInfo(const TICEntry& config) noexcept; + explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; + explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; + explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; + + PixelFormat format = PixelFormat::Invalid; + ImageType type = ImageType::e1D; + SubresourceExtent resources; + Extent3D size{1, 1, 1}; + union { + Extent3D block{0, 0, 0}; + u32 pitch; + }; + u32 layer_stride = 0; + u32 maybe_unaligned_layer_stride = 0; + u32 num_samples = 1; + u32 tile_width_spacing = 0; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.cpp 
b/src/video_core/texture_cache/image_view_base.cpp new file mode 100644 index 000000000..076a4bcfd --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/assert.h" +#include "core/settings.h" +#include "video_core/compatible_formats.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, + ImageId image_id_) + : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, + size{ + .width = std::max(image_info.size.width >> range.base.level, 1u), + .height = std::max(image_info.size.height >> range.base.level, 1u), + .depth = std::max(image_info.size.depth >> range.base.level, 1u), + } { + ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format), + "Image view format {} is incompatible with image format {}", info.format, + image_info.format); + const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); + if (image_info.type == ImageType::Linear && is_async) { + flags |= ImageViewFlagBits::PreemtiveDownload; + } + if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { + flags |= ImageViewFlagBits::Slice; + } +} + +ImageViewBase::ImageViewBase(const NullImageParams&) {} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h new file mode 100644 index 000000000..73954167e --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.h @@ -0,0 +1,47 @@ +// Copyright 
2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_funcs.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +using VideoCore::Surface::PixelFormat; + +struct ImageViewInfo; +struct ImageInfo; + +struct NullImageParams {}; + +enum class ImageViewFlagBits : u16 { + PreemtiveDownload = 1 << 0, + Strong = 1 << 1, + Slice = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) + +struct ImageViewBase { + explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, + ImageId image_id); + explicit ImageViewBase(const NullImageParams&); + + [[nodiscard]] bool IsBuffer() const noexcept { + return type == ImageViewType::Buffer; + } + + ImageId image_id{}; + PixelFormat format{}; + ImageViewType type{}; + SubresourceRange range; + Extent3D size{0, 0, 0}; + ImageViewFlagBits flags{}; + + u64 invalidation_tick = 0; + u64 modification_tick = 0; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp new file mode 100644 index 000000000..faf5b151f --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -0,0 +1,88 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/assert.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +namespace { + +constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits::max(); + +[[nodiscard]] u8 CastSwizzle(SwizzleSource source) { + const u8 casted = static_cast(source); + ASSERT(static_cast(casted) == source); + return casted; +} + +} // Anonymous namespace + +ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept + : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)}, + y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)}, + w_source{CastSwizzle(config.w_source)} { + range.base = SubresourceBase{ + .level = static_cast(config.res_min_mip_level), + .layer = base_layer, + }; + range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1; + + switch (config.texture_type) { + case TextureType::Texture1D: + ASSERT(config.Height() == 1); + ASSERT(config.Depth() == 1); + type = ImageViewType::e1D; + break; + case TextureType::Texture2D: + case TextureType::Texture2DNoMipmap: + ASSERT(config.Depth() == 1); + type = config.normalized_coords ? 
ImageViewType::e2D : ImageViewType::Rect; + break; + case TextureType::Texture3D: + type = ImageViewType::e3D; + break; + case TextureType::TextureCubemap: + ASSERT(config.Depth() == 1); + type = ImageViewType::Cube; + range.extent.layers = 6; + break; + case TextureType::Texture1DArray: + type = ImageViewType::e1DArray; + range.extent.layers = config.Depth(); + break; + case TextureType::Texture2DArray: + type = ImageViewType::e2DArray; + range.extent.layers = config.Depth(); + break; + case TextureType::Texture1DBuffer: + type = ImageViewType::Buffer; + break; + case TextureType::TextureCubeArray: + type = ImageViewType::CubeArray; + range.extent.layers = config.Depth() * 6; + break; + default: + UNREACHABLE_MSG("Invalid texture_type={}", static_cast(config.texture_type.Value())); + break; + } +} + +ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_, + SubresourceRange range_) noexcept + : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE}, + y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE}, + w_source{RENDER_TARGET_SWIZZLE} {} + +bool ImageViewInfo::IsRenderTarget() const noexcept { + return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE && + z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE; +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h new file mode 100644 index 000000000..0c1f99117 --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.h @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TICEntry; +using VideoCore::Surface::PixelFormat; + +/// Properties used to determine a image view +struct ImageViewInfo { + explicit ImageViewInfo() noexcept = default; + explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept; + explicit ImageViewInfo(ImageViewType type, PixelFormat format, + SubresourceRange range = {}) noexcept; + + auto operator<=>(const ImageViewInfo&) const noexcept = default; + + [[nodiscard]] bool IsRenderTarget() const noexcept; + + [[nodiscard]] std::array Swizzle() const noexcept { + return std::array{ + static_cast(x_source), + static_cast(y_source), + static_cast(z_source), + static_cast(w_source), + }; + } + + ImageViewType type{}; + PixelFormat format{}; + SubresourceRange range; + u8 x_source = static_cast(SwizzleSource::R); + u8 y_source = static_cast(SwizzleSource::G); + u8 z_source = static_cast(SwizzleSource::B); + u8 w_source = static_cast(SwizzleSource::A); +}; +static_assert(std::has_unique_object_representations_v); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h new file mode 100644 index 000000000..9b9544b07 --- /dev/null +++ b/src/video_core/texture_cache/render_targets.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include "common/bit_cast.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +/// Framebuffer properties used to lookup a framebuffer +struct RenderTargets { + constexpr auto operator<=>(const RenderTargets&) const noexcept = default; + + constexpr bool Contains(std::span elements) const noexcept { + const auto contains = [elements](ImageViewId item) { + return std::ranges::find(elements, item) != elements.end(); + }; + return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id); + } + + std::array color_buffer_ids; + ImageViewId depth_buffer_id; + std::array draw_buffers{}; + Extent2D size; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept { + using VideoCommon::ImageViewId; + size_t value = std::hash{}(rt.depth_buffer_id); + for (const ImageViewId color_buffer_id : rt.color_buffer_ids) { + value ^= std::hash{}(color_buffer_id); + } + value ^= Common::BitCast(rt.draw_buffers); + value ^= Common::BitCast(rt.size); + return value; + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h new file mode 100644 index 000000000..04539a43c --- /dev/null +++ b/src/video_core/texture_cache/samples_helper.h @@ -0,0 +1,55 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/assert.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +[[nodiscard]] inline std::pair SamplesLog2(int num_samples) { + switch (num_samples) { + case 1: + return {0, 0}; + case 2: + return {1, 0}; + case 4: + return {1, 1}; + case 8: + return {2, 1}; + case 16: + return {2, 2}; + } + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return {1, 1}; +} + +[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { + using Tegra::Texture::MsaaMode; + switch (msaa_mode) { + case MsaaMode::Msaa1x1: + return 1; + case MsaaMode::Msaa2x1: + case MsaaMode::Msaa2x1_D3D: + return 2; + case MsaaMode::Msaa2x2: + case MsaaMode::Msaa2x2_VC4: + case MsaaMode::Msaa2x2_VC12: + return 4; + case MsaaMode::Msaa4x2: + case MsaaMode::Msaa4x2_D3D: + case MsaaMode::Msaa4x2_VC8: + case MsaaMode::Msaa4x2_VC24: + return 8; + case MsaaMode::Msaa4x4: + return 16; + } + UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast(msaa_mode)); + return 1; +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h new file mode 100644 index 000000000..eae3be6ea --- /dev/null +++ b/src/video_core/texture_cache/slot_vector.h @@ -0,0 +1,156 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" + +namespace VideoCommon { + +struct SlotId { + static constexpr u32 INVALID_INDEX = std::numeric_limits::max(); + + constexpr auto operator<=>(const SlotId&) const noexcept = default; + + constexpr explicit operator bool() const noexcept { + return index != INVALID_INDEX; + } + + u32 index = INVALID_INDEX; +}; + +template +requires std::is_nothrow_move_assignable_v&& + std::is_nothrow_move_constructible_v class SlotVector { +public: + ~SlotVector() noexcept { + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + if ((bits & 1) != 0) { + values[index + bit].object.~T(); + } + } + index += 64; + } + delete[] values; + } + + [[nodiscard]] T& operator[](SlotId id) noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + [[nodiscard]] const T& operator[](SlotId id) const noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + template + [[nodiscard]] SlotId insert(Args&&... 
args) noexcept { + const u32 index = FreeValueIndex(); + new (&values[index].object) T(std::forward(args)...); + SetStorageBit(index); + + return SlotId{index}; + } + + void erase(SlotId id) noexcept { + values[id.index].object.~T(); + free_list.push_back(id.index); + ResetStorageBit(id.index); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Entry { + Entry() noexcept : dummy{} {} + ~Entry() noexcept {} + + NonTrivialDummy dummy; + T object; + }; + + void SetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] |= u64(1) << (index % 64); + } + + void ResetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); + } + + bool ReadStorageBit(u32 index) noexcept { + return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; + } + + void ValidateIndex(SlotId id) const noexcept { + DEBUG_ASSERT(id); + DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); + DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + } + + [[nodiscard]] u32 FreeValueIndex() noexcept { + if (free_list.empty()) { + Reserve(values_capacity ? 
(values_capacity << 1) : 1); + } + const u32 free_index = free_list.back(); + free_list.pop_back(); + return free_index; + } + + void Reserve(size_t new_capacity) noexcept { + Entry* const new_values = new Entry[new_capacity]; + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + const size_t i = index + bit; + if ((bits & 1) == 0) { + continue; + } + T& old_value = values[i].object; + new (&new_values[i].object) T(std::move(old_value)); + old_value.~T(); + } + index += 64; + } + + stored_bitset.resize((new_capacity + 63) / 64); + + const size_t old_free_size = free_list.size(); + free_list.resize(old_free_size + (new_capacity - values_capacity)); + std::iota(free_list.begin() + old_free_size, free_list.end(), + static_cast(values_capacity)); + + delete[] values; + values = new_values; + values_capacity = new_capacity; + } + + Entry* values = nullptr; + size_t values_capacity = 0; + size_t values_size = 0; + + std::vector stored_bitset; + std::vector free_list; +}; + +} // namespace VideoCommon + +template <> +struct std::hash { + size_t operator()(const VideoCommon::SlotId& id) const noexcept { + return std::hash{}(id.index); + } +}; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp deleted file mode 100644 index efbcf6723..000000000 --- a/src/video_core/texture_cache/surface_base.cpp +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/algorithm.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/convert.h" - -namespace VideoCommon { - -MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); - -using Tegra::Texture::ConvertFromGuestToHost; -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::IsPixelFormatASTC; -using VideoCore::Surface::PixelFormat; - -StagingCache::StagingCache() = default; - -StagingCache::~StagingCache() = default; - -SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_) - : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels), - mipmap_offsets(params.num_levels) { - is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_; - host_memory_size = params.GetHostSizeInBytes(is_converted); - - std::size_t offset = 0; - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; - mipmap_sizes[level] = mipmap_size; - mipmap_offsets[level] = offset; - offset += mipmap_size; - } - layer_size = offset; - if (params.is_layered) { - if (params.is_tiled) { - layer_size = - SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); - } - guest_memory_size = layer_size * params.depth; - } else { - guest_memory_size = layer_size; - } -} - -MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp{params.GetBytesPerPixel()}; - const u32 dst_bpp{rhs.GetBytesPerPixel()}; - const bool ib1 = params.IsBuffer(); - const bool ib2 = rhs.IsBuffer(); - if (std::tie(src_bpp, params.is_tiled, ib1) == 
std::tie(dst_bpp, rhs.is_tiled, ib2)) { - const bool cb1 = params.IsCompressed(); - const bool cb2 = rhs.IsCompressed(); - if (cb1 == cb2) { - return MatchTopologyResult::FullMatch; - } - return MatchTopologyResult::CompressUnmatch; - } - return MatchTopologyResult::None; -} - -MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { - // Buffer surface Check - if (params.IsBuffer()) { - const std::size_t wd1 = params.width * params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); - if (wd1 == wd2) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Linear Surface check - if (!params.is_tiled) { - if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { - if (params.width == rhs.width) { - return MatchStructureResult::FullMatch; - } else { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } - - // Tiled Surface check - if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, - params.tile_width_spacing, params.num_levels) == - std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.num_levels)) { - if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { - return MatchStructureResult::FullMatch; - } - const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, - rhs.pixel_format); - const u32 hs = - SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); - const u32 w1 = params.GetBlockAlignedWidth(); - if (std::tie(w1, params.height) == std::tie(ws, hs)) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; -} - -std::optional> SurfaceBaseImpl::GetLayerMipmap( - const GPUVAddr candidate_gpu_addr) const { - if (gpu_addr == candidate_gpu_addr) { - return {{0, 0}}; - } - - if (candidate_gpu_addr < gpu_addr) { - 
return std::nullopt; - } - - const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; - const auto layer{static_cast(relative_address / layer_size)}; - if (layer >= params.depth) { - return std::nullopt; - } - - const GPUVAddr mipmap_address = relative_address - layer_size * layer; - const auto mipmap_it = - Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it == mipmap_offsets.end()) { - return std::nullopt; - } - - const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; - return std::make_pair(layer, level); -} - -std::vector SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(static_cast(layers) * static_cast(mipmaps)); - - for (u32 layer = 0; layer < layers; layer++) { - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); - } - } - return result; -} - -std::vector SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(mipmaps); - - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; -} - -void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, - const SurfaceParams& surface_params, u8* buffer, u32 level) { - const u32 width{surface_params.GetMipWidth(level)}; - const u32 
height{surface_params.GetMipHeight(level)}; - const u32 block_height{surface_params.GetMipBlockHeight(level)}; - const u32 block_depth{surface_params.GetMipBlockDepth(level)}; - - std::size_t guest_offset{mipmap_offsets[level]}; - if (surface_params.is_layered) { - std::size_t host_offset = 0; - const std::size_t guest_stride = layer_size; - const std::size_t host_stride = surface_params.GetHostLayerSize(level); - for (u32 layer = 0; layer < surface_params.depth; ++layer) { - MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, - block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset, - memory + guest_offset); - guest_offset += guest_stride; - host_offset += host_stride; - } - } else { - MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth, - surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer, - memory + guest_offset); - } -} - -void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, - StagingCache& staging_cache) { - MICROPROFILE_SCOPE(GPU_Load_Texture); - auto& staging_buffer = staging_cache.GetBuffer(0); - u8* host_ptr; - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", - params.block_width, static_cast(params.target)); - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, - staging_buffer.data() + host_offset, level); - } - } else { - ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{params.GetBytesPerPixel()}; - const u32 
block_width{params.GetDefaultBlockWidth()}; - const u32 block_height{params.GetDefaultBlockHeight()}; - const u32 width{(params.width + block_width - 1) / block_width}; - const u32 height{(params.height + block_height - 1) / block_height}; - const u32 copy_size{width * bpp}; - if (params.pitch == copy_size) { - std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); - } else { - const u8* start{host_ptr}; - u8* write_to{staging_buffer.data()}; - for (u32 h = height; h > 0; --h) { - std::memcpy(write_to, start, copy_size); - start += params.pitch; - write_to += copy_size; - } - } - } - - if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { - return; - } - - for (u32 level = params.num_levels; level--;) { - const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; - const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; - u8* const in_buffer = staging_buffer.data() + in_host_offset; - u8* const out_buffer = staging_buffer.data() + out_host_offset; - ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, - params.GetMipWidth(level), params.GetMipHeight(level), - params.GetMipDepth(level), true, true); - } -} - -void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, - StagingCache& staging_cache) { - MICROPROFILE_SCOPE(GPU_Flush_Texture); - auto& staging_buffer = staging_cache.GetBuffer(0); - u8* host_ptr; - - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - - if (params.target == SurfaceTarget::Texture3D) { - // Special case for 3D texture segments - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } - - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t 
host_offset{params.GetHostMipmapLevelOffset(level, false)}; - SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, - staging_buffer.data() + host_offset, level); - } - } else if (params.IsBuffer()) { - // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest - // memory. - std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); - } else { - ASSERT(params.target == SurfaceTarget::Texture2D); - ASSERT(params.num_levels == 1); - - const u32 bpp{params.GetBytesPerPixel()}; - const u32 copy_size{params.width * bpp}; - if (params.pitch == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); - } else { - u8* start{host_ptr}; - const u8* read_to{staging_buffer.data()}; - for (u32 h = params.height; h > 0; --h) { - std::memcpy(start, read_to, copy_size); - start += params.pitch; - read_to += copy_size; - } - } - } - memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h deleted file mode 100644 index b57135fe4..000000000 --- a/src/video_core/texture_cache/surface_base.h +++ /dev/null @@ -1,333 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/gpu.h" -#include "video_core/morton.h" -#include "video_core/texture_cache/copy_params.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon { - -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::SurfaceTarget; - -enum class MatchStructureResult : u32 { - FullMatch = 0, - SemiMatch = 1, - None = 2, -}; - -enum class MatchTopologyResult : u32 { - FullMatch = 0, - CompressUnmatch = 1, - None = 2, -}; - -class StagingCache { -public: - explicit StagingCache(); - ~StagingCache(); - - std::vector& GetBuffer(std::size_t index) { - return staging_buffer[index]; - } - - const std::vector& GetBuffer(std::size_t index) const { - return staging_buffer[index]; - } - - void SetSize(std::size_t size) { - staging_buffer.resize(size); - } - -private: - std::vector> staging_buffer; -}; - -class SurfaceBaseImpl { -public: - void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - - void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - bool Overlaps(const VAddr start, const VAddr end) const { - return (cpu_addr < end) && (cpu_addr_end > start); - } - - bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { - const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; - return gpu_addr <= other_start && other_end <= gpu_addr_end; - } - - // Use only when recycling a surface - void SetGpuAddr(const GPUVAddr new_addr) { - gpu_addr = new_addr; - } - - VAddr GetCpuAddr() const { - return cpu_addr; - } - - VAddr GetCpuAddrEnd() const { - return cpu_addr_end; - } - - void SetCpuAddr(const VAddr new_addr) { - cpu_addr = new_addr; - cpu_addr_end = new_addr + guest_memory_size; - } - - const 
SurfaceParams& GetSurfaceParams() const { - return params; - } - - std::size_t GetSizeInBytes() const { - return guest_memory_size; - } - - std::size_t GetHostSizeInBytes() const { - return host_memory_size; - } - - std::size_t GetMipmapSize(const u32 level) const { - return mipmap_sizes[level]; - } - - bool IsLinear() const { - return !params.is_tiled; - } - - bool IsConverted() const { - return is_converted; - } - - bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { - return params.pixel_format == pixel_format; - } - - VideoCore::Surface::PixelFormat GetFormat() const { - return params.pixel_format; - } - - bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { - return params.target == target; - } - - MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; - - MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; - - bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { - return std::tie(gpu_addr, params.target, params.num_levels) == - std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && - params.target == SurfaceTarget::Texture2D && params.num_levels == 1; - } - - std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; - - std::vector BreakDown(const SurfaceParams& in_params) const { - return params.is_layered ? 
BreakDownLayered(in_params) : BreakDownNonLayered(in_params); - } - -protected: - explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_); - ~SurfaceBaseImpl() = default; - - virtual void DecorateSurfaceName() = 0; - - const SurfaceParams params; - std::size_t layer_size; - std::size_t guest_memory_size; - std::size_t host_memory_size; - GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - VAddr cpu_addr_end{}; - bool is_converted{}; - - std::vector mipmap_sizes; - std::vector mipmap_offsets; - -private: - void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params, - u8* buffer, u32 level); - - std::vector BreakDownLayered(const SurfaceParams& in_params) const; - - std::vector BreakDownNonLayered(const SurfaceParams& in_params) const; -}; - -template -class SurfaceBase : public SurfaceBaseImpl { -public: - virtual void UploadTexture(const std::vector& staging_buffer) = 0; - - virtual void DownloadTexture(std::vector& staging_buffer) = 0; - - void MarkAsModified(bool is_modified_, u64 tick) { - is_modified = is_modified_ || is_target; - modification_tick = tick; - } - - void MarkAsRenderTarget(bool is_target_, u32 index_) { - is_target = is_target_; - index = index_; - } - - void SetMemoryMarked(bool is_memory_marked_) { - is_memory_marked = is_memory_marked_; - } - - bool IsMemoryMarked() const { - return is_memory_marked; - } - - void SetSyncPending(bool is_sync_pending_) { - is_sync_pending = is_sync_pending_; - } - - bool IsSyncPending() const { - return is_sync_pending; - } - - void MarkAsPicked(bool is_picked_) { - is_picked = is_picked_; - } - - bool IsModified() const { - return is_modified; - } - - bool IsProtected() const { - // Only 3D slices are to be protected - return is_target && params.target == SurfaceTarget::Texture3D; - } - - bool IsRenderTarget() const { - return is_target; - } - - u32 GetRenderTarget() const { - return index; - } - - bool IsRegistered() const { - return 
is_registered; - } - - bool IsPicked() const { - return is_picked; - } - - void MarkAsRegistered(bool is_reg) { - is_registered = is_reg; - } - - u64 GetModificationTick() const { - return modification_tick; - } - - TView EmplaceOverview(const SurfaceParams& overview_params) { - const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; - return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); - } - - TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { - return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, - base_level, num_levels)); - } - - std::optional EmplaceIrregularView(const SurfaceParams& view_params, - const GPUVAddr view_addr, - const std::size_t candidate_size, const u32 mipmap, - const u32 layer) { - const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; - if (!layer_mipmap) { - return {}; - } - const auto [end_layer, end_mipmap] = *layer_mipmap; - if (layer != end_layer) { - if (mipmap == 0 && end_mipmap == 0) { - return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1)); - } - return {}; - } else { - return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap)); - } - } - - std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, - const std::size_t candidate_size) { - if (params.target == SurfaceTarget::Texture3D || - view_params.target == SurfaceTarget::Texture3D || - (params.num_levels == 1 && !params.is_layered)) { - return {}; - } - const auto layer_mipmap{GetLayerMipmap(view_addr)}; - if (!layer_mipmap) { - return {}; - } - const auto [layer, mipmap] = *layer_mipmap; - if (GetMipmapSize(mipmap) != candidate_size) { - return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); - } - return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); - } - - TView GetMainView() const { - return main_view; - } - 
-protected: - explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_, - bool is_astc_supported_) - : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {} - - ~SurfaceBase() = default; - - virtual TView CreateView(const ViewParams& view_key) = 0; - - TView main_view; - std::unordered_map views; - -private: - TView GetView(const ViewParams& key) { - const auto [entry, is_cache_miss] = views.try_emplace(key); - auto& view{entry->second}; - if (is_cache_miss) { - view = CreateView(key); - } - return view; - } - - static constexpr u32 NO_RT = 0xFFFFFFFF; - - bool is_modified{}; - bool is_target{}; - bool is_registered{}; - bool is_picked{}; - bool is_memory_marked{}; - bool is_sync_pending{}; - u32 index{NO_RT}; - u64 modification_tick{}; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp deleted file mode 100644 index 96f93246d..000000000 --- a/src/video_core/texture_cache/surface_params.cpp +++ /dev/null @@ -1,445 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "core/core.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceTargetFromTextureType; -using VideoCore::Surface::SurfaceType; - -namespace { - -SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { - switch (type) { - case Tegra::Shader::TextureType::Texture1D: - return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; - case Tegra::Shader::TextureType::Texture2D: - return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; - case Tegra::Shader::TextureType::Texture3D: - ASSERT(!is_array); - return SurfaceTarget::Texture3D; - case Tegra::Shader::TextureType::TextureCube: - return is_array ? 
SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; - default: - UNREACHABLE(); - return SurfaceTarget::Texture2D; - } -} - -SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { - switch (type) { - case Tegra::Shader::ImageType::Texture1D: - return SurfaceTarget::Texture1D; - case Tegra::Shader::ImageType::TextureBuffer: - return SurfaceTarget::TextureBuffer; - case Tegra::Shader::ImageType::Texture1DArray: - return SurfaceTarget::Texture1DArray; - case Tegra::Shader::ImageType::Texture2D: - return SurfaceTarget::Texture2D; - case Tegra::Shader::ImageType::Texture2DArray: - return SurfaceTarget::Texture2DArray; - case Tegra::Shader::ImageType::Texture3D: - return SurfaceTarget::Texture3D; - default: - UNREACHABLE(); - return SurfaceTarget::Texture2D; - } -} - -constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { - return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); -} - -} // Anonymous namespace - -SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry) { - SurfaceParams params; - params.is_tiled = tic.IsTiled(); - params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? tic.BlockWidth() : 0; - params.block_height = params.is_tiled ? tic.BlockHeight() : 0; - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; - params.tile_width_spacing = params.is_tiled ? 
(1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = lookup_table.GetPixelFormat( - tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); - params.type = GetFormatType(params.pixel_format); - if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { - switch (params.pixel_format) { - case PixelFormat::R16_UNORM: - case PixelFormat::R16_FLOAT: - params.pixel_format = PixelFormat::D16_UNORM; - break; - case PixelFormat::R32_FLOAT: - params.pixel_format = PixelFormat::D32_FLOAT; - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", - static_cast(params.pixel_format)); - } - params.type = GetFormatType(params.pixel_format); - } - // TODO: on 1DBuffer we should use the tic info. - if (tic.IsBuffer()) { - params.target = SurfaceTarget::TextureBuffer; - params.width = tic.Width(); - params.pitch = params.width * params.GetBytesPerPixel(); - params.height = 1; - params.depth = 1; - params.num_levels = 1; - params.emulated_levels = 1; - params.is_layered = false; - } else { - params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array); - params.width = tic.Width(); - params.height = tic.Height(); - params.depth = tic.Depth(); - params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.num_levels = tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); - } - return params; -} - -SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry) { - SurfaceParams params; - params.is_tiled = tic.IsTiled(); - params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? 
tic.BlockWidth() : 0; - params.block_height = params.is_tiled ? tic.BlockHeight() : 0; - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; - params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = lookup_table.GetPixelFormat( - tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); - params.type = GetFormatType(params.pixel_format); - params.target = ImageTypeToSurfaceTarget(entry.type); - // TODO: on 1DBuffer we should use the tic info. - if (tic.IsBuffer()) { - params.target = SurfaceTarget::TextureBuffer; - params.width = tic.Width(); - params.pitch = params.width * params.GetBytesPerPixel(); - params.height = 1; - params.depth = 1; - params.num_levels = 1; - params.emulated_levels = 1; - params.is_layered = false; - } else { - params.width = tic.Width(); - params.height = tic.Height(); - params.depth = tic.Depth(); - params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.num_levels = tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); - } - return params; -} - -SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { - const auto& regs = maxwell3d.regs; - const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); - const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; - const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); - return { - .is_tiled = regs.zeta.memory_layout.type == - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, - .srgb_conversion = false, - .is_layered = is_layered, - .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), - .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 
5U), - .block_depth = block_depth, - .tile_width_spacing = 1, - .width = regs.zeta_width, - .height = regs.zeta_height, - .depth = is_layered ? regs.zeta_layers.Value() : 1U, - .pitch = 0, - .num_levels = 1, - .emulated_levels = 1, - .pixel_format = pixel_format, - .type = GetFormatType(pixel_format), - .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, - }; -} - -SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, - std::size_t index) { - const auto& config{maxwell3d.regs.rt[index]}; - SurfaceParams params; - params.is_tiled = - config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || - config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; - params.block_width = config.memory_layout.block_width; - params.block_height = config.memory_layout.block_height; - params.block_depth = config.memory_layout.block_depth; - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.type = GetFormatType(params.pixel_format); - if (params.is_tiled) { - params.pitch = 0; - params.width = config.width; - } else { - const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; - params.pitch = config.width; - params.width = params.pitch / bpp; - } - params.height = config.height; - params.num_levels = 1; - params.emulated_levels = 1; - - if (config.memory_layout.is_3d != 0) { - params.depth = config.layers.Value(); - params.is_layered = false; - params.target = SurfaceTarget::Texture3D; - } else if (config.layers > 1) { - params.depth = config.layers.Value(); - params.is_layered = true; - params.target = SurfaceTarget::Texture2DArray; - } else { - params.depth = 1; - params.is_layered = false; - params.target = SurfaceTarget::Texture2D; - } - return params; -} - -SurfaceParams SurfaceParams::CreateForFermiCopySurface( - const 
Tegra::Engines::Fermi2D::Regs::Surface& config) { - const bool is_tiled = !config.linear; - const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); - - SurfaceParams params{ - .is_tiled = is_tiled, - .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || - config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, - .is_layered = false, - .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, - .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, - .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, - .tile_width_spacing = 1, - .width = config.width, - .height = config.height, - .depth = 1, - .pitch = config.pitch, - .num_levels = 1, - .emulated_levels = 1, - .pixel_format = pixel_format, - .type = GetFormatType(pixel_format), - // TODO(Rodrigo): Try to guess texture arrays from parameters - .target = SurfaceTarget::Texture2D, - }; - - params.is_layered = params.IsLayered(); - return params; -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( - const VideoCommon::Shader::Sampler& entry) { - return TextureTypeToSurfaceTarget(entry.type, entry.is_array); -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( - const VideoCommon::Shader::Image& entry) { - return ImageTypeToSurfaceTarget(entry.type); -} - -bool SurfaceParams::IsLayered() const { - switch (target) { - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - return true; - default: - return false; - } -} - -// Auto block resizing algorithm from: -// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c -u32 SurfaceParams::GetMipBlockHeight(u32 level) const { - if (level == 0) { - return this->block_height; - } - - const u32 height_new{GetMipHeight(level)}; - const u32 default_block_height{GetDefaultBlockHeight()}; - const u32 blocks_in_y{(height_new + 
default_block_height - 1) / default_block_height}; - const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); - return std::clamp(block_height_new, 3U, 7U) - 3U; -} - -u32 SurfaceParams::GetMipBlockDepth(u32 level) const { - if (level == 0) { - return this->block_depth; - } - if (is_layered) { - return 0; - } - - const u32 depth_new{GetMipDepth(level)}; - const u32 block_depth_new = Common::Log2Ceil32(depth_new); - if (block_depth_new > 4) { - return 5 - (GetMipBlockHeight(level) >= 2); - } - return block_depth_new; -} - -std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { - std::size_t offset = 0; - for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, false, false); - } - return offset; -} - -std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { - std::size_t offset = 0; - if (is_converted) { - for (u32 i = 0; i < level; ++i) { - offset += GetConvertedMipmapSize(i) * GetNumLayers(); - } - } else { - for (u32 i = 0; i < level; ++i) { - offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); - } - } - return offset; -} - -std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { - constexpr std::size_t rgba8_bpp = 4ULL; - const std::size_t mip_width = GetMipWidth(level); - const std::size_t mip_height = GetMipHeight(level); - const std::size_t mip_depth = is_layered ? 
1 : GetMipDepth(level); - return mip_width * mip_height * mip_depth * rgba8_bpp; -} - -std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { - std::size_t size = 0; - for (u32 level = 0; level < num_levels; ++level) { - size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); - } - if (is_tiled && is_layered) { - return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); - } - return size; -} - -std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, - bool uncompressed) const { - const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; - const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)}; - if (is_tiled) { - return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width, - mip_height, mip_depth, GetMipBlockHeight(level), - GetMipBlockDepth(level)); - } else if (as_host_size || IsBuffer()) { - return GetBytesPerPixel() * mip_width * mip_height * mip_depth; - } else { - // Linear Texture Case - return pitch * mip_height * mip_depth; - } -} - -bool SurfaceParams::operator==(const SurfaceParams& rhs) const { - return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, num_levels, pixel_format, type, target) == - std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); -} - -std::string SurfaceParams::TargetName() const { - switch (target) { - case SurfaceTarget::Texture1D: - return "1D"; - case SurfaceTarget::TextureBuffer: - return "TexBuffer"; - case SurfaceTarget::Texture2D: - return "2D"; - case SurfaceTarget::Texture3D: - return "3D"; - case SurfaceTarget::Texture1DArray: - return "1DArray"; - case 
SurfaceTarget::Texture2DArray: - return "2DArray"; - case SurfaceTarget::TextureCubemap: - return "Cube"; - case SurfaceTarget::TextureCubeArray: - return "CubeArray"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); - UNREACHABLE(); - return fmt::format("TUK({})", target); - } -} - -u32 SurfaceParams::GetBlockSize() const { - const u32 x = 64U << block_width; - const u32 y = 8U << block_height; - const u32 z = 1U << block_depth; - return x * y * z; -} - -std::pair SurfaceParams::GetBlockXY() const { - const u32 x_pixels = 64U / GetBytesPerPixel(); - const u32 x = x_pixels << block_width; - const u32 y = 8U << block_height; - return {x, y}; -} - -std::tuple SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { - const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; - const u32 block_size = GetBlockSize(); - const u32 block_index = offset / block_size; - const u32 gob_offset = offset % block_size; - const u32 gob_index = gob_offset / static_cast(Tegra::Texture::GOB_SIZE); - const u32 x_gob_pixels = 64U / GetBytesPerPixel(); - const u32 x_block_pixels = x_gob_pixels << block_width; - const u32 y_block_pixels = 8U << block_height; - const u32 z_block_pixels = 1U << block_depth; - const u32 x_blocks = div_ceil(width, x_block_pixels); - const u32 y_blocks = div_ceil(height, y_block_pixels); - const u32 z_blocks = div_ceil(depth, z_block_pixels); - const u32 base_x = block_index % x_blocks; - const u32 base_y = (block_index / x_blocks) % y_blocks; - const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; - u32 x = base_x * x_block_pixels; - u32 y = base_y * y_block_pixels; - u32 z = base_z * z_block_pixels; - z += gob_index >> block_height; - y += (gob_index * 8U) % y_block_pixels; - return {x, y, z}; -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h deleted file mode 100644 index 4466c3c34..000000000 --- 
a/src/video_core/texture_cache/surface_params.h +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCommon { - -class FormatLookupTable; - -class SurfaceParams { -public: - /// Creates SurfaceCachedParams from a texture configuration. - static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry); - - /// Creates SurfaceCachedParams from an image configuration. - static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry); - - /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); - - /// Creates SurfaceCachedParams from a framebuffer configuration. - static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, - std::size_t index); - - /// Creates SurfaceCachedParams from a Fermi2D surface configuration. - static SurfaceParams CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config); - - /// Obtains the texture target from a shader's sampler entry. - static VideoCore::Surface::SurfaceTarget ExpectedTarget( - const VideoCommon::Shader::Sampler& entry); - - /// Obtains the texture target from a shader's sampler entry. 
- static VideoCore::Surface::SurfaceTarget ExpectedTarget( - const VideoCommon::Shader::Image& entry); - - std::size_t Hash() const { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof(*this))); - } - - bool operator==(const SurfaceParams& rhs) const; - - bool operator!=(const SurfaceParams& rhs) const { - return !operator==(rhs); - } - - std::size_t GetGuestSizeInBytes() const { - return GetInnerMemorySize(false, false, false); - } - - std::size_t GetHostSizeInBytes(bool is_converted) const { - if (!is_converted) { - return GetInnerMemorySize(true, false, false); - } - // ASTC is uncompressed in software, in emulated as RGBA8 - std::size_t host_size_in_bytes = 0; - for (u32 level = 0; level < num_levels; ++level) { - host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); - } - return host_size_in_bytes; - } - - u32 GetBlockAlignedWidth() const { - return Common::AlignUp(width, 64 / GetBytesPerPixel()); - } - - /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const { - return std::max(1U, width >> level); - } - - /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const { - return std::max(1U, height >> level); - } - - /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const { - return is_layered ? depth : std::max(1U, depth >> level); - } - - /// Returns the block height of a given mipmap level. - u32 GetMipBlockHeight(u32 level) const; - - /// Returns the block depth of a given mipmap level. - u32 GetMipBlockDepth(u32 level) const; - - /// Returns the best possible row/pitch alignment for the surface. - u32 GetRowAlignment(u32 level, bool is_converted) const { - const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); - return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); - } - - /// Returns the offset in bytes in guest memory of a given mipmap level. 
- std::size_t GetGuestMipmapLevelOffset(u32 level) const; - - /// Returns the offset in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; - - /// Returns the size in bytes in guest memory of a given mipmap level. - std::size_t GetGuestMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, false, false); - } - - /// Returns the size in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); - } - - std::size_t GetConvertedMipmapSize(u32 level) const; - - /// Get this texture Tegra Block size in guest memory layout - u32 GetBlockSize() const; - - /// Get X, Y coordinates max sizes of a single block. - std::pair GetBlockXY() const; - - /// Get the offset in x, y, z coordinates from a memory offset - std::tuple GetBlockOffsetXYZ(u32 offset) const; - - /// Returns the size of a layer in bytes in guest memory. - std::size_t GetGuestLayerSize() const { - return GetLayerSize(false, false); - } - - /// Returns the size of a layer in bytes in host memory for a given mipmap level. - std::size_t GetHostLayerSize(u32 level) const { - ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); - return GetInnerMipmapMemorySize(level, true, false); - } - - /// Returns the max possible mipmap that the texture can have in host gpu - u32 MaxPossibleMipmap() const { - const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; - const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; - const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); - if (target != VideoCore::Surface::SurfaceTarget::Texture3D) - return max_mipmap; - return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); - } - - /// Returns if the guest surface is a compressed surface. 
- bool IsCompressed() const { - return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; - } - - /// Returns the default block width. - u32 GetDefaultBlockWidth() const { - return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); - } - - /// Returns the default block height. - u32 GetDefaultBlockHeight() const { - return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); - } - - /// Returns the bits per pixel. - u32 GetBitsPerPixel() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); - } - - /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const { - return VideoCore::Surface::GetBytesPerPixel(pixel_format); - } - - /// Returns true if the pixel format is a depth and/or stencil format. - bool IsPixelFormatZeta() const { - return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && - pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; - } - - /// Returns is the surface is a TextureBuffer type of surface. - bool IsBuffer() const { - return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; - } - - /// Returns the number of layers in the surface. - std::size_t GetNumLayers() const { - return is_layered ? depth : 1; - } - - /// Returns the debug name of the texture for use in graphic debuggers. - std::string TargetName() const; - - // Helper used for out of class size calculations - static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, - const u32 block_depth) { - return Common::AlignBits(out_size, - Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); - } - - /// Converts a width from a type of surface into another. This helps represent the - /// equivalent value between compressed/non-compressed textures. 
- static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); - const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); - return (width * bw2 + bw1 - 1) / bw1; - } - - /// Converts a height from a type of surface into another. This helps represent the - /// equivalent value between compressed/non-compressed textures. - static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); - const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); - return (height * bh2 + bh1 - 1) / bh1; - } - - // Finds the maximun possible width between 2 2D layers of different formats - static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bw1 = src_params.GetDefaultBlockWidth(); - const u32 bw2 = dst_params.GetDefaultBlockWidth(); - const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; - const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; - return std::min(t_src_width, t_dst_width); - } - - // Finds the maximun possible height between 2 2D layers of different formats - static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bh1 = src_params.GetDefaultBlockHeight(); - const u32 bh2 = dst_params.GetDefaultBlockHeight(); - const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; - const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; - return std::min(t_src_height, t_dst_height); - } - - bool is_tiled; - bool srgb_conversion; - bool 
is_layered; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - u32 width; - u32 height; - u32 depth; - u32 pitch; - u32 num_levels; - u32 emulated_levels; - VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::SurfaceType type; - VideoCore::Surface::SurfaceTarget target; - -private: - /// Returns the size of a given mipmap level inside a layer. - std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; - - /// Returns the size of all mipmap levels and aligns as needed. - std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * - (layer_only ? 1U : (is_layered ? depth : 1U)); - } - - /// Returns the size of a layer - std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - - /// Returns true if these parameters are from a layered surface. - bool IsLayered() const; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp deleted file mode 100644 index 6b5f5984b..000000000 --- a/src/video_core/texture_cache/surface_view.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/common_types.h" -#include "video_core/texture_cache/surface_view.h" - -namespace VideoCommon { - -std::size_t ViewParams::Hash() const { - return static_cast(base_layer) ^ (static_cast(num_layers) << 16) ^ - (static_cast(base_level) << 24) ^ - (static_cast(num_levels) << 32) ^ (static_cast(target) << 36); -} - -bool ViewParams::operator==(const ViewParams& rhs) const { - return std::tie(base_layer, num_layers, base_level, num_levels, target) == - std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); -} - -bool ViewParams::operator!=(const ViewParams& rhs) const { - return !operator==(rhs); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h deleted file mode 100644 index 199f72732..000000000 --- a/src/video_core/texture_cache/surface_view.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -struct ViewParams { - constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_, - u32 num_layers_, u32 base_level_, u32 num_levels_) - : target{target_}, base_layer{base_layer_}, num_layers{num_layers_}, - base_level{base_level_}, num_levels{num_levels_} {} - - std::size_t Hash() const; - - bool operator==(const ViewParams& rhs) const; - bool operator!=(const ViewParams& rhs) const; - - bool IsLayered() const { - switch (target) { - case VideoCore::Surface::SurfaceTarget::Texture1DArray: - case VideoCore::Surface::SurfaceTarget::Texture2DArray: - case VideoCore::Surface::SurfaceTarget::TextureCubemap: - case VideoCore::Surface::SurfaceTarget::TextureCubeArray: - return true; - default: - return false; - } - } - - VideoCore::Surface::SurfaceTarget target{}; - u32 base_layer{}; - u32 num_layers{}; - u32 base_level{}; - u32 num_levels{}; -}; - -class ViewBase { -public: - constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {} - - constexpr const ViewParams& GetViewParams() const { - return params; - } - -protected: - ViewParams params; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 581d8dd5b..968059842 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -6,1298 +6,1449 @@ #include #include -#include +#include #include #include -#include -#include +#include +#include +#include #include +#include #include #include -#include -#include -#include "common/assert.h" +#include "common/alignment.h" +#include 
"common/common_funcs.h" #include "common/common_types.h" -#include "common/math_util.h" -#include "core/core.h" -#include "core/memory.h" -#include "core/settings.h" +#include "common/logging/log.h" #include "video_core/compatible_formats.h" +#include "video_core/delayed_destruction_ring.h" #include "video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" -#include "video_core/texture_cache/copy_params.h" +#include "video_core/texture_cache/descriptor_table.h" #include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra::Texture { -struct FullTextureInfo; -} - -namespace VideoCore { -class RasterizerInterface; -} +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" namespace VideoCommon { -using VideoCore::Surface::FormatCompatibility; +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; -using RenderTargetConfig = 
Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; -template +template class TextureCache { - using VectorSurface = boost::container::small_vector; + /// Address shift for caching images into a hash table + static constexpr u64 PAGE_SHIFT = 20; + + /// Enables debugging features to the texture cache + static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; + /// Implement blits as copies between framebuffers + static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; + /// True when some copies have to be emulated + static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + + /// Image view ID for null descriptors + static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; + /// Sampler ID for bugged sampler ids + static constexpr SamplerId NULL_SAMPLER_ID{0}; + + using Runtime = typename P::Runtime; + using Image = typename P::Image; + using ImageAlloc = typename P::ImageAlloc; + using ImageView = typename P::ImageView; + using Sampler = typename P::Sampler; + using Framebuffer = typename P::Framebuffer; + + struct BlitImages { + ImageId dst_id; + ImageId src_id; + PixelFormat dst_format; + PixelFormat src_format; + }; + + template + struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast(value); + } + }; public: - void InvalidateRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, + Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); - for (const auto& surface : GetSurfacesInRegion(addr, size)) { - Unregister(surface); - } - } + /// Notify the cache that a new frame has been queued + void TickFrame(); - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Return an unique mutually exclusive lock 
for the cache + [[nodiscard]] std::unique_lock AcquireLock(); - for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (surface->IsMemoryMarked()) { - UnmarkMemory(surface); - surface->SetSyncPending(true); - marked_for_unregister.emplace_back(surface); - } - } - } + /// Return a constant reference to the given image view id + [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; - void SyncGuestHost() { - std::lock_guard lock{mutex}; + /// Return a reference to the given image view id + [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; - for (const auto& surface : marked_for_unregister) { - if (surface->IsRegistered()) { - surface->SetSyncPending(false); - Unregister(surface); - } - } - marked_for_unregister.clear(); - } + /// Fill image_view_ids with the graphics images in indices + void FillGraphicsImageViews(std::span indices, + std::span image_view_ids); - /** - * Guarantees that rendertargets don't unregister themselves if the - * collide. Protection is currently only done on 3D slices. 
- */ - void GuardRenderTargets(bool new_guard) { - guard_render_targets = new_guard; - } + /// Fill image_view_ids with the compute images in indices + void FillComputeImageViews(std::span indices, std::span image_view_ids); - void GuardSamplers(bool new_guard) { - guard_samplers = new_guard; - } + /// Get the sampler from the graphics descriptor table in the specified index + Sampler* GetGraphicsSampler(u32 index); - void FlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Get the sampler from the compute descriptor table in the specified index + Sampler* GetComputeSampler(u32 index); - auto surfaces = GetSurfacesInRegion(addr, size); - if (surfaces.empty()) { - return; - } - std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { - return a->GetModificationTick() < b->GetModificationTick(); - }); - for (const auto& surface : surfaces) { - mutex.unlock(); - FlushSurface(surface); - mutex.lock(); - } - } + /// Refresh the state for graphics image view and sampler descriptors + void SynchronizeGraphicsDescriptors(); - bool MustFlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Refresh the state for compute image view and sampler descriptors + void SynchronizeComputeDescriptors(); - const auto surfaces = GetSurfacesInRegion(addr, size); - return std::any_of(surfaces.cbegin(), surfaces.cend(), - [](const TSurface& surface) { return surface->IsModified(); }); - } + /// Update bound render targets and upload memory if necessary + /// @param is_clear True when the render targets are being used for clears + void UpdateRenderTargets(bool is_clear); - TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry) { - std::lock_guard lock{mutex}; - const auto gpu_addr{tic.Address()}; - if (!gpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Find a framebuffer with the currently bound render targets + /// 
UpdateRenderTargets should be called before this + Framebuffer* GetFramebuffer(); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Mark images in a range as modified from the CPU + void WriteMemory(VAddr cpu_addr, size_t size); - if (!IsTypeCompatible(tic.texture_type, entry)) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Download contents of host images to guest memory in a region + void DownloadMemory(VAddr cpu_addr, size_t size); - const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); - if (guard_samplers) { - sampled_textures.push_back(surface); - } - return view; - } + /// Remove images in a region + void UnmapMemory(VAddr cpu_addr, size_t size); - TView GetImageSurface(const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry) { - std::lock_guard lock{mutex}; - const auto gpu_addr{tic.Address()}; - if (!gpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } - const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); - if (guard_samplers) { - sampled_textures.push_back(surface); - } - return view; - } + /// Blit an image with the given parameters + void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); - bool TextureBarrier() { - const bool any_rt = - std::any_of(sampled_textures.begin(), sampled_textures.end(), - [](const auto& surface) { return surface->IsRenderTarget(); }); - 
sampled_textures.clear(); - return any_rt; - } + /// Invalidate the contents of the color buffer index + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateColorBuffer(size_t index); - TView GetDepthBufferSurface(bool preserve_contents) { - std::lock_guard lock{mutex}; - auto& dirty = maxwell3d.dirty; - if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { - return depth_buffer.view; - } - dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; + /// Invalidate the contents of the depth buffer + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateDepthBuffer(); - const auto& regs{maxwell3d.regs}; - const auto gpu_addr{regs.zeta.Address()}; - if (!gpu_addr || !regs.zeta_enable) { - SetEmptyDepthBuffer(); - return {}; - } - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - SetEmptyDepthBuffer(); - return {}; - } - const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; - auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); - if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(false, NO_RT); - depth_buffer.target = surface_view.first; - depth_buffer.view = surface_view.second; - if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); - return surface_view.second; - } + /// Try to find a cached image view in the given CPU address + [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); - TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { - std::lock_guard lock{mutex}; - ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { - return render_targets[index].view; - } - maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; + /// Return true when there are uncommitted images to be downloaded + 
[[nodiscard]] bool HasUncommittedFlushes() const noexcept; - const auto& regs{maxwell3d.regs}; - if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || - regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - SetEmptyColorBuffer(index); - return {}; - } + /// Return true when the caller should wait for async downloads + [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; - const auto& config{regs.rt[index]}; - const auto gpu_addr{config.Address()}; - if (!gpu_addr) { - SetEmptyColorBuffer(index); - return {}; - } + /// Commit asynchronous downloads + void CommitAsyncFlushes(); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - SetEmptyColorBuffer(index); - return {}; - } + /// Pop asynchronous downloads + void PopAsyncFlushes(); - auto surface_view = - GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), - preserve_contents, true); - if (render_targets[index].target) { - auto& surface = render_targets[index].target; - surface->MarkAsRenderTarget(false, NO_RT); - const auto& cr_params = surface->GetSurfaceParams(); - if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - AsyncFlushSurface(surface); - } - } - render_targets[index].target = surface_view.first; - render_targets[index].view = surface_view.second; - if (render_targets[index].target) - render_targets[index].target->MarkAsRenderTarget(true, static_cast(index)); - return surface_view.second; - } - - void MarkColorBufferInUse(std::size_t index) { - if (auto& render_target = render_targets[index].target) { - render_target->MarkAsModified(true, Tick()); - } - } - - void MarkDepthBufferInUse() { - if (depth_buffer.target) { - depth_buffer.target->MarkAsModified(true, Tick()); - } - } - - void SetEmptyDepthBuffer() { - if (depth_buffer.target == nullptr) { - return; - } - depth_buffer.target->MarkAsRenderTarget(false, NO_RT); - depth_buffer.target = nullptr; - 
depth_buffer.view = nullptr; - } - - void SetEmptyColorBuffer(std::size_t index) { - if (render_targets[index].target == nullptr) { - return; - } - render_targets[index].target->MarkAsRenderTarget(false, NO_RT); - render_targets[index].target = nullptr; - render_targets[index].view = nullptr; - } - - void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Tegra::Engines::Fermi2D::Config& copy_config) { - std::lock_guard lock{mutex}; - SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); - SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); - const GPUVAddr src_gpu_addr = src_config.Address(); - const GPUVAddr dst_gpu_addr = dst_config.Address(); - DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - - const std::optional dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); - const std::optional src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); - std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); - TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; - ImageBlit(src_surface, dst_surface.second, copy_config); - dst_surface.first->MarkAsModified(true, Tick()); - } - - TSurface TryFindFramebufferSurface(VAddr addr) const { - if (!addr) { - return nullptr; - } - const VAddr page = addr >> registry_page_bits; - const auto it = registry.find(page); - if (it == registry.end()) { - return nullptr; - } - const auto& list = it->second; - const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { - return surface->GetCpuAddr() == addr; - }); - return found != list.end() ? 
*found : nullptr; - } - - u64 Tick() { - return ++ticks; - } - - void CommitAsyncFlushes() { - committed_flushes.push_back(uncommitted_flushes); - uncommitted_flushes.reset(); - } - - bool HasUncommittedFlushes() const { - return uncommitted_flushes != nullptr; - } - - bool ShouldWaitAsyncFlushes() const { - return !committed_flushes.empty() && committed_flushes.front() != nullptr; - } - - void PopAsyncFlushes() { - if (committed_flushes.empty()) { - return; - } - auto& flush_list = committed_flushes.front(); - if (!flush_list) { - committed_flushes.pop_front(); - return; - } - for (TSurface& surface : *flush_list) { - FlushSurface(surface); - } - committed_flushes.pop_front(); - } - -protected: - explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - bool is_astc_supported_) - : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - gpu_memory{gpu_memory_} { - for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - SetEmptyColorBuffer(i); - } - - SetEmptyDepthBuffer(); - staging_cache.SetSize(2); - - const auto make_siblings = [this](PixelFormat a, PixelFormat b) { - siblings_table[static_cast(a)] = b; - siblings_table[static_cast(b)] = a; - }; - std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); - make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); - make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); - make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); - - sampled_textures.reserve(64); - } - - ~TextureCache() = default; - - virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - - virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, - const CopyParams& copy_params) = 0; - - virtual void ImageBlit(TView& src_view, TView& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) = 0; - - 
// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture - // and reading it from a separate buffer. - virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; - - void ManageRenderTargetUnregister(TSurface& surface) { - auto& dirty = maxwell3d.dirty; - const u32 index = surface->GetRenderTarget(); - if (index == DEPTH_RT) { - dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; - } else { - dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; - } - dirty.flags[VideoCommon::Dirty::RenderTargets] = true; - } - - void Register(TSurface surface) { - const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const std::size_t size = surface->GetSizeInBytes(); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", - gpu_addr); - return; - } - surface->SetCpuAddr(*cpu_addr); - RegisterInnerCache(surface); - surface->MarkAsRegistered(true); - surface->SetMemoryMarked(true); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); - } - - void UnmarkMemory(TSurface surface) { - if (!surface->IsMemoryMarked()) { - return; - } - const std::size_t size = surface->GetSizeInBytes(); - const VAddr cpu_addr = surface->GetCpuAddr(); - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - surface->SetMemoryMarked(false); - } - - void Unregister(TSurface surface) { - if (guard_render_targets && surface->IsProtected()) { - return; - } - if (!guard_render_targets && surface->IsRenderTarget()) { - ManageRenderTargetUnregister(surface); - } - UnmarkMemory(surface); - if (surface->IsSyncPending()) { - marked_for_unregister.remove(surface); - surface->SetSyncPending(false); - } - UnregisterInnerCache(surface); - surface->MarkAsRegistered(false); - ReserveSurface(surface->GetSurfaceParams(), surface); - } - - TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - if (const auto 
surface = TryGetReservedSurface(params); surface) { - surface->SetGpuAddr(gpu_addr); - return surface; - } - // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(gpu_addr, params)}; - return new_surface; - } - - const bool is_astc_supported; + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); private: - enum class RecycleStrategy : u32 { - Ignore = 0, - Flush = 1, - BufferCopy = 3, - }; - - enum class DeductionType : u32 { - DeductionComplete, - DeductionIncomplete, - DeductionFailed, - }; - - struct Deduction { - DeductionType type{DeductionType::DeductionFailed}; - TSurface surface{}; - - bool Failed() const { - return type == DeductionType::DeductionFailed; - } - - bool Incomplete() const { - return type == DeductionType::DeductionIncomplete; - } - - bool IsDepth() const { - return surface->GetSurfaceParams().IsPixelFormatZeta(); - } - }; - - /** - * Takes care of selecting a proper strategy to deal with a texture recycle. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param untopological Indicates to the recycler that the texture has no way - * to match the overlaps due to topological reasons. 
- **/ - RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { - if (Settings::IsGPULevelExtreme()) { - return RecycleStrategy::Flush; - } - // 3D Textures decision - if (params.target == SurfaceTarget::Texture3D) { - return RecycleStrategy::Flush; - } - for (const auto& s : overlaps) { - const auto& s_params = s->GetSurfaceParams(); - if (s_params.target == SurfaceTarget::Texture3D) { - return RecycleStrategy::Flush; - } - } - // Untopological decision - if (untopological == MatchTopologyResult::CompressUnmatch) { - return RecycleStrategy::Flush; - } - if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { - return RecycleStrategy::Flush; - } - return RecycleStrategy::Ignore; - } - - /** - * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented - * strategies: Ignore and Flush. - * - * - Ignore: Just unregisters all the overlaps and loads the new texture. - * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters for the new surface. - * @param gpu_addr The starting address of the new surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or left - * blank. - * @param untopological Indicates to the recycler that the texture has no way to match the - * overlaps due to topological reasons. 
- **/ - std::pair RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool preserve_contents, - const MatchTopologyResult untopological) { - const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); - for (auto& surface : overlaps) { - Unregister(surface); - } - switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { - case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, do_load); - } - case RecycleStrategy::Flush: { - std::sort(overlaps.begin(), overlaps.end(), - [](const TSurface& a, const TSurface& b) -> bool { - return a->GetModificationTick() < b->GetModificationTick(); - }); - for (auto& surface : overlaps) { - FlushSurface(surface); - } - return InitializeSurface(gpu_addr, params, preserve_contents); - } - case RecycleStrategy::BufferCopy: { - auto new_surface = GetUncachedSurface(gpu_addr, params); - BufferCopy(overlaps[0], new_surface); - return {new_surface, new_surface->GetMainView()}; - } - default: { - UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, do_load); - } - } - } - - /** - * Takes a single surface and recreates into another that may differ in - * format, target or width alignment. - * - * @param current_surface The registered surface in the cache which we want to convert. - * @param params The new surface params which we'll use to recreate the surface. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params, - bool is_render) { - const auto gpu_addr = current_surface->GetGpuAddr(); - const auto& cr_params = current_surface->GetSurfaceParams(); - TSurface new_surface; - if (cr_params.pixel_format != params.pixel_format && !is_render && - GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { - SurfaceParams new_params = params; - new_params.pixel_format = cr_params.pixel_format; - new_params.type = cr_params.type; - new_surface = GetUncachedSurface(gpu_addr, new_params); - } else { - new_surface = GetUncachedSurface(gpu_addr, params); - } - const SurfaceParams& final_params = new_surface->GetSurfaceParams(); - if (cr_params.type != final_params.type) { - if (Settings::IsGPULevelExtreme()) { - BufferCopy(current_surface, new_surface); - } - } else { - std::vector bricks = current_surface->BreakDown(final_params); - for (auto& brick : bricks) { - TryCopyImage(current_surface, new_surface, brick); - } - } - Unregister(current_surface); - Register(new_surface); - new_surface->MarkAsModified(current_surface->IsModified(), Tick()); - return {new_surface, new_surface->GetMainView()}; - } - - /** - * Takes a single surface and checks with the new surface's params if it's an exact - * match, we return the main view of the registered surface. If its formats don't - * match, we rebuild the surface. We call this last method a `Mirage`. If formats - * match but the targets don't, we create an overview View of the registered surface. - * - * @param current_surface The registered surface in the cache which we want to convert. - * @param params The new surface params which we want to check. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair ManageStructuralMatch(TSurface current_surface, - const SurfaceParams& params, bool is_render) { - const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); - const bool matches_target = current_surface->MatchTarget(params.target); - const auto match_check = [&]() -> std::pair { - if (matches_target) { - return {current_surface, current_surface->GetMainView()}; - } - return {current_surface, current_surface->EmplaceOverview(params)}; - }; - if (!is_mirage) { - return match_check(); - } - if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { - return match_check(); - } - return RebuildSurface(current_surface, params, is_render); - } - - /** - * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate - * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps - * of the new surface, if they all match we end up recreating a surface for them, - * else we return nothing. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. 
- **/ - std::optional> TryReconstructSurface(VectorSurface& overlaps, - const SurfaceParams& params, - GPUVAddr gpu_addr) { - if (params.target == SurfaceTarget::Texture3D) { - return std::nullopt; - } - const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - - if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { - LoadSurface(new_surface); - for (const auto& surface : overlaps) { - Unregister(surface); - } - Register(new_surface); - return {{new_surface, new_surface->GetMainView()}}; - } - - std::size_t passed_tests = 0; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; - if (!mipmap_layer) { - continue; - } - const auto [base_layer, base_mipmap] = *mipmap_layer; - if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { - continue; - } - ++passed_tests; - - // Copy all mipmaps and layers - const u32 block_width = params.GetDefaultBlockWidth(); - const u32 block_height = params.GetDefaultBlockHeight(); - for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { - const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); - const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); - if (width < block_width || height < block_height) { - // Current APIs forbid copying small compressed textures, avoid errors + /// Iterate over all page indices in a range + template + static void ForEachPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_SHIFT; + for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { break; } - const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, 
width, height, - src_params.depth); - TryCopyImage(surface, new_surface, copy_params); + } else { + func(page); } } - if (passed_tests == 0) { - return std::nullopt; - } - if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { - // In Accurate GPU all tests should pass, else we recycle - return std::nullopt; - } - - const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); - for (const auto& surface : overlaps) { - Unregister(surface); - } - - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - return {{new_surface, new_surface->GetMainView()}}; - } - - /** - * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D - * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of - * the HLE methods. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param cpu_addr The starting address of the new surface on physical memory. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. 
- */ - std::optional> Manage3DSurfaces(VectorSurface& overlaps, - const SurfaceParams& params, - GPUVAddr gpu_addr, VAddr cpu_addr, - bool preserve_contents) { - if (params.target != SurfaceTarget::Texture3D) { - for (const auto& surface : overlaps) { - if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { - if (Settings::IsGPULevelExtreme()) { - return std::nullopt; - } - Unregister(surface); - return InitializeSurface(gpu_addr, params, preserve_contents); - } - return std::nullopt; - } - if (surface->GetCpuAddr() != cpu_addr) { - continue; - } - if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { - return std::make_pair(surface, surface->GetMainView()); - } - } - return InitializeSurface(gpu_addr, params, preserve_contents); - } - - if (params.num_levels > 1) { - // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach - return std::nullopt; - } - - if (overlaps.size() == 1) { - const auto& surface = overlaps[0]; - const SurfaceParams& overlap_params = surface->GetSurfaceParams(); - // Don't attempt to render to textures with more than one level for now - // The texture has to be to the right or the sample address if we want to render to it - if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { - const u32 offset = static_cast(cpu_addr - surface->GetCpuAddr()); - const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); - if (slice < overlap_params.depth) { - auto view = surface->Emplace3DView(slice, params.depth, 0, 1); - return std::make_pair(std::move(surface), std::move(view)); - } - } - } - - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - bool modified = false; - - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.target != SurfaceTarget::Texture2D || - src_params.height != params.height || - src_params.block_depth != params.block_depth || - 
src_params.block_height != params.block_height) { - return std::nullopt; - } - modified |= surface->IsModified(); - - const u32 offset = static_cast(surface->GetCpuAddr() - cpu_addr); - const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); - const u32 width = params.width; - const u32 height = params.height; - const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); - TryCopyImage(surface, new_surface, copy_params); - } - for (const auto& surface : overlaps) { - Unregister(surface); - } - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - - TView view = new_surface->GetMainView(); - return std::make_pair(std::move(new_surface), std::move(view)); - } - - /** - * Gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache. This is done in 3 big steps: - * - * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. - * - * 2. Check if there are any overlaps at all, if there are none, we just load the texture from - * memory else we move to step 3. - * - * 3. Consists of figuring out the relationship between the candidate texture and the - * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If - * there's many, we just try to reconstruct a new surface out of them based on the - * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we - * have to check if the candidate is a view (layer/mipmap) of the overlap or if the - * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct - * a new surface. - * - * @param gpu_addr The starting address of the candidate surface. - * @param params The parameters on the candidate surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, - const SurfaceParams& params, bool preserve_contents, - bool is_render) { - // Step 1 - // Check Level 1 Cache for a fast structural match. If candidate surface - // matches at certain level we are pretty much done. - if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { - TSurface& current_surface = iter->second; - const auto topological_result = current_surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - VectorSurface overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); - } - - const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None) { - const auto& old_params = current_surface->GetSurfaceParams(); - const bool not_3d = params.target != SurfaceTarget::Texture3D && - old_params.target != SurfaceTarget::Texture3D; - if (not_3d || current_surface->MatchTarget(params.target)) { - if (struct_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params, is_render); - } else { - return RebuildSurface(current_surface, params, is_render); - } - } - } - } - - // Step 2 - // Obtain all possible overlaps in the memory region - const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; - - // If none are found, we are done. we just load the surface and create it. - if (overlaps.empty()) { - return InitializeSurface(gpu_addr, params, preserve_contents); - } - - // Step 3 - // Now we need to figure the relationship between the texture and its overlaps - // we do a topological test to ensure we can find some relationship. 
If it fails - // immediately recycle the texture - for (const auto& surface : overlaps) { - const auto topological_result = surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); - } - } - - // Manage 3D textures - if (params.block_depth > 0) { - auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); - if (surface) { - return *surface; - } - } - - // Split cases between 1 overlap or many. - if (overlaps.size() == 1) { - TSurface current_surface = overlaps[0]; - // First check if the surface is within the overlap. If not, it means - // two things either the candidate surface is a supertexture of the overlap - // or they don't match in any known way. - if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - // Now we check if the candidate is a mipmap/layer of the overlap - std::optional view = - current_surface->EmplaceView(params, gpu_addr, candidate_size); - if (view) { - const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); - if (is_mirage) { - // On a mirage view, we need to recreate the surface under this new view - // and then obtain a view again. 
- SurfaceParams new_params = current_surface->GetSurfaceParams(); - const u32 wh = SurfaceParams::ConvertWidth( - new_params.width, new_params.pixel_format, params.pixel_format); - const u32 hh = SurfaceParams::ConvertHeight( - new_params.height, new_params.pixel_format, params.pixel_format); - new_params.width = wh; - new_params.height = hh; - new_params.pixel_format = params.pixel_format; - std::pair pair = - RebuildSurface(current_surface, new_params, is_render); - std::optional mirage_view = - pair.first->EmplaceView(params, gpu_addr, candidate_size); - if (mirage_view) - return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - return {current_surface, *view}; - } - } else { - // If there are many overlaps, odds are they are subtextures of the candidate - // surface. We try to construct a new surface based on the candidate parameters, - // using the overlaps. If a single overlap fails, this will fail. - std::optional> view = - TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } - } - // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - - /** - * Gets the starting address and parameters of a candidate surface and tries to find a - * matching surface within the cache that's similar to it. If there are many textures - * or the texture found if entirely incompatible, it will fail. If no texture is found, the - * blit will be unsuccessful. - * - * @param gpu_addr The starting address of the candidate surface. - * @param params The parameters on the candidate surface. 
- **/ - Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - - if (!cpu_addr) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } - - if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { - TSurface& current_surface = iter->second; - const auto topological_result = current_surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } - const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None && - current_surface->MatchTarget(params.target)) { - Deduction result{}; - result.type = DeductionType::DeductionComplete; - result.surface = current_surface; - return result; - } - } - - const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; - - if (overlaps.empty()) { - Deduction result{}; - result.type = DeductionType::DeductionIncomplete; - return result; - } - - if (overlaps.size() > 1) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } else { - Deduction result{}; - result.type = DeductionType::DeductionComplete; - result.surface = overlaps[0]; - return result; - } } - /** - * Gets a null surface based on a target texture. - * @param target The target of the null surface. 
- */ - TView GetNullSurface(SurfaceTarget target) { - const u32 i_target = static_cast(target); - if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { - return it->second->GetMainView(); - } - SurfaceParams params{}; - params.target = target; - params.is_tiled = false; - params.srgb_conversion = false; - params.is_layered = - target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || - target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; - params.block_width = 0; - params.block_height = 0; - params.block_depth = 0; - params.tile_width_spacing = 1; - params.width = 1; - params.height = 1; - params.depth = 1; - if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { - params.depth = 6; - } - params.pitch = 4; - params.num_levels = 1; - params.emulated_levels = 1; - params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; - params.type = VideoCore::Surface::SurfaceType::ColorTexture; - auto surface = CreateSurface(0ULL, params); - invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); - surface->UploadTexture(invalid_memory); - surface->MarkAsModified(false, Tick()); - invalid_cache.emplace(i_target, surface); - return surface->GetMainView(); - } + /// Fills image_view_ids in the image views in indices + void FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, std::span indices, + std::span image_view_ids); - /** - * Gets the a source and destination starting address and parameters, - * and tries to deduce if they are supposed to be depth textures. If so, their - * parameters are modified and fixed into so. - * - * @param src_params The parameters of the candidate surface. - * @param dst_params The parameters of the destination surface. - * @param src_gpu_addr The starting address of the candidate surface. - * @param dst_gpu_addr The starting address of the destination surface. 
- **/ - void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, - const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { - auto deduced_src = DeduceSurface(src_gpu_addr, src_params); - auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); - if (deduced_src.Failed() || deduced_dst.Failed()) { - return; - } + /// Find or create an image view in the guest descriptor table + ImageViewId VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, u32 index); - const bool incomplete_src = deduced_src.Incomplete(); - const bool incomplete_dst = deduced_dst.Incomplete(); + /// Find or create a framebuffer with the given render target parameters + FramebufferId GetFramebufferId(const RenderTargets& key); - if (incomplete_src && incomplete_dst) { - return; - } + /// Refresh the contents (pixel data) of an image + void RefreshContents(Image& image); - const bool any_incomplete = incomplete_src || incomplete_dst; + /// Upload data from guest to an image + template + void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); - if (!any_incomplete) { - if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { - return; - } - } else { - if (incomplete_src && !(deduced_dst.IsDepth())) { - return; - } + /// Find or create an image view from a guest descriptor + [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); - if (incomplete_dst && !(deduced_src.IsDepth())) { - return; - } - } + /// Create a new image view from a guest descriptor + [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); - const auto inherit_format = [](SurfaceParams& to, TSurface from) { - const SurfaceParams& params = from->GetSurfaceParams(); - to.pixel_format = params.pixel_format; - to.type = params.type; - }; - // Now we got the cases where one or both is Depth and the other is not known - if (!incomplete_src) { - inherit_format(src_params, deduced_src.surface); - } else { - inherit_format(src_params, deduced_dst.surface); 
- } - if (!incomplete_dst) { - inherit_format(dst_params, deduced_dst.surface); - } else { - inherit_format(dst_params, deduced_src.surface); - } - } + /// Find or create an image from the given parameters + [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options = RelaxedOptions{}); - std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { - auto new_surface{GetUncachedSurface(gpu_addr, params)}; - Register(new_surface); - if (preserve_contents) { - LoadSurface(new_surface); - } - return {new_surface, new_surface->GetMainView()}; - } + /// Find an image from the given parameters + [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); - void LoadSurface(const TSurface& surface) { - staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(gpu_memory, staging_cache); - surface->UploadTexture(staging_cache.GetBuffer(0)); - surface->MarkAsModified(false, Tick()); - } + /// Create an image from the given parameters + [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); - void FlushSurface(const TSurface& surface) { - if (!surface->IsModified()) { - return; - } - staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(gpu_memory, staging_cache); - surface->MarkAsModified(false, Tick()); - } + /// Create a new image and join perfectly matching existing images + /// Remove joined images from the cache + [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - void RegisterInnerCache(TSurface& surface) { - const VAddr cpu_addr = surface->GetCpuAddr(); - VAddr start = cpu_addr >> registry_page_bits; - const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; - l1_cache[cpu_addr] = surface; - while (start <= end) { 
- registry[start].push_back(surface); - start++; - } - } + /// Return a blit image pair from the given guest blit parameters + [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src); - void UnregisterInnerCache(TSurface& surface) { - const VAddr cpu_addr = surface->GetCpuAddr(); - VAddr start = cpu_addr >> registry_page_bits; - const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; - l1_cache.erase(cpu_addr); - while (start <= end) { - auto& reg{registry[start]}; - reg.erase(std::find(reg.begin(), reg.end(), surface)); - start++; - } - } + /// Find or create a sampler from a guest descriptor sampler + [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); - VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { - if (size == 0) { - return {}; - } - const VAddr cpu_addr_end = cpu_addr + size; - const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - VectorSurface surfaces; - for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { - const auto it = registry.find(start); - if (it == registry.end()) { - continue; - } - for (auto& surface : it->second) { - if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { - continue; - } - surface->MarkAsPicked(true); - surfaces.push_back(surface); - } - } - for (auto& surface : surfaces) { - surface->MarkAsPicked(false); - } - return surfaces; - } + /// Find or create an image view for the given color buffer index + [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); - void ReserveSurface(const SurfaceParams& params, TSurface surface) { - surface_reserve[params].push_back(std::move(surface)); - } + /// Find or create an image view for the depth buffer + [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); - TSurface TryGetReservedSurface(const SurfaceParams& params) { - auto search{surface_reserve.find(params)}; - if (search == 
surface_reserve.end()) { - return {}; - } - for (auto& surface : search->second) { - if (!surface->IsRegistered()) { - return surface; - } - } - return {}; - } + /// Find or create a view for a render target with the given image parameters + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear); - /// Try to do an image copy logging when formats are incompatible. - void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { - const SurfaceParams& src_params = src->GetSurfaceParams(); - const SurfaceParams& dst_params = dst->GetSurfaceParams(); - if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { - LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, - src_params.pixel_format); - return; - } - ImageCopy(src, dst, copy); - } + /// Iterates over all the images in a region calling func + template + void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); - constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { - return siblings_table[static_cast(format)]; - } + /// Find or create an image view in the given image with the passed parameters + [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); - /// Returns true the shader sampler entry is compatible with the TIC texture type. 
- static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, - const VideoCommon::Shader::Sampler& entry) { - const auto shader_type = entry.type; - switch (tic_type) { - case Tegra::Texture::TextureType::Texture1D: - case Tegra::Texture::TextureType::Texture1DArray: - return shader_type == Tegra::Shader::TextureType::Texture1D; - case Tegra::Texture::TextureType::Texture1DBuffer: - // TODO(Rodrigo): Assume as valid for now - return true; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return shader_type == Tegra::Shader::TextureType::Texture2D; - case Tegra::Texture::TextureType::Texture2DArray: - return shader_type == Tegra::Shader::TextureType::Texture2D || - shader_type == Tegra::Shader::TextureType::TextureCube; - case Tegra::Texture::TextureType::Texture3D: - return shader_type == Tegra::Shader::TextureType::Texture3D; - case Tegra::Texture::TextureType::TextureCubeArray: - case Tegra::Texture::TextureType::TextureCubemap: - if (shader_type == Tegra::Shader::TextureType::TextureCube) { - return true; - } - return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array; - } - UNREACHABLE(); - return true; - } + /// Register image in the page table + void RegisterImage(ImageId image); - struct FramebufferTargetInfo { - TSurface target; - TView view; - }; + /// Unregister image from the page table + void UnregisterImage(ImageId image); - void AsyncFlushSurface(TSurface& surface) { - if (!uncommitted_flushes) { - uncommitted_flushes = std::make_shared>(); - } - uncommitted_flushes->push_back(surface); - } + /// Track CPU reads and writes for image + void TrackImage(ImageBase& image); + /// Stop tracking CPU reads and writes for image + void UntrackImage(ImageBase& image); + + /// Delete image from the cache + void DeleteImage(ImageId image); + + /// Remove image views references from the cache + void RemoveImageViewReferences(std::span removed_views); + + /// Remove framebuffers using 
the given image views from the cache + void RemoveFramebuffers(std::span removed_views); + + /// Mark an image as modified from the GPU + void MarkModification(ImageBase& image) noexcept; + + /// Synchronize image aliases, copying data if needed + void SynchronizeAliases(ImageId image_id); + + /// Prepare an image to be used + void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + + /// Prepare an image view to be used + void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); + + /// Execute copies from one image to the other, even if they are incompatible + void CopyImage(ImageId dst_id, ImageId src_id, std::span copies); + + /// Bind an image view as render target, downloading resources preemtively if needed + void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); + + /// Create a render target from a given image and image view parameters + [[nodiscard]] std::pair RenderTargetFromImage( + ImageId, const ImageViewInfo& view_info); + + /// Returns true if the current clear parameters clear the whole image of a given image view + [[nodiscard]] bool IsFullClear(ImageViewId id); + + Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; - FormatLookupTable format_lookup_table; - FormatCompatibility format_compatibility; + DescriptorTable graphics_image_table{gpu_memory}; + DescriptorTable graphics_sampler_table{gpu_memory}; + std::vector graphics_sampler_ids; + std::vector graphics_image_view_ids; - u64 ticks{}; + DescriptorTable compute_image_table{gpu_memory}; + DescriptorTable compute_sampler_table{gpu_memory}; + std::vector compute_sampler_ids; + std::vector compute_image_view_ids; - // Guards the cache for protection conflicts. 
- bool guard_render_targets{}; - bool guard_samplers{}; + RenderTargets render_targets; - // The siblings table is for formats that can inter exchange with one another - // without causing issues. This is only valid when a conflict occurs on a non - // rendering use. - std::array(PixelFormat::Max)> siblings_table; + std::mutex mutex; - // The internal Cache is different for the Texture Cache. It's based on buckets - // of 1MB. This fits better for the purpose of this cache as textures are normaly - // large in size. - static constexpr u64 registry_page_bits{20}; - static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map> registry; + std::unordered_map image_views; + std::unordered_map samplers; + std::unordered_map framebuffers; - static constexpr u32 DEPTH_RT = 8; - static constexpr u32 NO_RT = 0xFFFFFFFF; + std::unordered_map, IdentityHash> page_table; - // The L1 Cache is used for fast texture lookup before checking the overlaps - // This avoids calculating size and other stuffs. - std::unordered_map l1_cache; + bool has_deleted_images = false; - /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have - /// previously been used. This is to prevent surfaces from being constantly created and - /// destroyed when used with different surface parameters. - std::unordered_map> surface_reserve; - std::array - render_targets; - FramebufferTargetInfo depth_buffer; + SlotVector slot_images; + SlotVector slot_image_views; + SlotVector slot_image_allocs; + SlotVector slot_samplers; + SlotVector slot_framebuffers; - std::vector sampled_textures; + // TODO: This data structure is not optimal and it should be reworked + std::vector uncommitted_downloads; + std::queue> committed_downloads; - /// This cache stores null surfaces in order to be used as a placeholder - /// for invalid texture calls. 
- std::unordered_map invalid_cache; - std::vector invalid_memory; + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing sentenced_images; + DelayedDestructionRing sentenced_image_view; + DelayedDestructionRing sentenced_framebuffers; - std::list marked_for_unregister; + std::unordered_map image_allocs_table; - std::shared_ptr> uncommitted_flushes{}; - std::list>> committed_flushes; - - StagingCache staging_cache; - std::recursive_mutex mutex; + u64 modification_tick = 0; + u64 frame_tick = 0; }; +template +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); + + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); +} + +template +void TextureCache

::TickFrame() { + // Tick sentenced resources in this order to ensure they are destroyed in the right order + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} + +template +std::unique_lock TextureCache

::AcquireLock() { + return std::unique_lock{mutex}; +} + +template +const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} + +template +typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} + +template +void TextureCache

::FillGraphicsImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} + +template +void TextureCache

::FillComputeImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} + +template +typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { + [[unlikely]] if (index > graphics_sampler_table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + [[unlikely]] if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { + [[unlikely]] if (index > compute_sampler_table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + [[unlikely]] if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +void TextureCache

::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + return; + } + flags[Dirty::RenderTargets] = false; + + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; + + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + + for (size_t index = 0; index < NUM_RT; ++index) { + render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, + }; +} + +template +typename P::Framebuffer* TextureCache

::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} + +template +void TextureCache

::FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, + std::span indices, + std::span image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} + +template +ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} + +template +FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} + +template +void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + image.flags |= ImageFlagBits::CpuModified; + UntrackImage(image); + }); +} + +template +void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + // Skip images that were not modified from the GPU + if (False(image.flags & ImageFlagBits::GpuModified)) { + return; + } + // Skip images that .are. modified from the CPU + // We don't want to write sensitive data from the guest + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + return; + } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; + } + std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, 0, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); + } +} + +template +void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image); + } + UnregisterImage(id); + DeleteImage(id); + } +} + +template +void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + // TODO: Deduplicate + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const std::array src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; + + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const std::array dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; + + // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} + +template +void TextureCache

::InvalidateColorBuffer(size_t index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + color_buffer_id = FindColorBuffer(index, false); + if (!color_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); + return; + } + // When invalidating a color buffer, the old contents are no longer relevant + ImageView& color_buffer = slot_image_views[color_buffer_id]; + Image& image = slot_images[color_buffer.image_id]; + image.flags &= ~ImageFlagBits::CpuModified; + image.flags &= ~ImageFlagBits::GpuModified; + + runtime.InvalidateColorBuffer(color_buffer, index); +} + +template +void TextureCache

::InvalidateDepthBuffer() { + ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; + depth_buffer_id = FindDepthBuffer(false); + if (!depth_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); + return; + } + // When invalidating the depth buffer, the old contents are no longer relevant + ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; + image.flags &= ~ImageFlagBits::CpuModified; + image.flags &= ~ImageFlagBits::GpuModified; + + ImageView& depth_buffer = slot_image_views[depth_buffer_id]; + runtime.InvalidateDepthBuffer(depth_buffer); +} + +template +typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_SHIFT); + if (it == page_table.end()) { + return nullptr; + } + const auto& image_ids = it->second; + for (const ImageId image_id : image_ids) { + const ImageBase& image = slot_images[image_id]; + if (image.cpu_addr != cpu_addr) { + continue; + } + if (image.image_view_ids.empty()) { + continue; + } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} + +template +bool TextureCache

::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} + +template +bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} + +template +void TextureCache

::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} + +template +void TextureCache

::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.MapDownloadBuffer(total_size_bytes); + size_t buffer_offset = 0; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, buffer_offset, copies); + buffer_offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + buffer_offset = 0; + const std::span download_span = download_map.Span(); + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + const std::span image_download_span = download_span.subspan(buffer_offset); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); + buffer_offset += image.unswizzled_size_bytes; + } + committed_downloads.pop(); +} + +template +bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + is_modified = true; + return true; + }); + return is_modified; +} + +template +void TextureCache

::RefreshContents(Image& image) { + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image); + + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); + UploadImageContents(image, map, 0); + runtime.InsertUploadMemoryBarrier(); +} + +template +template +void TextureCache

::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { + const std::span mapped_span = map.Span().subspan(buffer_offset); + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(map, buffer_offset, copies); + } else if (image.info.type == ImageType::Buffer) { + const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; + image.UploadMemory(map, buffer_offset, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(map, buffer_offset, copies); + } +} + +template +ImageViewId TextureCache

::FindImageView(const TICEntry& config) { + if (!IsValidAddress(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} + +template +ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { + const ImageInfo info(config); + const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + ImageBase& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} + +template +ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} + +template +ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + return ImageId{}; + } + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options)) { + image_id = existing_image_id; + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} + +template +ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} + +template +ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + std::vector overlap_ids; + std::vector left_aliased_ids; + std::vector right_aliased_ids; + ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { + if (info.type != overlap.info.type) { + return; + } + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); + } + return; + } + const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options)) { + left_aliased_ids.push_back(overlap_id); + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { + right_aliased_ids.push_back(overlap_id); + } + }); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; + + // TODO: Only upload what we need + RefreshContents(new_image); + + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + } else { + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); + } + if (True(overlap.flags & 
ImageFlagBits::Tracked)) { + UntrackImage(overlap); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + } + for (const ImageId aliased_id : left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + } + RegisterImage(new_image_id); + return new_image_id; +} + +template +typename TextureCache

::BlitImages TextureCache

::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} + +template +SamplerId TextureCache

::FindSampler(const TSCEntry& config) { + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); + } + return pair->second; +} + +template +ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; + } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} + +template +ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} + +template +ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = image.TryFindBase(gpu_addr).value(); + } + const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} + +template +template +void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.Overlaps(cpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template +ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; + } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} + +template +void TextureCache

::RegisterImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { page_table[page].push_back(image_id); }); +} + +template +void TextureCache

::UnregisterImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~ImageFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT); + return; + } + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT); + return; + } + image_ids.erase(vector_it); + }); +} + +template +void TextureCache

::TrackImage(ImageBase& image) { + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); +} + +template +void TextureCache

::UntrackImage(ImageBase& image) { + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); +} + +template +void TextureCache

::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageViewId image_view_id : image_view_ids) { + 
sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); + + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); + } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} + +template +void TextureCache

::RemoveImageViewReferences(std::span removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; + } + } +} + +template +void TextureCache

::RemoveFramebuffers(std::span removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; + } + } +} + +template +void TextureCache

::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} + +template +void TextureCache

::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} + +template +void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image); + } + } else { + RefreshContents(image); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} + +template +void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + PrepareImage(image_view.image_id, is_modification, invalidate); +} + +template +void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = 
std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); + + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} + +template +void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} + +template +std::pair TextureCache

::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} + +template +bool TextureCache

::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h new file mode 100644 index 000000000..2ad2d72a6 --- /dev/null +++ b/src/video_core/texture_cache/types.h @@ -0,0 +1,140 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/texture_cache/slot_vector.h" + +namespace VideoCommon { + +constexpr size_t NUM_RT = 8; +constexpr size_t MAX_MIP_LEVELS = 14; + +constexpr SlotId CORRUPT_ID{0xfffffffe}; + +using ImageId = SlotId; +using ImageViewId = SlotId; +using ImageAllocId = SlotId; +using SamplerId = SlotId; +using FramebufferId = SlotId; + +enum class ImageType : u32 { + e1D, + e2D, + e3D, + Linear, + Buffer, +}; + +enum class ImageViewType : u32 { + e1D, + e2D, + Cube, + e3D, + e1DArray, + e2DArray, + CubeArray, + Rect, + Buffer, +}; +constexpr size_t NUM_IMAGE_VIEW_TYPES = 9; + +enum class RelaxedOptions : u32 { + Size = 1 << 0, + Format = 1 << 1, + Samples = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) + +struct Offset2D { + constexpr auto operator<=>(const Offset2D&) const noexcept = default; + + s32 x; + s32 y; +}; + +struct Offset3D { + constexpr auto operator<=>(const Offset3D&) const noexcept = default; + + s32 x; + s32 y; + s32 z; +}; + +struct Extent2D { + constexpr auto operator<=>(const Extent2D&) const noexcept = default; + + u32 width; + u32 height; +}; + +struct Extent3D { + constexpr auto operator<=>(const Extent3D&) const noexcept = default; + + u32 width; + u32 height; + u32 depth; +}; + +struct SubresourceLayers { + s32 base_level = 0; + s32 base_layer = 0; + s32 num_layers = 1; +}; + +struct SubresourceBase { + constexpr auto operator<=>(const SubresourceBase&) const noexcept = default; + + s32 level = 0; + s32 layer = 0; +}; + +struct SubresourceExtent { + constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default; + + s32 levels = 1; + s32 layers = 1; +}; + +struct SubresourceRange { + constexpr auto operator<=>(const SubresourceRange&) const noexcept = default; + + SubresourceBase base; + SubresourceExtent extent; +}; + +struct ImageCopy { + SubresourceLayers src_subresource; + SubresourceLayers dst_subresource; + Offset3D 
src_offset; + Offset3D dst_offset; + Extent3D extent; +}; + +struct BufferImageCopy { + size_t buffer_offset; + size_t buffer_size; + u32 buffer_row_length; + u32 buffer_image_height; + SubresourceLayers image_subresource; + Offset3D image_offset; + Extent3D image_extent; +}; + +struct BufferCopy { + size_t src_offset; + size_t dst_offset; + size_t size; +}; + +struct SwizzleParameters { + Extent3D num_tiles; + Extent3D block; + size_t buffer_offset; + s32 level; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp new file mode 100644 index 000000000..9ed1fc007 --- /dev/null +++ b/src/video_core/texture_cache/util.cpp @@ -0,0 +1,1232 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This files contains code from Ryujinx +// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx +// The sections using code from Ryujinx are marked with a link to the original version + +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include +#include +#include +#include +#include +#include + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/compatible_formats.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/astc.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon { + +namespace { + +using Tegra::Texture::GOB_SIZE; +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using Tegra::Texture::GOB_SIZE_Z; +using Tegra::Texture::GOB_SIZE_Z_SHIFT; +using Tegra::Texture::MsaaMode; +using Tegra::Texture::SwizzleTexture; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::UnswizzleTexture; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsViewCompatible; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; + +constexpr u32 
CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); + +struct LevelInfo { + Extent3D size; + Extent3D block; + Extent2D tile_size; + u32 bpp_log2; + u32 tile_width_spacing; +}; + +[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { + if (shift == 0) { + return 0; + } + u32 x = unit_factor << (shift - 1); + if (x >= dimension) { + while (--shift) { + x >>= 1; + if (x < dimension) { + break; + } + } + } + return shift; +} + +[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) { + return std::max(size >> level, 1); +} + +[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) { + return Extent3D{ + .width = AdjustMipSize(size.width, level), + .height = AdjustMipSize(size.height, level), + .depth = AdjustMipSize(size.depth, level), + }; +} + +[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) { + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + return Extent3D{ + .width = size.width >> samples_x, + .height = size.height >> samples_y, + .depth = size.depth, + }; +} + +template +[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) { + do { + while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) { + --block_size; + } + } while (level--); + return block_size; +} + +[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, + u32 level) { + return { + .width = AdjustMipBlockSize(num_tiles.width, block_size.width, level), + .height = AdjustMipBlockSize(num_tiles.height, block_size.height, level), + .depth = AdjustMipBlockSize(num_tiles.depth, block_size.depth, level), + }; +} + +[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) { + return { + .width = Common::DivCeil(size.width, tile_size.width), + .height = Common::DivCeil(size.height, tile_size.height), + .depth = size.depth, + }; +} + +[[nodiscard]] constexpr u32 
BytesPerBlockLog2(u32 bytes_per_block) { + return std::countl_zero(bytes_per_block) ^ 0x1F; +} + +[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) { + return BytesPerBlockLog2(BytesPerBlock(format)); +} + +[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) { + const Extent3D num_blocks = AdjustTileSize(size, tile_size); + return num_blocks.width * num_blocks.height * num_blocks.depth; +} + +[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) { + return Common::DivCeil(AdjustMipSize(size, level), block_size); +} + +[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { + return config.Width() * config.Height() * BytesPerBlock(format); +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { + switch (type) { + case TextureType::Texture2D: + case TextureType::Texture2DArray: + case TextureType::Texture2DNoMipmap: + case TextureType::Texture3D: + case TextureType::TextureCubeArray: + case TextureType::TextureCubemap: + return true; + case TextureType::Texture1D: + case TextureType::Texture1DArray: + case TextureType::Texture1DBuffer: + return false; + } + return false; +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { + switch (type) { + case ImageType::e2D: + case ImageType::e3D: + case ImageType::Linear: + return true; + case ImageType::e1D: + case ImageType::Buffer: + return false; + } + UNREACHABLE_MSG("Invalid image type={}", static_cast(type)); +} + +[[nodiscard]] constexpr std::pair Samples(int num_samples) { + switch (num_samples) { + case 1: + return {1, 1}; + case 2: + return {2, 1}; + case 4: + return {2, 2}; + case 8: + return {4, 2}; + case 16: + return {4, 4}; + } + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return {1, 1}; +} + +[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) { + return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; +} + +[[nodiscard]] constexpr Extent3D 
NumLevelBlocks(const LevelInfo& info, u32 level) { + return Extent3D{ + .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2, + .height = AdjustSize(info.size.height, level, info.tile_size.height), + .depth = AdjustMipSize(info.size.depth, level), + }; +} + +[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + return Extent3D{ + .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), + .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height), + .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth), + }; +} + +[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) { + return Extent2D{ + .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing, + .height = GOB_SIZE_Y_SHIFT + block_height, + }; +} + +[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, + u32 block_depth) { + return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) || + num_tiles.depth < (1U << block_depth); +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, + u32 bpp_log2) { + if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) { + return GOB_SIZE_X_SHIFT - bpp_log2; + } else { + return gob.width; + } +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2, + u32 tile_width_spacing) { + const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing); + return StrideAlignment(num_tiles, block, gob, bpp_log2); +} + +[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + const Extent2D gobs{ + .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT), + .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT), + }; + const Extent2D gob = GobSize(info.bpp_log2, 
info.block.height, info.tile_width_spacing); + const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); + const u32 alignment = is_small ? 0 : info.tile_width_spacing; + return Extent2D{ + .width = Common::AlignBits(gobs.width, alignment), + .height = gobs.height, + }; +} + +[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + const Extent3D tile_shift = TileShift(info, level); + const Extent2D gobs = NumGobs(info, level); + return Extent3D{ + .width = Common::DivCeilLog2(gobs.width, tile_shift.width), + .height = Common::DivCeilLog2(gobs.height, tile_shift.height), + .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth), + }; +} + +[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) { + const Extent3D tile_shift = TileShift(info, level); + const Extent3D tiles = LevelTiles(info, level); + const u32 num_tiles = tiles.width * tiles.height * tiles.depth; + const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth; + return num_tiles << shift; +} + +[[nodiscard]] constexpr std::array CalculateLevelSizes(const LevelInfo& info, + u32 num_levels) { + ASSERT(num_levels <= MAX_MIP_LEVELS); + std::array sizes{}; + for (u32 level = 0; level < num_levels; ++level) { + sizes[level] = CalculateLevelSize(info, level); + } + return sizes; +} + +[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, + u32 num_samples, u32 tile_width_spacing) { + const auto [samples_x, samples_y] = Samples(num_samples); + const u32 bytes_per_block = BytesPerBlock(format); + return { + .size = + { + .width = size.width * samples_x, + .height = size.height * samples_y, + .depth = size.depth, + }, + .block = block, + .tile_size = DefaultBlockSize(format), + .bpp_log2 = BytesPerBlockLog2(bytes_per_block), + .tile_width_spacing = tile_width_spacing, + }; +} + +[[nodiscard]] constexpr 
LevelInfo MakeLevelInfo(const ImageInfo& info) { + return MakeLevelInfo(info.format, info.size, info.block, info.num_samples, + info.tile_width_spacing); +} + +[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, + u32 num_samples, u32 tile_width_spacing, + u32 level) { + const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing); + u32 offset = 0; + for (u32 current_level = 0; current_level < level; ++current_level) { + offset += CalculateLevelSize(info, current_level); + } + return offset; +} + +[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, + u32 tile_size_y, u32 tile_width_spacing) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 + if (tile_width_spacing > 0) { + const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; + return Common::AlignBits(size_bytes, alignment_log2); + } + const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); + while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { + --block.height; + } + while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) { + --block.depth; + } + const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth; + const u32 num_blocks = size_bytes >> block_shift; + if (size_bytes != num_blocks << block_shift) { + return (num_blocks + 1) << block_shift; + } + return size_bytes; +} + +[[nodiscard]] std::optional ResolveOverlapEqualAddress(const ImageInfo& new_info, + const ImageBase& overlap, + bool strict_size) { + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) { + return std::nullopt; + } + if (new_info.block != info.block) { + return std::nullopt; + } + const SubresourceExtent resources = new_info.resources; + return SubresourceExtent{ + .levels = 
std::max(resources.levels, info.resources.levels), + .layers = std::max(resources.layers, info.resources.layers), + }; +} + +[[nodiscard]] std::optional ResolveOverlapRightAddress3D( + const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { + const std::vector slice_offsets = CalculateSliceOffsets(new_info); + const u32 diff = static_cast(overlap.gpu_addr - gpu_addr); + const auto it = std::ranges::find(slice_offsets, diff); + if (it == slice_offsets.end()) { + return std::nullopt; + } + const std::vector subresources = CalculateSliceSubresources(new_info); + const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { + return std::nullopt; + } + const u32 mip_depth = std::max(1U, new_info.size.depth << base.level); + if (mip_depth < info.size.depth + base.layer) { + return std::nullopt; + } + if (MipBlockSize(new_info, base.level) != info.block) { + return std::nullopt; + } + return SubresourceExtent{ + .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), + .layers = 1, + }; +} + +[[nodiscard]] std::optional ResolveOverlapRightAddress2D( + const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { + const u32 layer_stride = new_info.layer_stride; + const s32 new_size = layer_stride * new_info.resources.layers; + const s32 diff = static_cast(overlap.gpu_addr - gpu_addr); + if (diff > new_size) { + return std::nullopt; + } + const s32 base_layer = diff / layer_stride; + const s32 mip_offset = diff % layer_stride; + const std::array offsets = CalculateMipLevelOffsets(new_info); + const auto end = offsets.begin() + new_info.resources.levels; + const auto it = std::find(offsets.begin(), end, mip_offset); + if (it == end) { + // Mipmap is not aligned to any valid size + return std::nullopt; + } + const SubresourceBase base{ + 
.level = static_cast(std::distance(offsets.begin(), it)), + .layer = base_layer, + }; + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { + return std::nullopt; + } + if (MipBlockSize(new_info, base.level) != info.block) { + return std::nullopt; + } + return SubresourceExtent{ + .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), + .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), + }; +} + +[[nodiscard]] std::optional ResolveOverlapRightAddress(const ImageInfo& new_info, + GPUVAddr gpu_addr, + VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size) { + std::optional resources; + if (new_info.type != ImageType::e3D) { + resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size); + } else { + resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size); + } + if (!resources) { + return std::nullopt; + } + return OverlapResult{ + .gpu_addr = gpu_addr, + .cpu_addr = cpu_addr, + .resources = *resources, + }; +} + +[[nodiscard]] std::optional ResolveOverlapLeftAddress(const ImageInfo& new_info, + GPUVAddr gpu_addr, + VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size) { + const std::optional base = overlap.TryFindBase(gpu_addr); + if (!base) { + return std::nullopt; + } + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) { + return std::nullopt; + } + if (new_info.block != MipBlockSize(info, base->level)) { + return std::nullopt; + } + const SubresourceExtent resources = new_info.resources; + s32 layers = 1; + if (info.type != ImageType::e3D) { + layers = std::max(resources.layers, info.resources.layers + base->layer); + } + return OverlapResult{ + .gpu_addr = overlap.gpu_addr, + .cpu_addr = overlap.cpu_addr, + .resources = + { + .levels = std::max(resources.levels + base->level, info.resources.levels), + 
.layers = layers, + }, + }; +} + +[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212 + static constexpr u32 STRIDE_ALIGNMENT = 32; + ASSERT(info.type == ImageType::Linear); + const Extent2D num_tiles{ + .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)), + .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)), + }; + const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format); + return Extent2D{ + .width = Common::AlignUp(num_tiles.width, width_alignment), + .height = num_tiles.height, + }; +} + +[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176 + ASSERT(info.type != ImageType::Linear); + const Extent3D size = AdjustMipSize(info.size, level); + const Extent3D num_tiles{ + .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)), + .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)), + .depth = size.depth, + }; + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); + const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); + return Extent3D{ + .width = Common::AlignBits(num_tiles.width, alignment), + .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), + .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), + }; +} + +[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept { + u32 num_blocks = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + const Extent3D mip_size = AdjustMipSize(info.size, level); + num_blocks += 
NumBlocks(mip_size, tile_size); + } + return num_blocks; +} + +[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept { + ASSERT(info.type == ImageType::e3D); + u32 num_slices = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + num_slices += AdjustMipSize(info.size.depth, level); + } + return num_slices; +} + +void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, const BufferImageCopy& copy, + std::span memory) { + ASSERT(copy.image_offset.z == 0); + ASSERT(copy.image_extent.depth == 1); + ASSERT(copy.image_subresource.base_level == 0); + ASSERT(copy.image_subresource.base_layer == 0); + ASSERT(copy.image_subresource.num_layers == 1); + + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 row_length = copy.image_extent.width * bytes_per_block; + const u32 guest_offset_x = copy.image_offset.x * bytes_per_block; + + for (u32 line = 0; line < copy.image_extent.height; ++line) { + const u32 host_offset_y = line * info.pitch; + const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch; + const u32 guest_offset = guest_offset_x + guest_offset_y; + gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y, + row_length); + } +} + +void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, const BufferImageCopy& copy, + std::span input) { + const Extent3D size = info.size; + const LevelInfo level_info = MakeLevelInfo(info); + const Extent2D tile_size = DefaultBlockSize(info.format); + const u32 bytes_per_block = BytesPerBlock(info.format); + + const s32 level = copy.image_subresource.base_level; + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; + + UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + + UNIMPLEMENTED_IF(copy.image_offset.x != 0); + 
UNIMPLEMENTED_IF(copy.image_offset.y != 0); + UNIMPLEMENTED_IF(copy.image_offset.z != 0); + UNIMPLEMENTED_IF(copy.image_extent != level_size); + + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + + size_t host_offset = copy.buffer_offset; + + const u32 num_levels = info.resources.levels; + const std::array sizes = CalculateLevelSizes(level_info, num_levels); + size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0); + const size_t layer_stride = + AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size, + level_info.block, tile_size.height, info.tile_width_spacing); + const size_t subresource_size = sizes[level]; + + const auto dst_data = std::make_unique(subresource_size); + const std::span dst(dst_data.get(), subresource_size); + + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + const std::span src = input.subspan(host_offset); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth); + + gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + + host_offset += host_bytes_per_layer; + guest_offset += layer_stride; + } + ASSERT(host_offset - copy.buffer_offset == copy.buffer_size); +} + +} // Anonymous namespace + +u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * BytesPerBlock(info.format); + } + if (info.type == ImageType::Linear) { + return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); + } + if (info.resources.layers > 1) { + ASSERT(info.layer_stride != 0); + return info.layer_stride * info.resources.layers; + } else { + return CalculateLayerSize(info); + } +} + +u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * 
BytesPerBlock(info.format); + } + if (info.num_samples > 1) { + // Multisample images can't be uploaded or downloaded to the host + return 0; + } + if (info.type == ImageType::Linear) { + return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); + } + const Extent2D tile_size = DefaultBlockSize(info.format); + return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format); +} + +u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * BytesPerBlock(info.format); + } + static constexpr Extent2D TILE_SIZE{1, 1}; + return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; +} + +u32 CalculateLayerStride(const ImageInfo& info) noexcept { + ASSERT(info.type != ImageType::Linear); + const u32 layer_size = CalculateLayerSize(info); + const Extent3D size = info.size; + const Extent3D block = info.block; + const u32 tile_size_y = DefaultBlockHeight(info.format); + return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing); +} + +u32 CalculateLayerSize(const ImageInfo& info) noexcept { + ASSERT(info.type != ImageType::Linear); + return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples, + info.tile_width_spacing, info.resources.levels); +} + +std::array CalculateMipLevelOffsets(const ImageInfo& info) noexcept { + ASSERT(info.resources.levels <= MAX_MIP_LEVELS); + const LevelInfo level_info = MakeLevelInfo(info); + std::array offsets{}; + u32 offset = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + offsets[level] = offset; + offset += CalculateLevelSize(level_info, level); + } + return offsets; +} + +std::vector CalculateSliceOffsets(const ImageInfo& info) { + ASSERT(info.type == ImageType::e3D); + std::vector offsets; + offsets.reserve(NumSlices(info)); + + const LevelInfo level_info = MakeLevelInfo(info); + u32 mip_offset = 0; + for 
(s32 level = 0; level < info.resources.levels; ++level) { + const Extent3D tile_shift = TileShift(level_info, level); + const Extent3D tiles = LevelTiles(level_info, level); + const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT; + const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift; + const u32 z_mask = (1U << tile_shift.depth) - 1; + const u32 depth = AdjustMipSize(info.size.depth, level); + for (u32 slice = 0; slice < depth; ++slice) { + const u32 z_low = slice & z_mask; + const u32 z_high = slice & ~z_mask; + offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size)); + } + mip_offset += CalculateLevelSize(level_info, level); + } + return offsets; +} + +std::vector CalculateSliceSubresources(const ImageInfo& info) { + ASSERT(info.type == ImageType::e3D); + std::vector subresources; + subresources.reserve(NumSlices(info)); + for (s32 level = 0; level < info.resources.levels; ++level) { + const s32 depth = AdjustMipSize(info.size.depth, level); + for (s32 slice = 0; slice < depth; ++slice) { + subresources.emplace_back(SubresourceBase{ + .level = level, + .layer = slice, + }); + } + } + return subresources; +} + +u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { + const Extent2D tile_size = DefaultBlockSize(info.format); + const Extent3D level_size = AdjustMipSize(info.size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); +} + +PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept { + return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, + config.a_type, config.srgb_conversion); +} + +ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { + switch (info.type) { + case ImageType::e2D: + return 
info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D; + case ImageType::e3D: + return ImageViewType::e2DArray; + case ImageType::Linear: + return ImageViewType::e2D; + default: + UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast(info.type)); + return ImageViewType{}; + } +} + +std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, + SubresourceBase base) { + ASSERT(dst.resources.levels >= src.resources.levels); + ASSERT(dst.num_samples == src.num_samples); + + const bool is_dst_3d = dst.type == ImageType::e3D; + if (is_dst_3d) { + ASSERT(src.type == ImageType::e3D); + ASSERT(src.resources.levels == 1); + } + + std::vector copies; + copies.reserve(src.resources.levels); + for (s32 level = 0; level < src.resources.levels; ++level) { + ImageCopy& copy = copies.emplace_back(); + copy.src_subresource = SubresourceLayers{ + .base_level = level, + .base_layer = 0, + .num_layers = src.resources.layers, + }; + copy.dst_subresource = SubresourceLayers{ + .base_level = base.level + level, + .base_layer = is_dst_3d ? 0 : base.layer, + .num_layers = is_dst_3d ? 1 : src.resources.layers, + }; + copy.src_offset = Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }; + copy.dst_offset = Offset3D{ + .x = 0, + .y = 0, + .z = is_dst_3d ? 
base.layer : 0, + }; + const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level); + copy.extent = AdjustSamplesSize(mip_size, dst.num_samples); + if (is_dst_3d) { + copy.extent.depth = src.size.depth; + } + } + return copies; +} + +bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + if (config.Address() == 0) { + return false; + } + if (config.Address() > (u64(1) << 48)) { + return false; + } + return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); +} + +std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, std::span output) { + const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const Extent3D size = info.size; + + if (info.type == ImageType::Linear) { + gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); + + ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); + return {{ + .buffer_offset = 0, + .buffer_size = guest_size_bytes, + .buffer_row_length = info.pitch >> bpp_log2, + .buffer_image_height = size.height, + .image_subresource = + { + .base_level = 0, + .base_layer = 0, + .num_layers = 1, + }, + .image_offset = {0, 0, 0}, + .image_extent = size, + }}; + } + const auto input_data = std::make_unique(guest_size_bytes); + gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); + const std::span input(input_data.get(), guest_size_bytes); + + const LevelInfo level_info = MakeLevelInfo(info); + const s32 num_layers = info.resources.layers; + const s32 num_levels = info.resources.levels; + const Extent2D tile_size = DefaultBlockSize(info.format); + const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); + const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); + const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0); + const u32 layer_stride = 
AlignLayerSize(layer_size, size, level_info.block, tile_size.height, + info.tile_width_spacing); + size_t guest_offset = 0; + u32 host_offset = 0; + std::vector copies(num_levels); + + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2; + copies[level] = BufferImageCopy{ + .buffer_offset = host_offset, + .buffer_size = static_cast(host_bytes_per_layer) * num_layers, + .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width), + .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height), + .image_subresource = + { + .base_level = level, + .base_layer = 0, + .num_layers = info.resources.layers, + }, + .image_offset = {0, 0, 0}, + .image_extent = level_size, + }; + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); + size_t guest_layer_offset = 0; + + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + const std::span dst = output.subspan(host_offset); + const std::span src = input.subspan(guest_offset + guest_layer_offset); + UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth, stride_alignment); + guest_layer_offset += layer_stride; + host_offset += host_bytes_per_layer; + } + guest_offset += level_sizes[level]; + } + return copies; +} + +BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageBase& image, std::span output) { + gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); + return BufferCopy{ + .src_offset = 0, + .dst_offset = 0, + .size = image.guest_size_bytes, + }; +} + +void ConvertImage(std::span input, const 
ImageInfo& info, std::span output, + std::span copies) { + u32 output_offset = 0; + + const Extent2D tile_size = DefaultBlockSize(info.format); + for (BufferImageCopy& copy : copies) { + const u32 level = copy.image_subresource.base_level; + const Extent3D mip_size = AdjustMipSize(info.size, level); + ASSERT(copy.image_offset == Offset3D{}); + ASSERT(copy.image_subresource.base_layer == 0); + ASSERT(copy.image_extent == mip_size); + ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); + ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); + + if (IsPixelFormatASTC(info.format)) { + ASSERT(copy.image_extent.depth == 1); + Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), + copy.image_extent.width, copy.image_extent.height, + copy.image_subresource.num_layers, tile_size.width, + tile_size.height, output.subspan(output_offset)); + } else { + DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, + output.subspan(output_offset)); + } + copy.buffer_offset = output_offset; + copy.buffer_row_length = mip_size.width; + copy.buffer_image_height = mip_size.height; + + output_offset += copy.image_extent.width * copy.image_extent.height * + copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; + } +} + +std::vector FullDownloadCopies(const ImageInfo& info) { + const Extent3D size = info.size; + const u32 bytes_per_block = BytesPerBlock(info.format); + if (info.type == ImageType::Linear) { + ASSERT(info.pitch % bytes_per_block == 0); + return {{ + .buffer_offset = 0, + .buffer_size = static_cast(info.pitch) * size.height, + .buffer_row_length = info.pitch / bytes_per_block, + .buffer_image_height = size.height, + .image_subresource = + { + .base_level = 0, + .base_layer = 0, + .num_layers = 1, + }, + .image_offset = {0, 0, 0}, + .image_extent = size, + }}; + } + UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + + const s32 num_layers = info.resources.layers; + const s32 
num_levels = info.resources.levels; + const Extent2D tile_size = DefaultBlockSize(info.format); + + u32 host_offset = 0; + + std::vector copies(num_levels); + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers; + copies[level] = BufferImageCopy{ + .buffer_offset = host_offset, + .buffer_size = host_bytes_per_level, + .buffer_row_length = level_size.width, + .buffer_image_height = level_size.height, + .image_subresource = + { + .base_level = level, + .base_layer = 0, + .num_layers = info.resources.layers, + }, + .image_offset = {0, 0, 0}, + .image_extent = level_size, + }; + host_offset += host_bytes_per_level; + } + return copies; +} + +Extent3D MipSize(Extent3D size, u32 level) { + return AdjustMipSize(size, level); +} + +Extent3D MipBlockSize(const ImageInfo& info, u32 level) { + const LevelInfo level_info = MakeLevelInfo(info); + const Extent2D tile_size = DefaultBlockSize(info.format); + const Extent3D level_size = AdjustMipSize(info.size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + return AdjustMipBlockSize(num_tiles, level_info.block, level); +} + +std::vector FullUploadSwizzles(const ImageInfo& info) { + const Extent2D tile_size = DefaultBlockSize(info.format); + if (info.type == ImageType::Linear) { + return std::vector{SwizzleParameters{ + .num_tiles = AdjustTileSize(info.size, tile_size), + .block = {}, + .buffer_offset = 0, + .level = 0, + }}; + } + const LevelInfo level_info = MakeLevelInfo(info); + const Extent3D size = info.size; + const s32 num_levels = info.resources.levels; + + u32 guest_offset = 0; + std::vector params(num_levels); + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, 
tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + params[level] = SwizzleParameters{ + .num_tiles = num_tiles, + .block = block, + .buffer_offset = guest_offset, + .level = level, + }; + guest_offset += CalculateLevelSize(level_info, level); + } + return params; +} + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span copies, std::span memory) { + const bool is_pitch_linear = info.type == ImageType::Linear; + for (const BufferImageCopy& copy : copies) { + if (is_pitch_linear) { + SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); + } else { + SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); + } + } +} + +bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level, + u32 rhs_level, bool strict_size) noexcept { + ASSERT(lhs.type != ImageType::Linear); + ASSERT(rhs.type != ImageType::Linear); + if (strict_size) { + const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); + const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); + return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; + } else { + const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level); + const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level); + return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; + } +} + +bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { + ASSERT(lhs.type == ImageType::Linear); + ASSERT(rhs.type == ImageType::Linear); + if (strict_size) { + return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height; + } else { + const Extent2D lhs_size = PitchLinearAlignedSize(lhs); + const Extent2D rhs_size = PitchLinearAlignedSize(rhs); + return lhs_size == rhs_size; + } +} + +std::optional ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, + VAddr cpu_addr, const ImageBase& 
overlap, + bool strict_size) { + ASSERT(new_info.type != ImageType::Linear); + ASSERT(overlap.info.type != ImageType::Linear); + if (!IsLayerStrideCompatible(new_info, overlap.info)) { + return std::nullopt; + } + if (!IsViewCompatible(overlap.info.format, new_info.format)) { + return std::nullopt; + } + if (gpu_addr == overlap.gpu_addr) { + const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size); + if (!solution) { + return std::nullopt; + } + return OverlapResult{ + .gpu_addr = gpu_addr, + .cpu_addr = cpu_addr, + .resources = *solution, + }; + } + if (overlap.gpu_addr > gpu_addr) { + return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); + } + // if overlap.gpu_addr < gpu_addr + return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); +} + +bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { + // If either of the layer strides is zero, we can assume they are compatible + // These images generally come from rendertargets + if (lhs.layer_stride == 0) { + return true; + } + if (rhs.layer_stride == 0) { + return true; + } + // It's definitely compatible if the layer stride matches + if (lhs.layer_stride == rhs.layer_stride) { + return true; + } + // Although we also have to compare for cases where it can be unaligned + // This can happen if the image doesn't have layers, so the stride is not aligned + if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) { + return true; + } + return false; +} + +std::optional FindSubresource(const ImageInfo& candidate, const ImageBase& image, + GPUVAddr candidate_addr, RelaxedOptions options) { + const std::optional base = image.TryFindBase(candidate_addr); + if (!base) { + return std::nullopt; + } + const ImageInfo& existing = image.info; + if (False(options & RelaxedOptions::Format)) { + if (!IsViewCompatible(existing.format, candidate.format)) { + return std::nullopt; + } + } + if 
(!IsLayerStrideCompatible(existing, candidate)) { + return std::nullopt; + } + if (existing.type != candidate.type) { + return std::nullopt; + } + if (False(options & RelaxedOptions::Samples)) { + if (existing.num_samples != candidate.num_samples) { + return std::nullopt; + } + } + if (existing.resources.levels < candidate.resources.levels + base->level) { + return std::nullopt; + } + if (existing.type == ImageType::e3D) { + const u32 mip_depth = std::max(1U, existing.size.depth << base->level); + if (mip_depth < candidate.size.depth + base->layer) { + return std::nullopt; + } + } else { + if (existing.resources.layers < candidate.resources.layers + base->layer) { + return std::nullopt; + } + } + const bool strict_size = False(options & RelaxedOptions::Size); + if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { + return std::nullopt; + } + // TODO: compare block sizes + return base; +} + +bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, + RelaxedOptions options) { + return FindSubresource(candidate, image, candidate_addr, options).has_value(); +} + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, + const ImageBase* src) { + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + dst_info.format = dst->info.format; + } + if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + dst_info.format = src->info.format; + } + if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } +} + +u32 MapSizeBytes(const ImageBase& image) { + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + return image.guest_size_bytes; + } else if (True(image.flags & ImageFlagBits::Converted)) { + return image.converted_size_bytes; + } 
else { + return image.unswizzled_size_bytes; + } +} + +using P = PixelFormat; + +static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000); +static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000); + +static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00); +static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) == + 0x50d200); + +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800); + +constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height, + u32 tile_width_spacing, u32 level) { + const Extent3D size{width, height, 1}; + const Extent3D block{0, block_height, 0}; + const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level); + return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing); +} + +static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800); 
+static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000); +static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000); + +static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000, + "Tile width spacing is not working"); +static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000, + "Compressed tile width spacing is not working"); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h new file mode 100644 index 000000000..dbbbd33cd --- /dev/null +++ b/src/video_core/texture_cache/util.h @@ -0,0 +1,107 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::TICEntry; + +struct OverlapResult { + GPUVAddr gpu_addr; + VAddr cpu_addr; + SubresourceExtent resources; +}; + +[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept; + +[[nodiscard]] std::array CalculateMipLevelOffsets( + const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector CalculateSliceOffsets(const ImageInfo& info); + +[[nodiscard]] std::vector CalculateSliceSubresources(const ImageInfo& info); + +[[nodiscard]] u32 CalculateLevelStrideAlignment(const 
ImageInfo& info, u32 level); + +[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC( + const Tegra::Texture::TICEntry& config) noexcept; + +[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector MakeShrinkImageCopies(const ImageInfo& dst, + const ImageInfo& src, + SubresourceBase base); + +[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); + +[[nodiscard]] std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, + GPUVAddr gpu_addr, const ImageInfo& info, + std::span output); + +[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageBase& image, std::span output); + +void ConvertImage(std::span input, const ImageInfo& info, std::span output, + std::span copies); + +[[nodiscard]] std::vector FullDownloadCopies(const ImageInfo& info); + +[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); + +[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); + +[[nodiscard]] std::vector FullUploadSwizzles(const ImageInfo& info); + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span copies, std::span memory); + +[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, + const ImageInfo& overlap_info, u32 new_level, + u32 overlap_level, bool strict_size) noexcept; + +[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, + bool strict_size) noexcept; + +[[nodiscard]] std::optional ResolveOverlap(const ImageInfo& new_info, + GPUVAddr gpu_addr, VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size); + +[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); + +[[nodiscard]] std::optional FindSubresource(const ImageInfo& candidate, + const ImageBase& image, + GPUVAddr candidate_addr, + RelaxedOptions options); + +[[nodiscard]] bool 
IsSubresource(const ImageInfo& candidate, const ImageBase& image, + GPUVAddr candidate_addr, RelaxedOptions options); + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, + const ImageBase* src); + +[[nodiscard]] u32 MapSizeBytes(const ImageBase& image); + +} // namespace VideoCommon diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, +static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, u32 blockHeight) { // Don't actually care about the void extent, just read the bits... for (s32 i = 0; i < 4; ++i) { @@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block } } -static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { +static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { for (u32 j = 0; j < blockHeight; j++) { for (u32 i = 0; i < blockWidth; i++) { outBuf[j * blockWidth + i] = 0xFFFF00FF; @@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, #undef READ_INT_VALUES } -static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, - u32* outBuf) { - InputBitStream strm(inBuf); +static void DecompressBlock(std::span inBuf, const u32 blockWidth, + const u32 blockHeight, std::span outBuf) { + InputBitStream strm(inBuf.data()); TexelWeightParams weightParams = DecodeBlockInfo(strm); // Was there an error? @@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 } // Read the texel weight data.. 
- u8 texelWeightData[16]; - memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); + std::array texelWeightData; + std::ranges::copy(inBuf, texelWeightData.begin()); // Reverse everything for (u32 i = 0; i < 8; i++) { @@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 // Make sure that higher non-texel bits are set to zero const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; - texelWeightData[clearByteStart - 1] = - texelWeightData[clearByteStart - 1] & - static_cast((1 << (weightParams.GetPackedBitSize() % 8)) - 1); - memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); + if (clearByteStart > 0) { + texelWeightData[clearByteStart - 1] &= + static_cast((1 << (weightParams.GetPackedBitSize() % 8)) - 1); + } + std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U)); IntegerEncodedVector texelWeightValues; - InputBitStream weightStream(texelWeightData); + InputBitStream weightStream(texelWeightData.data()); DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, weightParams.GetNumWeightValues()); @@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 namespace Tegra::Texture::ASTC { -std::vector Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, - u32 block_height) { - u32 blockIdx = 0; +void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span output) { + u32 block_index = 0; std::size_t depth_offset = 0; - std::vector outData(height * width * depth * 4); - for (u32 k = 0; k < depth; k++) { - for (u32 j = 0; j < height; j += block_height) { - for (u32 i = 0; i < width; i += block_width) { - - const u8* blockPtr = data + blockIdx * 16; + for (u32 z = 0; z < depth; z++) { + for (u32 y = 0; y < height; y += block_height) { + for (u32 x = 0; x < width; x += block_width) { 
+ const std::span blockPtr{data.subspan(block_index * 16, 16)}; // Blocks can be at most 12x12 - u32 uncompData[144]; + std::array uncompData; ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); - u32 decompWidth = std::min(block_width, width - i); - u32 decompHeight = std::min(block_height, height - j); + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); - u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; + const std::span outRow = output.subspan(depth_offset + (y * width + x) * 4); for (u32 jj = 0; jj < decompHeight; jj++) { - memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); + std::memcpy(outRow.data() + jj * width * 4, + uncompData.data() + jj * block_width, decompWidth * 4); } - - blockIdx++; + ++block_index; } } depth_offset += height * width * 4; } - - return outData; } } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 991cdba72..9105119bc 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -5,11 +5,10 @@ #pragma once #include -#include namespace Tegra::Texture::ASTC { -std::vector Decompress(const uint8_t* data, uint32_t width, uint32_t height, - uint32_t depth, uint32_t block_width, uint32_t block_height); +void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span output); } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp deleted file mode 100644 index bd1aebf02..000000000 --- a/src/video_core/textures/convert.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/surface.h" -#include "video_core/textures/astc.h" -#include "video_core/textures/convert.h" - -namespace Tegra::Texture { - -using VideoCore::Surface::PixelFormat; - -template -void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { - union S8Z24 { - BitField<0, 24, u32> z24; - BitField<24, 8, u32> s8; - }; - static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); - - union Z24S8 { - BitField<0, 8, u32> s8; - BitField<8, 24, u32> z24; - }; - static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); - - S8Z24 s8z24_pixel{}; - Z24S8 z24s8_pixel{}; - constexpr auto bpp{ - VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; - for (std::size_t y = 0; y < height; ++y) { - for (std::size_t x = 0; x < width; ++x) { - const std::size_t offset{bpp * (y * width + x)}; - if constexpr (reverse) { - std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); - s8z24_pixel.s8.Assign(z24s8_pixel.s8); - s8z24_pixel.z24.Assign(z24s8_pixel.z24); - std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); - } else { - std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); - z24s8_pixel.s8.Assign(s8z24_pixel.s8); - z24s8_pixel.z24.Assign(s8z24_pixel.z24); - std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); - } - } - } -} - -static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { - SwapS8Z24ToZ24S8(data, width, height); -} - -static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { - SwapS8Z24ToZ24S8(data, width, height); -} - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { - if (convert_astc && IsPixelFormatASTC(pixel_format)) { - // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. 
- u32 block_width{}; - u32 block_height{}; - std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - const std::vector rgba8_data = Tegra::Texture::ASTC::Decompress( - in_data, width, height, depth, block_width, block_height); - std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); - - } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { - Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); - } -} - -void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, - bool convert_astc, bool convert_s8z24) { - if (convert_astc && IsPixelFormatASTC(pixel_format)) { - LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", - pixel_format); - UNREACHABLE(); - - } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { - Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); - } -} - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h deleted file mode 100644 index d5d6c77bb..000000000 --- a/src/video_core/textures/convert.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCore::Surface { -enum class PixelFormat; -} - -namespace Tegra::Texture { - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, - u32 width, u32 height, u32 depth, bool convert_astc, - bool convert_s8z24); - -void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24); - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 16d46a018..9f5181318 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -2,204 +2,111 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include +#include +#include + #include "common/alignment.h" #include "common/assert.h" #include "common/bit_util.h" +#include "common/div_ceil.h" #include "video_core/gpu.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" namespace Tegra::Texture { -namespace { +namespace { /** - * This table represents the internal swizzle of a gob, - * in format 16 bytes x 2 sector packing. + * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. * Calculates the offset of an (x, y) position within a swizzled texture. * Taken from the Tegra X1 Technical Reference Manual. 
pages 1187-1188 */ -template -struct alignas(64) SwizzleTable { - static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); - constexpr SwizzleTable() { - for (u32 y = 0; y < N; ++y) { - for (u32 x = 0; x < M; ++x) { - const u32 x2 = x * Align; - values[y][x] = static_cast(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + - ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16)); - } +constexpr SwizzleTable MakeSwizzleTableConst() { + SwizzleTable table{}; + for (u32 y = 0; y < table.size(); ++y) { + for (u32 x = 0; x < table[0].size(); ++x) { + table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); } } - const std::array& operator[](std::size_t index) const { - return values[index]; - } - std::array, N> values{}; -}; - -constexpr u32 FAST_SWIZZLE_ALIGN = 16; - -constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable(); -constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable(); - -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. - * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. Block_Width is assumed to be 1. 
- */ -void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, - const u32 y_end, const u32 z_end, const u32 tile_offset, - const u32 xy_block_size, const u32 layer_z, const u32 stride_x, - const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { - std::array data_ptrs; - u32 z_address = tile_offset; - - for (u32 z = z_start; z < z_end; z++) { - u32 y_address = z_address; - u32 pixel_base = layer_z * z + y_start * stride_x; - for (u32 y = y_start; y < y_end; y++) { - const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; - for (u32 x = x_start; x < x_end; x++) { - const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; - const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; - data_ptrs[unswizzle] = swizzled_data + swizzle_offset; - data_ptrs[!unswizzle] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); - } - pixel_base += stride_x; - if ((y + 1) % GOB_SIZE_Y == 0) - y_address += GOB_SIZE; - } - z_address += xy_block_size; - } + return table; } -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. - * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. Block_Width is assumed to be 1. 
- */ -void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, - const u32 y_end, const u32 z_end, const u32 tile_offset, - const u32 xy_block_size, const u32 layer_z, const u32 stride_x, - const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { - std::array data_ptrs; - u32 z_address = tile_offset; - const u32 x_startb = x_start * bytes_per_pixel; - const u32 x_endb = x_end * bytes_per_pixel; +constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); - for (u32 z = z_start; z < z_end; z++) { - u32 y_address = z_address; - u32 pixel_base = layer_z * z + y_start * stride_x; - for (u32 y = y_start; y < y_end; y++) { - const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; - for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { - const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; - const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; - const u32 pixel_index{out_x + pixel_base}; - data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; - data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); - } - pixel_base += stride_x; - if ((y + 1) % GOB_SIZE_Y == 0) - y_address += GOB_SIZE; - } - z_address += xy_block_size; - } -} +template +void Swizzle(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { + // The origin of the transformation can be configured here, leave it as zero as the current API + // doesn't expose it. + static constexpr u32 origin_x = 0; + static constexpr u32 origin_y = 0; + static constexpr u32 origin_z = 0; -/** - * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. 
- * The body of this function takes care of splitting the swizzled texture into blocks, - * and managing the extents of it. Once all the parameters of a single block are obtained, - * the function calls 'ProcessBlock' to process that particular Block. - * - * Documentation for the memory layout and decoding can be found at: - * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces - */ -template -void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, - const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, - const u32 width_spacing) { - auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; - const u32 stride_x = width * out_bytes_per_pixel; - const u32 layer_z = height * stride_x; - const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; - constexpr u32 gob_elements_y = GOB_SIZE_Y; - constexpr u32 gob_elements_z = GOB_SIZE_Z; - const u32 block_x_elements = gob_elements_x; - const u32 block_y_elements = gob_elements_y * block_height; - const u32 block_z_elements = gob_elements_z * block_depth; - const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); - const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); - const u32 blocks_on_y = div_ceil(height, block_y_elements); - const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 xy_block_size = GOB_SIZE * block_height; - const u32 block_size = xy_block_size * block_depth; - u32 tile_offset = 0; - for (u32 zb = 0; zb < blocks_on_z; zb++) { - const u32 z_start = zb * block_z_elements; - const u32 z_end = std::min(depth, z_start + block_z_elements); - for (u32 yb = 0; yb < blocks_on_y; yb++) { - const u32 y_start = yb * block_y_elements; - const u32 y_end = std::min(height, y_start + block_y_elements); - for (u32 xb = 0; xb < blocks_on_x; xb++) { - const u32 x_start = xb * 
block_x_elements; - const u32 x_end = std::min(width, x_start + block_x_elements); - if constexpr (fast) { - FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, - z_start, x_end, y_end, z_end, tile_offset, xy_block_size, - layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); - } else { - PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, - z_start, x_end, y_end, z_end, tile_offset, xy_block_size, - layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); - } - tile_offset += block_size; + // We can configure here a custom pitch + // As it's not exposed 'width * bpp' will be the expected pitch. + const u32 pitch = width * bytes_per_pixel; + const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; + + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); + const u32 slice_size = + Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; + + const u32 block_height_mask = (1U << block_height) - 1; + const u32 block_depth_mask = (1U << block_depth) - 1; + const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; + + for (u32 slice = 0; slice < depth; ++slice) { + const u32 z = slice + origin_z; + const u32 offset_z = (z >> block_depth) * slice_size + + ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); + for (u32 line = 0; line < height; ++line) { + const u32 y = line + origin_y; + const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; + + const u32 block_y = y >> GOB_SIZE_Y_SHIFT; + const u32 offset_y = (block_y >> block_height) * block_size + + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); + + for (u32 column = 0; column < width; ++column) { + const u32 x = (column + origin_x) * bytes_per_pixel; + const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; + + const u32 base_swizzled_offset = offset_z + offset_y + offset_x; + const u32 swizzled_offset = 
base_swizzled_offset + table[x % GOB_SIZE_X]; + + const u32 unswizzled_offset = + slice * pitch * height + line * pitch + column * bytes_per_pixel; + + u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; + const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; + std::memcpy(dst, src, bytes_per_pixel); } } } } - } // Anonymous namespace -void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { - const u32 block_height_size{1U << block_height}; - const u32 block_depth_size{1U << block_depth}; - if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { - SwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height_size, - block_depth_size, width_spacing); - } else { - SwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height_size, - block_depth_size, width_spacing); - } +SwizzleTable MakeSwizzleTable() { + return SWIZZLE_TABLE; } -void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, - u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, - u32 block_depth, u32 width_spacing) { - CopySwizzledData((width + tile_size_x - 1) / tile_size_x, - (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, - bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, - width_spacing); +void UnswizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); } -std::vector UnswizzleTexture(u8* address, 
u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, u32 block_height, - u32 block_depth, u32 width_spacing) { - std::vector unswizzled_data(width * height * depth * bytes_per_pixel); - UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, - width, height, depth, block_height, block_depth, width_spacing); - return unswizzled_data; +void SwizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, @@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 const u32 gob_address_y = (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 dst_x = x + offset_x; const u32 gob_address = @@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); const u32 block_height_mask = (1U << block_height) - 1; - const u32 x_shift = static_cast(GOB_SIZE_SHIFT) + block_height; + const u32 x_shift = GOB_SIZE_SHIFT + block_height; for (u32 line = 0; line < line_count; ++line) { const u32 src_y = line + origin_y; - const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; const u32 src_offset_y = (block_y >> block_height) * block_size + @@ -270,7 +177,7 @@ void 
SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt const u32 x_shift = static_cast(GOB_SIZE_SHIFT) + block_height + block_depth; for (u32 line = 0; line < line_count; ++line) { - const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y]; const u32 block_y = line / GOB_SIZE_Y; const u32 dst_offset_y = (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; @@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 const std::size_t gob_address_y = (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 01e156bc8..d7cdc81e8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -4,7 +4,8 @@ #pragma once -#include +#include + #include "common/common_types.h" #include "video_core/textures/texture.h" @@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8; constexpr u32 GOB_SIZE_Z = 1; constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; -constexpr std::size_t GOB_SIZE_X_SHIFT = 6; -constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; -constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; -constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; +constexpr u32 GOB_SIZE_X_SHIFT = 6; +constexpr u32 GOB_SIZE_Y_SHIFT = 3; +constexpr u32 GOB_SIZE_Z_SHIFT = 0; +constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; -/// Unswizzles a swizzled texture without changing its format. 
-void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, - u32 bytes_per_pixel, u32 width, u32 height, u32 depth, - u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); +using SwizzleTable = std::array, GOB_SIZE_Y>; -/// Unswizzles a swizzled texture without changing its format. -std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, - u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight, - u32 width_spacing = 0); +/// Returns a z-order swizzle table +SwizzleTable MakeSwizzleTable(); -/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. -void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); +/// Unswizzles a block linear texture into linear memory. +void UnswizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment = 1); + +/// Swizzles linear memory into a block linear texture. +void SwizzleTexture(std::span output, std::span input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment = 1); /// This function calculates the correct size of a texture depending if it's tiled or not. 
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4171e3ef2..ae5621a7d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -5,9 +5,13 @@ #include #include +#include "common/cityhash.h" #include "core/settings.h" #include "video_core/textures/texture.h" +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; + namespace Tegra::Texture { namespace { @@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept { } // Anonymous namespace -std::array TSCEntry::GetBorderColor() const noexcept { +std::array TSCEntry::BorderColor() const noexcept { if (!srgb_conversion) { return border_color; } @@ -73,8 +77,16 @@ std::array TSCEntry::GetBorderColor() const noexcept { SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; } -float TSCEntry::GetMaxAnisotropy() const noexcept { +float TSCEntry::MaxAnisotropy() const noexcept { return static_cast(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); } } // namespace Tegra::Texture + +size_t std::hash::operator()(const TICEntry& tic) const noexcept { + return Common::CityHash64(reinterpret_cast(&tic), sizeof tic); +} + +size_t std::hash::operator()(const TSCEntry& tsc) const noexcept { + return Common::CityHash64(reinterpret_cast(&tsc), sizeof tsc); +} diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index bbc7e3eaf..c1d14335e 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -53,27 +53,27 @@ enum class TextureFormat : u32 { BC4 = 0x27, BC5 = 0x28, S8D24 = 0x29, - X8Z24 = 0x2a, + X8D24 = 0x2a, D24S8 = 0x2b, - X4V4Z24__COV4R4V = 0x2c, - X4V4Z24__COV8R8V = 0x2d, - V8Z24__COV4R12V = 0x2e, + X4V4D24__COV4R4V = 0x2c, + X4V4D24__COV8R8V = 0x2d, + V8D24__COV4R12V = 0x2e, D32 = 0x2f, D32S8 = 0x30, - X8Z24_X20V4S8__COV4R4V = 0x31, - X8Z24_X20V4S8__COV8R8V = 0x32, 
- ZF32_X20V4X8__COV4R4V = 0x33, - ZF32_X20V4X8__COV8R8V = 0x34, - ZF32_X20V4S8__COV4R4V = 0x35, - ZF32_X20V4S8__COV8R8V = 0x36, - X8Z24_X16V8S8__COV4R12V = 0x37, - ZF32_X16V8X8__COV4R12V = 0x38, - ZF32_X16V8S8__COV4R12V = 0x39, + X8D24_X20V4S8__COV4R4V = 0x31, + X8D24_X20V4S8__COV8R8V = 0x32, + D32_X20V4X8__COV4R4V = 0x33, + D32_X20V4X8__COV8R8V = 0x34, + D32_X20V4S8__COV4R4V = 0x35, + D32_X20V4S8__COV8R8V = 0x36, + X8D24_X16V8S8__COV4R12V = 0x37, + D32_X16V8X8__COV4R12V = 0x38, + D32_X16V8S8__COV4R12V = 0x39, D16 = 0x3a, - V8Z24__COV8R24V = 0x3b, - X8Z24_X16V8S8__COV8R24V = 0x3c, - ZF32_X16V8X8__COV8R24V = 0x3d, - ZF32_X16V8S8__COV8R24V = 0x3e, + V8D24__COV8R24V = 0x3b, + X8D24_X16V8S8__COV8R24V = 0x3c, + D32_X16V8X8__COV8R24V = 0x3d, + D32_X16V8S8__COV8R24V = 0x3e, ASTC_2D_4X4 = 0x40, ASTC_2D_5X5 = 0x41, ASTC_2D_6X6 = 0x42, @@ -146,7 +146,7 @@ enum class MsaaMode : u32 { }; union TextureHandle { - /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} + /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {} u32 raw; BitField<0, 20, u32> tic_id; @@ -155,124 +155,124 @@ union TextureHandle { static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); struct TICEntry { - static constexpr u32 DefaultBlockHeight = 16; - static constexpr u32 DefaultBlockDepth = 1; - union { - u32 raw; - BitField<0, 7, TextureFormat> format; - BitField<7, 3, ComponentType> r_type; - BitField<10, 3, ComponentType> g_type; - BitField<13, 3, ComponentType> b_type; - BitField<16, 3, ComponentType> a_type; + struct { + union { + BitField<0, 7, TextureFormat> format; + BitField<7, 3, ComponentType> r_type; + BitField<10, 3, ComponentType> g_type; + BitField<13, 3, ComponentType> b_type; + BitField<16, 3, ComponentType> a_type; - BitField<19, 3, SwizzleSource> x_source; - BitField<22, 3, SwizzleSource> y_source; - BitField<25, 3, SwizzleSource> z_source; - BitField<28, 3, SwizzleSource> w_source; - }; - u32 address_low; - union { - BitField<0, 16, u32> address_high; - 
BitField<21, 3, TICHeaderVersion> header_version; - }; - union { - BitField<0, 3, u32> block_width; - BitField<3, 3, u32> block_height; - BitField<6, 3, u32> block_depth; + BitField<19, 3, SwizzleSource> x_source; + BitField<22, 3, SwizzleSource> y_source; + BitField<25, 3, SwizzleSource> z_source; + BitField<28, 3, SwizzleSource> w_source; + }; + u32 address_low; + union { + BitField<0, 16, u32> address_high; + BitField<16, 5, u32> layer_base_3_7; + BitField<21, 3, TICHeaderVersion> header_version; + BitField<24, 1, u32> load_store_hint; + BitField<25, 4, u32> view_coherency_hash; + BitField<29, 3, u32> layer_base_8_10; + }; + union { + BitField<0, 3, u32> block_width; + BitField<3, 3, u32> block_height; + BitField<6, 3, u32> block_depth; - BitField<10, 3, u32> tile_width_spacing; + BitField<10, 3, u32> tile_width_spacing; - // High 16 bits of the pitch value - BitField<0, 16, u32> pitch_high; - BitField<26, 1, u32> use_header_opt_control; - BitField<27, 1, u32> depth_texture; - BitField<28, 4, u32> max_mip_level; + // High 16 bits of the pitch value + BitField<0, 16, u32> pitch_high; + BitField<26, 1, u32> use_header_opt_control; + BitField<27, 1, u32> depth_texture; + BitField<28, 4, u32> max_mip_level; - BitField<0, 16, u32> buffer_high_width_minus_one; - }; - union { - BitField<0, 16, u32> width_minus_1; - BitField<22, 1, u32> srgb_conversion; - BitField<23, 4, TextureType> texture_type; - BitField<29, 3, u32> border_size; + BitField<0, 16, u32> buffer_high_width_minus_one; + }; + union { + BitField<0, 16, u32> width_minus_one; + BitField<16, 3, u32> layer_base_0_2; + BitField<22, 1, u32> srgb_conversion; + BitField<23, 4, TextureType> texture_type; + BitField<29, 3, u32> border_size; - BitField<0, 16, u32> buffer_low_width_minus_one; - }; - union { - BitField<0, 16, u32> height_minus_1; - BitField<16, 14, u32> depth_minus_1; - }; - union { - BitField<6, 13, u32> mip_lod_bias; - BitField<27, 3, u32> max_anisotropy; + BitField<0, 16, u32> 
buffer_low_width_minus_one; + }; + union { + BitField<0, 16, u32> height_minus_1; + BitField<16, 14, u32> depth_minus_1; + BitField<30, 1, u32> is_sparse; + BitField<31, 1, u32> normalized_coords; + }; + union { + BitField<6, 13, u32> mip_lod_bias; + BitField<27, 3, u32> max_anisotropy; + }; + union { + BitField<0, 4, u32> res_min_mip_level; + BitField<4, 4, u32> res_max_mip_level; + BitField<8, 4, MsaaMode> msaa_mode; + BitField<12, 12, u32> min_lod_clamp; + }; + }; + std::array raw; }; - union { - BitField<0, 4, u32> res_min_mip_level; - BitField<4, 4, u32> res_max_mip_level; - BitField<8, 4, MsaaMode> msaa_mode; - BitField<12, 12, u32> min_lod_clamp; - }; + constexpr bool operator==(const TICEntry& rhs) const noexcept { + return raw == rhs.raw; + } - GPUVAddr Address() const { + constexpr bool operator!=(const TICEntry& rhs) const noexcept { + return raw != rhs.raw; + } + + constexpr GPUVAddr Address() const { return static_cast((static_cast(address_high) << 32) | address_low); } - u32 Pitch() const { + constexpr u32 Pitch() const { ASSERT(header_version == TICHeaderVersion::Pitch || header_version == TICHeaderVersion::PitchColorKey); // The pitch value is 21 bits, and is 32B aligned. 
return pitch_high << 5; } - u32 Width() const { + constexpr u32 Width() const { if (header_version != TICHeaderVersion::OneDBuffer) { - return width_minus_1 + 1; + return width_minus_one + 1; } - return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; + return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1; } - u32 Height() const { + constexpr u32 Height() const { return height_minus_1 + 1; } - u32 Depth() const { + constexpr u32 Depth() const { return depth_minus_1 + 1; } - u32 BlockWidth() const { - ASSERT(IsTiled()); - return block_width; + constexpr u32 BaseLayer() const { + return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8; } - u32 BlockHeight() const { - ASSERT(IsTiled()); - return block_height; - } - - u32 BlockDepth() const { - ASSERT(IsTiled()); - return block_depth; - } - - bool IsTiled() const { + constexpr bool IsBlockLinear() const { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; } - bool IsLineal() const { + constexpr bool IsPitchLinear() const { return header_version == TICHeaderVersion::Pitch || header_version == TICHeaderVersion::PitchColorKey; } - bool IsBuffer() const { + constexpr bool IsBuffer() const { return header_version == TICHeaderVersion::OneDBuffer; } - - bool IsSrgbConversionEnabled() const { - return srgb_conversion != 0; - } }; static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); @@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 { Linear = 3, }; +enum class SamplerReduction : u32 { + WeightedAverage = 0, + Min = 1, + Max = 2, +}; + enum class Anisotropy { Default, Filter2x, @@ -333,8 +339,12 @@ struct TSCEntry { BitField<0, 2, TextureFilter> mag_filter; BitField<4, 2, TextureFilter> min_filter; BitField<6, 2, TextureMipmapFilter> mipmap_filter; + BitField<8, 1, u32> cubemap_anisotropy; BitField<9, 1, u32> cubemap_interface_filtering; + BitField<10, 2, SamplerReduction> 
reduction_filter; BitField<12, 13, u32> mip_lod_bias; + BitField<25, 1, u32> float_coord_normalization; + BitField<26, 5, u32> trilin_opt; }; union { BitField<0, 12, u32> min_lod_clamp; @@ -347,32 +357,45 @@ struct TSCEntry { }; std::array border_color; }; - std::array raw; + std::array raw; }; - std::array GetBorderColor() const noexcept; + constexpr bool operator==(const TSCEntry& rhs) const noexcept { + return raw == rhs.raw; + } - float GetMaxAnisotropy() const noexcept; + constexpr bool operator!=(const TSCEntry& rhs) const noexcept { + return raw != rhs.raw; + } - float GetMinLod() const { + std::array BorderColor() const noexcept; + + float MaxAnisotropy() const noexcept; + + float MinLod() const { return static_cast(min_lod_clamp) / 256.0f; } - float GetMaxLod() const { + float MaxLod() const { return static_cast(max_lod_clamp) / 256.0f; } - float GetLodBias() const { + float LodBias() const { // Sign extend the 13-bit value. - constexpr u32 mask = 1U << (13 - 1); + static constexpr u32 mask = 1U << (13 - 1); return static_cast(static_cast((mip_lod_bias ^ mask) - mask)) / 256.0f; } }; static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); -struct FullTextureInfo { - TICEntry tic; - TSCEntry tsc; +} // namespace Tegra::Texture + +template <> +struct std::hash { + size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept; }; -} // namespace Tegra::Texture +template <> +struct std::hash { + size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept; +};