Merge pull request #3694 from ReinUsesLisp/indexed-quads
vk_compute_pass: Implement indexed quads
This commit is contained in:
commit
85c17a2c35
5 changed files with 280 additions and 12 deletions
50
src/video_core/renderer_vulkan/shaders/quad_indexed.comp
Normal file
50
src/video_core/renderer_vulkan/shaders/quad_indexed.comp
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build instructions:
|
||||||
|
* $ glslangValidator -V quad_indexed.comp -o output.spv
|
||||||
|
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
|
||||||
|
* $ xxd -i optimized.spv
|
||||||
|
*
|
||||||
|
* Then copy that bytecode to the C++ file
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 460 core
|
||||||
|
|
||||||
|
// Workgroup shape: 1024 invocations along X. main() treats each invocation
// as one quad.
layout(local_size_x = 1024) in;

// Source index buffer, addressed as 32-bit words. Indices narrower than
// 32 bits are packed several to a word and unpacked in main().
layout(std430, set = 0, binding = 0) readonly buffer InputBuffer {
    uint input_indexes[];
};

// Destination index buffer: one 32-bit index per element, six per quad.
layout(std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
    uint output_indexes[];
};

// Per-dispatch parameters (two 32-bit values, 8 bytes in total).
layout(push_constant) uniform PushConstants {
    // Value added to every index that is written to the output.
    uint base_vertex;
    // log2 of the input index size in bytes: 0 = uint8, 1 = uint16, 2 = uint32.
    int index_shift;
};
|
||||||
|
|
||||||
|
// Expands one quad (four input indices) into two triangles (six output
// indices). Input indices may be 8, 16 or 32 bits wide and are read from
// 32-bit words; output indices are always 32 bits with base_vertex added.
void main() {
    int quad = int(gl_GlobalInvocationID.x);
    int first_output = quad * 6;
    // Invocations past the end of the output buffer have nothing to do.
    if (first_output >= output_indexes.length()) {
        return;
    }

    int bits_per_index = 8 << index_shift;       // 8, 16 or 32
    int word_shift = 2 - index_shift;            // log2(indices per 32-bit word)
    int lane_mask = (1 << word_shift) - 1;       // selects the index inside a word
    int first_input = quad * 4;

    // Quad corners 0-1-2-3 become the triangles (0, 1, 2) and (0, 2, 3).
    const int corner_for_vertex[6] = int[](0, 1, 2, 0, 2, 3);
    for (uint i = 0; i < 6; ++i) {
        int element = first_input + corner_for_vertex[i];
        uint word = input_indexes[element >> word_shift];
        int first_bit = (element & lane_mask) * bits_per_index;
        uint unpacked = bitfieldExtract(word, first_bit, bits_per_index);
        output_indexes[first_output + i] = unpacked + base_vertex;
    }
}
|
|
@ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt
|
||||||
return entry;
|
return entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPushConstantRange BuildQuadArrayPassPushConstantRange() {
|
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
|
||||||
VkPushConstantRange range;
|
VkPushConstantRange range;
|
||||||
range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||||
range.offset = 0;
|
range.offset = 0;
|
||||||
range.size = sizeof(u32);
|
range.size = static_cast<u32>(size);
|
||||||
return range;
|
return range;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = {
|
||||||
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||||
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
|
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
|
||||||
|
|
||||||
std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() {
|
// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
|
||||||
|
constexpr u8 QUAD_INDEXED_SPV[] = {
|
||||||
|
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
|
||||||
|
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||||
|
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||||
|
0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||||
|
0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||||
|
0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||||
|
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||||
|
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
|
||||||
|
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||||
|
0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
|
||||||
|
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||||
|
0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||||
|
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||||
|
0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||||
|
0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||||
|
0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
|
||||||
|
0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||||
|
0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
|
||||||
|
0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
|
0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||||
|
0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||||
|
0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
|
||||||
|
0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
|
||||||
|
0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
|
||||||
|
0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||||
|
0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
|
||||||
|
0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
|
||||||
|
0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
|
0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
|
||||||
|
0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
|
||||||
|
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
|
||||||
|
0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
|
||||||
|
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||||
|
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
|
||||||
|
0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
|
||||||
|
0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
|
||||||
|
0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
|
||||||
|
0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
|
||||||
|
0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
|
||||||
|
0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
|
||||||
|
0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||||
|
0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||||
|
0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||||
|
0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||||
|
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
|
||||||
|
0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
|
||||||
|
0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||||
|
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
|
||||||
|
0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||||
|
0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
|
||||||
|
0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
|
||||||
|
0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||||
|
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
|
||||||
|
0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||||
|
0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
|
||||||
|
0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
|
||||||
|
0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
|
||||||
|
0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
|
||||||
|
0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||||
|
0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||||
|
0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||||
|
0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
|
||||||
|
0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
|
||||||
|
0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
|
||||||
|
0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||||
|
0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
|
||||||
|
0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
|
||||||
|
0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
|
||||||
|
0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
|
||||||
|
0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
|
||||||
|
0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||||
|
0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
|
||||||
|
0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
|
||||||
|
0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
|
||||||
|
0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
|
||||||
|
0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||||
|
0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||||
|
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
|
||||||
|
0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
|
||||||
|
0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
|
||||||
|
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
|
||||||
|
|
||||||
|
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
|
||||||
std::array<VkDescriptorSetLayoutBinding, 2> bindings;
|
std::array<VkDescriptorSetLayoutBinding, 2> bindings;
|
||||||
bindings[0].binding = 0;
|
bindings[0].binding = 0;
|
||||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||||
|
@ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings(
|
||||||
return bindings;
|
return bindings;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() {
|
VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
|
||||||
VkDescriptorUpdateTemplateEntryKHR entry;
|
VkDescriptorUpdateTemplateEntryKHR entry;
|
||||||
entry.dstBinding = 0;
|
entry.dstBinding = 0;
|
||||||
entry.dstArrayElement = 0;
|
entry.dstArrayElement = 0;
|
||||||
|
@ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue)
|
VKUpdateDescriptorQueue& update_descriptor_queue)
|
||||||
: VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(),
|
: VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(),
|
||||||
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
|
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
|
||||||
BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array),
|
BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array),
|
||||||
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
|
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
|
||||||
update_descriptor_queue{update_descriptor_queue} {}
|
update_descriptor_queue{update_descriptor_queue} {}
|
||||||
|
|
||||||
QuadArrayPass::~QuadArrayPass() = default;
|
QuadArrayPass::~QuadArrayPass() = default;
|
||||||
|
|
||||||
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
|
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
|
||||||
const u32 num_triangle_vertices = num_vertices * 6 / 4;
|
const u32 num_triangle_vertices = (num_vertices / 4) * 6;
|
||||||
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
|
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
|
||||||
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
|
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
|
||||||
|
|
||||||
|
@ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
|
||||||
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
|
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
|
||||||
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
|
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue)
|
VKUpdateDescriptorQueue& update_descriptor_queue)
|
||||||
: VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(),
|
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
|
||||||
BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass),
|
BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass),
|
||||||
uint8_pass),
|
uint8_pass),
|
||||||
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
|
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
|
||||||
update_descriptor_queue{update_descriptor_queue} {}
|
update_descriptor_queue{update_descriptor_queue} {}
|
||||||
|
@ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
|
||||||
return {*buffer.handle, 0};
|
return {*buffer.handle, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the indexed-quad compute pass: an input/output storage buffer pair,
// a push constant range of two u32 values and the QUAD_INDEXED_SPV module.
QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
                                 VKDescriptorPool& descriptor_pool,
                                 VKStagingBufferPool& staging_buffer_pool,
                                 VKUpdateDescriptorQueue& update_descriptor_queue)
    : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
                    BuildInputOutputDescriptorUpdateTemplate(),
                    // Room for base_vertex and index_shift, pushed by Assemble().
                    BuildComputePushConstantRange(2 * sizeof(u32)), std::size(QUAD_INDEXED_SPV),
                    QUAD_INDEXED_SPV),
      scheduler(scheduler), staging_buffer_pool(staging_buffer_pool),
      update_descriptor_queue(update_descriptor_queue) {}

QuadIndexedPass::~QuadIndexedPass() = default;
|
||||||
|
|
||||||
|
/**
 * Records a compute dispatch that converts an indexed quad list into a 32-bit
 * indexed triangle list.
 *
 * @param index_format Format of the indices stored in src_buffer.
 * @param num_vertices Number of input indices; every group of four forms one quad.
 *                     Trailing indices that do not complete a quad are ignored.
 * @param base_vertex  Value the shader adds to every emitted index.
 * @param src_buffer   Buffer holding the source indices.
 * @param src_offset   Byte offset of the first source index inside src_buffer.
 * @return The buffer that receives the generated indices and the offset (always 0)
 *         at which they start.
 */
std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
    Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
    VkBuffer src_buffer, u64 src_offset) {
    // log2 of the size in bytes of one input index; the shader receives it as index_shift.
    const u32 index_shift = [index_format]() -> u32 {
        switch (index_format) {
        case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
            return 0;
        case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
            return 1;
        case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
            return 2;
        }
        UNREACHABLE();
        return 2;
    }();
    const u32 input_size = num_vertices << index_shift;
    const u32 num_quads = num_vertices / 4;
    const u32 num_tri_vertices = num_quads * 6;

    const std::size_t staging_size = num_tri_vertices * sizeof(u32);
    auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);

    // Binding 0 is the source index data, binding 1 the generated indices.
    update_descriptor_queue.Acquire();
    update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
    update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
    const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());

    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
                      num_quads, num_tri_vertices, base_vertex,
                      index_shift](vk::CommandBuffer cmdbuf) {
        // Must match local_size_x in quad_indexed.comp.
        static constexpr u32 dispatch_size = 1024;
        // Order matches the shader's push constant block: base_vertex, index_shift.
        const std::array push_constants = {base_vertex, index_shift};
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
                             &push_constants);
        // Each invocation converts a whole quad, so size the dispatch by the number of
        // quads rather than by the number of output vertices.
        cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);

        // Make the shader writes visible to index fetching: the generated buffer is
        // consumed as an index buffer, which is an index read in the vertex input stage.
        VkBufferMemoryBarrier barrier;
        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
        barrier.pNext = nullptr;
        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_INDEX_READ_BIT;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.buffer = buffer;
        barrier.offset = 0;
        barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
    });
    return {*buffer.handle, 0};
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||||
#include "video_core/renderer_vulkan/wrapper.h"
|
#include "video_core/renderer_vulkan/wrapper.h"
|
||||||
|
|
||||||
|
@ -73,4 +74,22 @@ private:
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class QuadIndexedPass final : public VKComputePass {
|
||||||
|
public:
|
||||||
|
explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
|
||||||
|
VKDescriptorPool& descriptor_pool,
|
||||||
|
VKStagingBufferPool& staging_buffer_pool,
|
||||||
|
VKUpdateDescriptorQueue& update_descriptor_queue);
|
||||||
|
~QuadIndexedPass();
|
||||||
|
|
||||||
|
std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
|
||||||
|
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
|
||||||
|
u64 src_offset);
|
||||||
|
|
||||||
|
private:
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
VKStagingBufferPool& staging_buffer_pool;
|
||||||
|
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -293,6 +293,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
|
||||||
update_descriptor_queue(device, scheduler), renderpass_cache(device),
|
update_descriptor_queue(device, scheduler), renderpass_cache(device),
|
||||||
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||||
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||||
|
quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
|
||||||
texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
|
texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
|
||||||
staging_pool),
|
staging_pool),
|
||||||
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
|
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
|
||||||
|
@ -844,18 +845,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
|
||||||
bool is_indexed) {
|
bool is_indexed) {
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
switch (regs.draw.topology) {
|
switch (regs.draw.topology) {
|
||||||
case Maxwell::PrimitiveTopology::Quads:
|
case Maxwell::PrimitiveTopology::Quads: {
|
||||||
if (params.is_indexed) {
|
if (!params.is_indexed) {
|
||||||
UNIMPLEMENTED();
|
|
||||||
} else {
|
|
||||||
const auto [buffer, offset] =
|
const auto [buffer, offset] =
|
||||||
quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
|
quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
|
||||||
buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
|
buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
|
||||||
params.base_vertex = 0;
|
params.base_vertex = 0;
|
||||||
params.num_vertices = params.num_vertices * 6 / 4;
|
params.num_vertices = params.num_vertices * 6 / 4;
|
||||||
params.is_indexed = true;
|
params.is_indexed = true;
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
|
||||||
|
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
|
||||||
|
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
|
||||||
|
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
|
||||||
|
|
||||||
|
buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
|
||||||
|
params.num_vertices = (params.num_vertices / 4) * 6;
|
||||||
|
params.base_vertex = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default: {
|
default: {
|
||||||
if (!is_indexed) {
|
if (!is_indexed) {
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -254,6 +254,7 @@ private:
|
||||||
VKUpdateDescriptorQueue update_descriptor_queue;
|
VKUpdateDescriptorQueue update_descriptor_queue;
|
||||||
VKRenderPassCache renderpass_cache;
|
VKRenderPassCache renderpass_cache;
|
||||||
QuadArrayPass quad_array_pass;
|
QuadArrayPass quad_array_pass;
|
||||||
|
QuadIndexedPass quad_indexed_pass;
|
||||||
Uint8Pass uint8_pass;
|
Uint8Pass uint8_pass;
|
||||||
|
|
||||||
VKTextureCache texture_cache;
|
VKTextureCache texture_cache;
|
||||||
|
|
Loading…
Reference in a new issue