textures/decoders: Fix block linear to pitch copies

There were two issues with block linear copies. First, the swizzling was
wrong, and this commit reimplements it.

The other issue was that these copies are generally used to download
render targets from the GPU and yuzu was not downloading them from
host GPU memory unless the extreme GPU accuracy setting was selected.
This commit enables cached memory reads for all accuracy levels.

- Fixes level thumbnails in Super Mario Maker 2.
This commit is contained in:
ReinUsesLisp 2020-07-30 01:38:44 -03:00
parent 257b1d2c4b
commit f00641459e
3 changed files with 34 additions and 34 deletions

View file

@ -94,7 +94,8 @@ void MaxwellDMA::CopyPitchToPitch() {
} }
void MaxwellDMA::CopyBlockLinearToPitch() { void MaxwellDMA::CopyBlockLinearToPitch() {
ASSERT(regs.src_params.block_size.depth == 0); UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
// Optimized path for micro copies. // Optimized path for micro copies.
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
@ -123,17 +124,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
write_buffer.resize(dst_size); write_buffer.resize(dst_size);
} }
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
}
UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(), block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
block_height, src_params.origin.x, src_params.origin.y); read_buffer.data());
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
} }
@ -198,7 +194,6 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
if (read_buffer.size() < src_size) { if (read_buffer.size() < src_size) {
read_buffer.resize(src_size); read_buffer.resize(src_size);
} }
if (write_buffer.size() < dst_size) { if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size); write_buffer.resize(dst_size);
} }
@ -212,8 +207,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
} }
UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
bytes_per_pixel, read_buffer.data(), write_buffer.data(), bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
regs.src_params.block_size.height, pos_x, pos_y); write_buffer.data(), read_buffer.data());
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
} }

View file

@ -228,24 +228,30 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
} }
} }
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
u32 block_height_bit, u32 offset_x, u32 offset_y) { const u32 stride = width * bytes_per_pixel;
const u32 block_height = 1U << block_height_bit; const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
for (u32 line = 0; line < subrect_height; ++line) { const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
const u32 y2 = line + offset_y;
const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height +
((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 x2 = (x + offset_x) * bytes_per_pixel;
const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height;
const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X];
const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel;
u8* dest_line = unswizzled_data + unswizzled_offset;
u8* source_addr = swizzled_data + swizzled_offset;
std::memcpy(dest_line, source_addr, bytes_per_pixel); const u32 block_height_mask = (1U << block_height) - 1;
const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
for (u32 line = 0; line < line_count; ++line) {
const u32 src_y = line + origin_y;
const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
const u32 src_offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
for (u32 column = 0; column < line_length_in; ++column) {
const u32 src_x = (column + origin_x) * bytes_per_pixel;
const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
} }
} }
} }
@ -261,7 +267,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
const u32 block_height_mask = (1U << block_height) - 1; const u32 block_height_mask = (1U << block_height) - 1;
const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth)); const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
for (u32 line = 0; line < line_count; ++line) { for (u32 line = 0; line < line_count; ++line) {
const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];

View file

@ -48,9 +48,8 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
u32 block_height_bit, u32 offset_x, u32 offset_y); u32 block_height_bit, u32 offset_x, u32 offset_y);
/// Copies a tiled subrectangle into a linear surface. /// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
u32 offset_x, u32 offset_y);
/// @brief Swizzles a 2D array of pixels into a 3D texture /// @brief Swizzles a 2D array of pixels into a 3D texture
/// @param line_length_in Number of pixels per line /// @param line_length_in Number of pixels per line