mirror of
https://git.citron-emu.org/Citron/Citron.git
synced 2025-01-31 21:26:57 +01:00
buffer_cache: Simplify storage buffer binding logic
Reverts overly restrictive storage buffer validation and size calculation that was causing rendering issues in The Legend of Zelda: Tears of the Kingdom, particularly in underground/depth areas. The simplified approach: - Uses GetMemoryLayoutSize() instead of manual page probing - Removes unnecessary 4GB memory bounds validation - Streamlines address translation and alignment handling This fixes numerous reported cases of missing or corrupted rendering in TOTK's underground areas where storage buffer operations are heavily used for depth-related effects.
This commit is contained in:
parent
a5d62fa4ec
commit
be191f740a
1 changed file with 20 additions and 63 deletions
|
@ -1696,87 +1696,44 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
||||||
template <class P>
|
template <class P>
|
||||||
Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
|
Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
|
||||||
bool is_written) const {
|
bool is_written) const {
|
||||||
// Read the GPU address from the storage buffer
|
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||||
GPUVAddr gpu_addr;
|
|
||||||
gpu_memory->ReadBlock(ssbo_addr, &gpu_addr, sizeof(GPUVAddr));
|
|
||||||
|
|
||||||
if (gpu_addr == 0) {
|
|
||||||
LOG_WARNING(HW_GPU, "Null GPU address read from storage buffer at {:x} for cbuf index {}",
|
|
||||||
ssbo_addr, cbuf_index);
|
|
||||||
return NULL_BINDING;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto size = [&]() {
|
const auto size = [&]() {
|
||||||
const bool is_nvn_cbuf = cbuf_index == 0;
|
const bool is_nvn_cbuf = cbuf_index == 0;
|
||||||
|
// The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
|
||||||
if (is_nvn_cbuf) {
|
if (is_nvn_cbuf) {
|
||||||
// Try to read the size for NVN buffers
|
const u32 ssbo_size = gpu_memory->Read<u32>(ssbo_addr + 8);
|
||||||
u32 nvn_size;
|
if (ssbo_size != 0) {
|
||||||
gpu_memory->ReadBlock(ssbo_addr + 8, &nvn_size, sizeof(u32));
|
return ssbo_size;
|
||||||
if (nvn_size != 0) {
|
|
||||||
return nvn_size;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Other titles (notably Doom Eternal) may use STG/LDG on buffer addresses in custom defined
|
||||||
// Determine size by reading memory pages
|
// cbufs, which do not store the sizes adjacent to the addresses, so use the fully
|
||||||
const u64 max_size = 8_MiB;
|
// mapped buffer size for now.
|
||||||
u32 current_size = 0;
|
const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
|
||||||
u8 test_byte;
|
return std::min(memory_layout_size, static_cast<u32>(8_MiB));
|
||||||
|
|
||||||
for (u64 offset = 0; offset < max_size; offset += Core::DEVICE_PAGESIZE) {
|
|
||||||
gpu_memory->ReadBlock(gpu_addr + offset, &test_byte, sizeof(u8));
|
|
||||||
current_size = static_cast<u32>(offset + Core::DEVICE_PAGESIZE);
|
|
||||||
|
|
||||||
// If we can't read from this page, use the previous size
|
|
||||||
if (test_byte == 0 && offset > 0) {
|
|
||||||
current_size = static_cast<u32>(offset);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (current_size == 0) {
|
|
||||||
LOG_WARNING(HW_GPU, "Zero memory layout size for storage buffer at {:x}", gpu_addr);
|
|
||||||
return 0U;
|
|
||||||
}
|
|
||||||
return std::min(current_size, static_cast<u32>(max_size));
|
|
||||||
}();
|
}();
|
||||||
|
// Alignment only applies to the offset of the buffer
|
||||||
// Early return if size is 0
|
|
||||||
if (size == 0) {
|
|
||||||
LOG_WARNING(HW_GPU, "Zero size storage buffer for cbuf index {}", cbuf_index);
|
|
||||||
return NULL_BINDING;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 alignment = runtime.GetStorageBufferAlignment();
|
const u32 alignment = runtime.GetStorageBufferAlignment();
|
||||||
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
|
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
|
||||||
const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size;
|
const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size;
|
||||||
|
|
||||||
const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
|
const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
|
||||||
|
if (!aligned_device_addr || size == 0) {
|
||||||
|
LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
|
||||||
|
return NULL_BINDING;
|
||||||
|
}
|
||||||
const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||||
|
ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}",
|
||||||
if (!aligned_device_addr || !device_addr) {
|
cbuf_index);
|
||||||
LOG_WARNING(HW_GPU, "Failed to translate GPU address {:x} to CPU address for cbuf index {}",
|
// The end address used for size calculation does not need to be aligned
|
||||||
gpu_addr, cbuf_index);
|
|
||||||
return NULL_BINDING;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate device addresses are within bounds
|
|
||||||
constexpr size_t MAX_DEVICE_MEMORY = 1ULL << 32; // 4GB max device memory
|
|
||||||
if (*aligned_device_addr >= MAX_DEVICE_MEMORY ||
|
|
||||||
(*aligned_device_addr + aligned_size) > MAX_DEVICE_MEMORY ||
|
|
||||||
*device_addr >= MAX_DEVICE_MEMORY ||
|
|
||||||
(*device_addr + size) > MAX_DEVICE_MEMORY) {
|
|
||||||
LOG_WARNING(HW_GPU, "Device address out of bounds for storage buffer cbuf index {}",
|
|
||||||
cbuf_index);
|
|
||||||
return NULL_BINDING;
|
|
||||||
}
|
|
||||||
|
|
||||||
const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::DEVICE_PAGESIZE);
|
const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::DEVICE_PAGESIZE);
|
||||||
|
|
||||||
return Binding{
|
const Binding binding{
|
||||||
.device_addr = *aligned_device_addr,
|
.device_addr = *aligned_device_addr,
|
||||||
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr),
|
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr),
|
||||||
.buffer_id = BufferId{},
|
.buffer_id = BufferId{},
|
||||||
};
|
};
|
||||||
|
return binding;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
Loading…
Reference in a new issue