Merge pull request #3203 from FernandoS27/tex-cache-fixes

Texture Cache: Add HLE methods for building 3D textures
This commit is contained in:
bunnei 2019-12-22 14:25:13 -05:00 committed by GitHub
commit 0f3ac9cfeb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 144 additions and 1 deletions

View file

@ -392,4 +392,42 @@ std::string SurfaceParams::TargetName() const {
}
}
/// Returns the size of a single block: the product of (64 << block_width),
/// (8 << block_height) and (1 << block_depth).
u32 SurfaceParams::GetBlockSize() const {
    return (64U << block_width) * (8U << block_height) * (1U << block_depth);
}
std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
const u32 x_pixels = 64U / GetBytesPerPixel();
const u32 x = x_pixels << block_width;
const u32 y = 8U << block_height;
return {x, y};
}
/// Converts a memory offset into the {x, y, z} coordinates of the GOB row it falls in.
///
/// The offset is first split into a block index and a GOB index inside that block. The block
/// index is unfolded into block coordinates (x fastest, then y, then z) and scaled by the block
/// dimensions; the GOB index then contributes an additional y and z displacement inside the
/// block. The position inside the GOB itself is not taken into account.
///
/// @param offset Memory offset relative to the start of the surface.
/// @return Tuple of {x, y, z}.
std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
    const auto ceil_div = [](const u32 value, const u32 divisor) {
        return (value + divisor - 1) / divisor;
    };

    // Dimensions of one block.
    const u32 block_dim_x = (64U / GetBytesPerPixel()) << block_width;
    const u32 block_dim_y = 8U << block_height;
    const u32 block_dim_z = 1U << block_depth;

    // Number of blocks needed to cover the surface along each axis.
    const u32 blocks_in_x = ceil_div(width, block_dim_x);
    const u32 blocks_in_y = ceil_div(height, block_dim_y);
    const u32 blocks_in_z = ceil_div(depth, block_dim_z);

    // Split the offset into "which block" and "which GOB inside that block".
    const u32 bytes_per_block = GetBlockSize();
    const u32 block = offset / bytes_per_block;
    const u32 offset_in_block = offset % bytes_per_block;
    const u32 gob = offset_in_block / static_cast<u32>(Tegra::Texture::GetGOBSize());

    // Block coordinates.
    const u32 block_x = block % blocks_in_x;
    const u32 block_y = (block / blocks_in_x) % blocks_in_y;
    const u32 block_z = (block / (blocks_in_x * blocks_in_y)) % blocks_in_z;

    // GOBs advance 8 rows at a time in y, wrapping into the next z slice of the block.
    const u32 x = block_x * block_dim_x;
    const u32 y = block_y * block_dim_y + (gob * 8U) % block_dim_y;
    const u32 z = block_z * block_dim_z + (gob >> block_height);
    return {x, y, z};
}
} // namespace VideoCommon

View file

@ -4,6 +4,8 @@
#pragma once
#include <utility>
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/cityhash.h"
@ -136,6 +138,15 @@ public:
std::size_t GetConvertedMipmapSize(u32 level) const;
/// Returns the size of a single Tegra block of this texture in the guest memory layout.
u32 GetBlockSize() const;
/// Returns the maximum X and Y sizes covered by a single block.
std::pair<u32, u32> GetBlockXY() const;
/// Returns the x, y, z coordinates that correspond to a memory offset.
std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
/// Returns the size of a layer in bytes in guest memory.
std::size_t GetGuestLayerSize() const {
return GetLayerSize(false, false);
@ -269,7 +280,8 @@ private:
/// Returns the size of all mipmap levels and aligns as needed.
///
/// The layer size is multiplied by `depth` only when the surface is layered and the caller did
/// not ask for a single layer (`layer_only`); otherwise exactly one layer is accounted for.
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
    const auto layer_count = (!layer_only && is_layered) ? depth : 1U;
    return GetLayerSize(as_host_size, uncompressed) * layer_count;
}
/// Returns the size of a layer

View file

@ -615,6 +615,86 @@ private:
return {{new_surface, new_surface->GetMainView()}};
}
/**
 * Takes care of managing 3D textures and their slices. Does HLE methods for reconstructing the
 * 3D textures within the GPU if possible. Falls back to LLE when it isn't possible to use any
 * of the HLE methods.
 *
 * For a Texture3D target, every overlap must be a Texture2D surface with the same height,
 * block height and block depth as the new surface, and must not start before the new surface.
 * Each overlap is then copied into the slice of the new surface given by its memory offset,
 * the overlaps are unregistered and the new surface is registered in their place.
 *
 * For any other target, an existing surface at the same address with a fully matching
 * structure is reused; otherwise a fresh surface is initialized.
 *
 * @param overlaps          The overlapping surfaces registered in the cache.
 * @param params            The parameters on the new surface.
 * @param gpu_addr          The starting address of the new surface.
 * @param cache_addr        The starting address of the new surface on physical memory.
 * @param preserve_contents Indicates that the new surface should be loaded from memory or
 *                          left blank.
 * @return The surface and its main view, or std::nullopt when none of the HLE methods apply
 *         and the caller has to fall back to its generic path.
 */
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
                                                           const SurfaceParams& params,
                                                           const GPUVAddr gpu_addr,
                                                           const CacheAddr cache_addr,
                                                           bool preserve_contents) {
    if (params.target == SurfaceTarget::Texture3D) {
        if (params.num_levels > 1) {
            // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
            return std::nullopt;
        }

        // Validate every overlap up front so that a late mismatch neither allocates a surface
        // nor issues copies that would immediately be thrown away.
        for (const auto& surface : overlaps) {
            const SurfaceParams& src_params = surface->GetSurfaceParams();
            const bool is_compatible_slice =
                src_params.target == SurfaceTarget::Texture2D &&
                src_params.height == params.height &&
                src_params.block_depth == params.block_depth &&
                src_params.block_height == params.block_height;
            // An overlap starting before the new surface would make the unsigned offset
            // computation below wrap around and select an arbitrary slice.
            const bool starts_inside = surface->GetCacheAddr() >= cache_addr;
            if (!is_compatible_slice || !starts_inside) {
                return std::nullopt;
            }
        }

        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
        bool modified = false;
        for (auto& surface : overlaps) {
            const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
            // Only the slice index (z) is needed; each overlap is copied as a whole 2D slice.
            const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
            modified |= surface->IsModified();
            const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
                                         1);
            ImageCopy(surface, new_surface, copy_params);
        }

        // The new 3D surface now supersedes all of its slices.
        for (const auto& surface : overlaps) {
            Unregister(surface);
        }
        new_surface->MarkAsModified(modified, Tick());
        Register(new_surface);
        auto view = new_surface->GetMainView();
        return {{std::move(new_surface), view}};
    } else {
        for (const auto& surface : overlaps) {
            if (!surface->MatchTarget(params.target)) {
                // A single mismatching surface at the very same address can be replaced,
                // unless accurate GPU emulation is requested.
                if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
                    if (Settings::values.use_accurate_gpu_emulation) {
                        return std::nullopt;
                    }
                    Unregister(surface);
                    return InitializeSurface(gpu_addr, params, preserve_contents);
                }
                return std::nullopt;
            }
            if (surface->GetCacheAddr() != cache_addr) {
                continue;
            }
            if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
                return {{surface, surface->GetMainView()}};
            }
        }
        return InitializeSurface(gpu_addr, params, preserve_contents);
    }
}
/**
* Gets the starting address and parameters of a candidate surface and tries
* to find a matching surface within the cache. This is done in 3 big steps:
@ -687,6 +767,15 @@ private:
}
}
// Check if it's a 3D texture
if (params.block_depth > 0) {
auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
if (surface) {
return *surface;
}
}
// Split cases between 1 overlap or many.
if (overlaps.size() == 1) {
TSurface current_surface = overlaps[0];

View file

@ -12,6 +12,10 @@ namespace Tegra::Texture {
// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
// a small rect of (64/bytes_per_pixel)X8.
// Declared constexpr so the value can be used in constant expressions.
constexpr std::size_t GetGOBSize() {
    return 512;
}
// Base-2 logarithm of the GOB size (1 << 9 == 512), for shifting instead of multiplying or
// dividing. Declared constexpr so the value can be used in constant expressions.
constexpr std::size_t GetGOBSizeShift() {
    return 9;
}