Texture Cache: Fix downscaling and correct memory consumption.

This commit is contained in:
Fernando Sahmkow 2021-10-17 18:01:18 +02:00
parent b60966041c
commit 425ab9ef4b
8 changed files with 146 additions and 35 deletions

View file

@ -876,7 +876,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
} }
} }
bool Image::Scale() { bool Image::Scale(bool up_scale) {
const auto format_type = GetFormatType(info.format); const auto format_type = GetFormatType(info.format);
const GLenum attachment = [format_type] { const GLenum attachment = [format_type] {
switch (format_type) { switch (format_type) {
@ -944,14 +944,25 @@ bool Image::Scale() {
const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 layer = 0; layer < info.resources.layers; ++layer) {
for (s32 level = 0; level < info.resources.levels; ++level) { for (s32 level = 0; level < info.resources.levels; ++level) {
const u32 src_level_width = std::max(1u, original_width >> level); const u32 src_level_width =
const u32 src_level_height = std::max(1u, original_height >> level); std::max(1u, (up_scale ? original_width : scaled_width) >> level);
const u32 dst_level_width = std::max(1u, scaled_width >> level); const u32 src_level_height =
const u32 dst_level_height = std::max(1u, scaled_height >> level); std::max(1u, (up_scale ? original_height : scaled_height) >> level);
const u32 dst_level_width =
std::max(1u, (up_scale ? scaled_width : original_width) >> level);
const u32 dst_level_height =
std::max(1u, (up_scale ? scaled_height : original_height) >> level);
if (up_scale) {
glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer);
glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level,
layer); layer);
} else {
glNamedFramebufferTextureLayer(read_fbo, attachment, upscaled_backup.handle, level,
layer);
glNamedFramebufferTextureLayer(draw_fbo, attachment, texture.handle, level, layer);
}
glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
0, dst_level_width, dst_level_height, mask, filter); 0, dst_level_width, dst_level_height, mask, filter);
} }
@ -959,7 +970,12 @@ bool Image::Scale() {
if (scissor_test != GL_FALSE) { if (scissor_test != GL_FALSE) {
glEnablei(GL_SCISSOR_TEST, 0); glEnablei(GL_SCISSOR_TEST, 0);
} }
if (up_scale) {
current_texture = upscaled_backup.handle; current_texture = upscaled_backup.handle;
} else {
current_texture = texture.handle;
}
return true; return true;
} }
@ -981,6 +997,7 @@ bool Image::ScaleUp() {
flags &= ~ImageFlagBits::Rescaled; flags &= ~ImageFlagBits::Rescaled;
return false; return false;
} }
scale_count++;
if (!Scale()) { if (!Scale()) {
flags &= ~ImageFlagBits::Rescaled; flags &= ~ImageFlagBits::Rescaled;
return false; return false;
@ -996,7 +1013,11 @@ bool Image::ScaleDown() {
if (!runtime->resolution.active) { if (!runtime->resolution.active) {
return false; return false;
} }
current_texture = texture.handle; scale_count++;
if (!Scale(false)) {
flags &= ~ImageFlagBits::Rescaled;
return false;
}
return true; return true;
} }

View file

@ -205,7 +205,7 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
bool Scale(); bool Scale(bool up_scale = true);
OGLTexture texture; OGLTexture texture;
OGLTexture upscaled_backup; OGLTexture upscaled_backup;

View file

@ -592,7 +592,8 @@ struct RangedBarrierRange {
} }
void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) { VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
bool up_scaling = true) {
const bool is_2d = info.type == ImageType::e2D; const bool is_2d = info.type == ImageType::e2D;
const auto resources = info.resources; const auto resources = info.resources;
const VkExtent2D extent{ const VkExtent2D extent{
@ -605,14 +606,16 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
vk_filter](vk::CommandBuffer cmdbuf) { vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
const VkOffset2D src_size{ const VkOffset2D src_size{
.x = static_cast<s32>(extent.width), .x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)),
.y = static_cast<s32>(extent.height), .y = static_cast<s32>(is_2d && up_scaling ? extent.height
: resolution.ScaleUp(extent.height)),
}; };
const VkOffset2D dst_size{ const VkOffset2D dst_size{
.x = static_cast<s32>(resolution.ScaleUp(extent.width)), .x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width),
.y = static_cast<s32>(is_2d ? resolution.ScaleUp(extent.height) : extent.height), .y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height)
: extent.height),
}; };
boost::container::small_vector<VkImageBlit, 4> regions; boost::container::small_vector<VkImageBlit, 4> regions;
regions.reserve(resources.levels); regions.reserve(resources.levels);
@ -1134,6 +1137,7 @@ bool Image::ScaleUp() {
if (!resolution.active) { if (!resolution.active) {
return false; return false;
} }
scale_count++;
const auto& device = runtime->device; const auto& device = runtime->device;
const bool is_2d = info.type == ImageType::e2D; const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width); const u32 scaled_width = resolution.ScaleUp(info.size.width);
@ -1161,8 +1165,10 @@ bool Image::ScaleUp() {
using namespace VideoCommon; using namespace VideoCommon;
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
if (!scale_view) {
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this); scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
}
auto* view_ptr = scale_view.get(); auto* view_ptr = scale_view.get();
const Region2D src_region{ const Region2D src_region{
@ -1178,7 +1184,10 @@ bool Image::ScaleUp() {
.height = scaled_height, .height = scaled_height,
}; };
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
scale_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent); if (!scale_framebuffer) {
scale_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); const auto color_view = scale_view->Handle(Shader::TextureType::Color2D);
runtime->blit_image_helper.BlitColor( runtime->blit_image_helper.BlitColor(
@ -1186,7 +1195,10 @@ bool Image::ScaleUp() {
Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
} else if (!runtime->device.IsBlitDepthStencilSupported() && } else if (!runtime->device.IsBlitDepthStencilSupported() &&
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
scale_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent); if (!scale_framebuffer) {
scale_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
runtime->blit_image_helper.BlitDepthStencil( runtime->blit_image_helper.BlitDepthStencil(
scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(),
dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
@ -1209,6 +1221,67 @@ bool Image::ScaleDown() {
if (!resolution.active) { if (!resolution.active) {
return false; return false;
} }
const auto& device = runtime->device;
const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
const PixelFormat format = StorageFormat(info.format);
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
} else {
using namespace VideoCommon;
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
if (!normal_view) {
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
normal_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
}
auto* view_ptr = normal_view.get();
const Region2D src_region{
.start = {0, 0},
.end = {static_cast<s32>(scaled_width), static_cast<s32>(scaled_height)},
};
const Region2D dst_region{
.start = {0, 0},
.end = {static_cast<s32>(info.size.width), static_cast<s32>(info.size.height)},
};
const VkExtent2D extent{
.width = scaled_width,
.height = scaled_height,
};
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
if (!normal_framebuffer) {
normal_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
const auto color_view = normal_view->Handle(Shader::TextureType::Color2D);
runtime->blit_image_helper.BlitColor(
normal_framebuffer.get(), color_view, dst_region, src_region,
Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
} else if (!runtime->device.IsBlitDepthStencilSupported() &&
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
if (!normal_framebuffer) {
normal_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
runtime->blit_image_helper.BlitDepthStencil(
normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(),
dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
} else {
// TODO: Use helper blits where applicable
flags &= ~ImageFlagBits::Rescaled;
LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format);
return false;
}
}
ASSERT(info.type != ImageType::Linear); ASSERT(info.type != ImageType::Linear);
current_image = *original_image; current_image = *original_image;
return true; return true;

View file

@ -148,6 +148,9 @@ private:
std::unique_ptr<Framebuffer> scale_framebuffer; std::unique_ptr<Framebuffer> scale_framebuffer;
std::unique_ptr<ImageView> scale_view; std::unique_ptr<ImageView> scale_view;
std::unique_ptr<Framebuffer> normal_framebuffer;
std::unique_ptr<ImageView> normal_view;
}; };
class ImageView : public VideoCommon::ImageViewBase { class ImageView : public VideoCommon::ImageViewBase {

View file

@ -60,8 +60,8 @@ namespace {
ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
: info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
scale_tick{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
if (info.type == ImageType::e3D) { if (info.type == ImageType::e3D) {
slice_offsets = CalculateSliceOffsets(info); slice_offsets = CalculateSliceOffsets(info);

View file

@ -77,6 +77,10 @@ struct ImageBase {
void CheckBadOverlapState(); void CheckBadOverlapState();
void CheckAliasState(); void CheckAliasState();
/// Reports whether this image's scale counter has been bumped at least once,
/// i.e. whether `scale_count` is non-zero.
bool HasScaled() {
const bool scaled_at_least_once = scale_count != 0;
return scaled_at_least_once;
}
ImageInfo info; ImageInfo info;
u32 guest_size_bytes = 0; u32 guest_size_bytes = 0;
@ -84,6 +88,7 @@ struct ImageBase {
u32 converted_size_bytes = 0; u32 converted_size_bytes = 0;
u32 scale_rating = 0; u32 scale_rating = 0;
u64 scale_tick = 0; u64 scale_tick = 0;
u32 scale_count = 0;
ImageFlagBits flags = ImageFlagBits::CpuModified; ImageFlagBits flags = ImageFlagBits::CpuModified;
GPUVAddr gpu_addr = 0; GPUVAddr gpu_addr = 0;

View file

@ -854,8 +854,8 @@ void TextureCache<P>::InvalidateScale(Image& image) {
} }
template <class P> template <class P>
u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) { u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f; const f32 add_to_size = Settings::values.resolution_info.up_factor;
const bool sign = std::signbit(add_to_size); const bool sign = std::signbit(add_to_size);
const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
const u64 tentative_size = image_size_bytes * static_cast<u64>(std::abs(add_to_size)); const u64 tentative_size = image_size_bytes * static_cast<u64>(std::abs(add_to_size));
@ -865,11 +865,14 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) {
template <class P> template <class P>
bool TextureCache<P>::ScaleUp(Image& image) { bool TextureCache<P>::ScaleUp(Image& image) {
const bool has_copy = image.HasScaled();
const bool rescaled = image.ScaleUp(); const bool rescaled = image.ScaleUp();
if (!rescaled) { if (!rescaled) {
return false; return false;
} }
if (!has_copy) {
total_used_memory += GetScaledImageSizeBytes(image); total_used_memory += GetScaledImageSizeBytes(image);
}
InvalidateScale(image); InvalidateScale(image);
return true; return true;
} }
@ -880,7 +883,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
if (!rescaled) { if (!rescaled) {
return false; return false;
} }
const bool has_copy = image.HasScaled();
if (!has_copy) {
total_used_memory -= GetScaledImageSizeBytes(image); total_used_memory -= GetScaledImageSizeBytes(image);
}
InvalidateScale(image); InvalidateScale(image);
return true; return true;
} }
@ -1391,13 +1397,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
"Trying to unregister an already registered image"); "Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::Registered;
image.flags &= ~ImageFlagBits::BadOverlap; image.flags &= ~ImageFlagBits::BadOverlap;
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
lru_cache.Free(image.lru_index); lru_cache.Free(image.lru_index);
const auto& clear_page_table = const auto& clear_page_table =
[this, image_id]( [this, image_id](
@ -1478,6 +1477,16 @@ template <class P>
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked)); ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked; image.flags |= ImageFlagBits::Tracked;
if (image.HasScaled()) {
total_used_memory -= GetScaledImageSizeBytes(image);
}
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
if (False(image.flags & ImageFlagBits::Sparse)) { if (False(image.flags & ImageFlagBits::Sparse)) {
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
return; return;

View file

@ -331,7 +331,7 @@ private:
void InvalidateScale(Image& image); void InvalidateScale(Image& image);
bool ScaleUp(Image& image); bool ScaleUp(Image& image);
bool ScaleDown(Image& image); bool ScaleDown(Image& image);
u64 GetScaledImageSizeBytes(Image& image); u64 GetScaledImageSizeBytes(ImageBase& image);
Runtime& runtime; Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;