Texture Cache: Fix downscaling and correct memory consumption.

This commit is contained in:
Fernando Sahmkow 2021-10-17 18:01:18 +02:00
parent b60966041c
commit 425ab9ef4b
8 changed files with 146 additions and 35 deletions

View file

@ -876,7 +876,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
}
}
bool Image::Scale() {
bool Image::Scale(bool up_scale) {
const auto format_type = GetFormatType(info.format);
const GLenum attachment = [format_type] {
switch (format_type) {
@ -944,14 +944,25 @@ bool Image::Scale() {
const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
for (s32 level = 0; level < info.resources.levels; ++level) {
const u32 src_level_width = std::max(1u, original_width >> level);
const u32 src_level_height = std::max(1u, original_height >> level);
const u32 dst_level_width = std::max(1u, scaled_width >> level);
const u32 dst_level_height = std::max(1u, scaled_height >> level);
const u32 src_level_width =
std::max(1u, (up_scale ? original_width : scaled_width) >> level);
const u32 src_level_height =
std::max(1u, (up_scale ? original_height : scaled_height) >> level);
const u32 dst_level_width =
std::max(1u, (up_scale ? scaled_width : original_width) >> level);
const u32 dst_level_height =
std::max(1u, (up_scale ? scaled_height : original_height) >> level);
if (up_scale) {
glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer);
glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level,
layer);
} else {
glNamedFramebufferTextureLayer(read_fbo, attachment, upscaled_backup.handle, level,
layer);
glNamedFramebufferTextureLayer(draw_fbo, attachment, texture.handle, level, layer);
}
glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer);
glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level,
layer);
glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
0, dst_level_width, dst_level_height, mask, filter);
}
@ -959,7 +970,12 @@ bool Image::Scale() {
if (scissor_test != GL_FALSE) {
glEnablei(GL_SCISSOR_TEST, 0);
}
current_texture = upscaled_backup.handle;
if (up_scale) {
current_texture = upscaled_backup.handle;
} else {
current_texture = texture.handle;
}
return true;
}
@ -981,6 +997,7 @@ bool Image::ScaleUp() {
flags &= ~ImageFlagBits::Rescaled;
return false;
}
scale_count++;
if (!Scale()) {
flags &= ~ImageFlagBits::Rescaled;
return false;
@ -996,7 +1013,11 @@ bool Image::ScaleDown() {
if (!runtime->resolution.active) {
return false;
}
current_texture = texture.handle;
scale_count++;
if (!Scale(false)) {
flags &= ~ImageFlagBits::Rescaled;
return false;
}
return true;
}

View file

@ -205,7 +205,7 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
bool Scale();
bool Scale(bool up_scale = true);
OGLTexture texture;
OGLTexture upscaled_backup;

View file

@ -592,7 +592,8 @@ struct RangedBarrierRange {
}
void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) {
VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
bool up_scaling = true) {
const bool is_2d = info.type == ImageType::e2D;
const auto resources = info.resources;
const VkExtent2D extent{
@ -605,14 +606,16 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
vk_filter](vk::CommandBuffer cmdbuf) {
vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
const VkOffset2D src_size{
.x = static_cast<s32>(extent.width),
.y = static_cast<s32>(extent.height),
.x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)),
.y = static_cast<s32>(is_2d && up_scaling ? extent.height
: resolution.ScaleUp(extent.height)),
};
const VkOffset2D dst_size{
.x = static_cast<s32>(resolution.ScaleUp(extent.width)),
.y = static_cast<s32>(is_2d ? resolution.ScaleUp(extent.height) : extent.height),
.x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width),
.y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height)
: extent.height),
};
boost::container::small_vector<VkImageBlit, 4> regions;
regions.reserve(resources.levels);
@ -1134,6 +1137,7 @@ bool Image::ScaleUp() {
if (!resolution.active) {
return false;
}
scale_count++;
const auto& device = runtime->device;
const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
@ -1161,8 +1165,10 @@ bool Image::ScaleUp() {
using namespace VideoCommon;
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
if (!scale_view) {
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
}
auto* view_ptr = scale_view.get();
const Region2D src_region{
@ -1178,7 +1184,10 @@ bool Image::ScaleUp() {
.height = scaled_height,
};
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
scale_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
if (!scale_framebuffer) {
scale_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
const auto color_view = scale_view->Handle(Shader::TextureType::Color2D);
runtime->blit_image_helper.BlitColor(
@ -1186,7 +1195,10 @@ bool Image::ScaleUp() {
Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
} else if (!runtime->device.IsBlitDepthStencilSupported() &&
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
scale_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
if (!scale_framebuffer) {
scale_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
runtime->blit_image_helper.BlitDepthStencil(
scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(),
dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
@ -1209,6 +1221,67 @@ bool Image::ScaleDown() {
if (!resolution.active) {
return false;
}
const auto& device = runtime->device;
const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
const PixelFormat format = StorageFormat(info.format);
const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
} else {
using namespace VideoCommon;
static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
if (!normal_view) {
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
normal_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
}
auto* view_ptr = normal_view.get();
const Region2D src_region{
.start = {0, 0},
.end = {static_cast<s32>(scaled_width), static_cast<s32>(scaled_height)},
};
const Region2D dst_region{
.start = {0, 0},
.end = {static_cast<s32>(info.size.width), static_cast<s32>(info.size.height)},
};
const VkExtent2D extent{
.width = scaled_width,
.height = scaled_height,
};
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
if (!normal_framebuffer) {
normal_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
const auto color_view = normal_view->Handle(Shader::TextureType::Color2D);
runtime->blit_image_helper.BlitColor(
normal_framebuffer.get(), color_view, dst_region, src_region,
Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
} else if (!runtime->device.IsBlitDepthStencilSupported() &&
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
if (!normal_framebuffer) {
normal_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
}
runtime->blit_image_helper.BlitDepthStencil(
normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(),
dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
} else {
// TODO: Use helper blits where applicable
flags &= ~ImageFlagBits::Rescaled;
LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format);
return false;
}
}
ASSERT(info.type != ImageType::Linear);
current_image = *original_image;
return true;

View file

@ -148,6 +148,9 @@ private:
std::unique_ptr<Framebuffer> scale_framebuffer;
std::unique_ptr<ImageView> scale_view;
std::unique_ptr<Framebuffer> normal_framebuffer;
std::unique_ptr<ImageView> normal_view;
};
class ImageView : public VideoCommon::ImageViewBase {

View file

@ -60,8 +60,8 @@ namespace {
ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
: info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{},
scale_tick{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
if (info.type == ImageType::e3D) {
slice_offsets = CalculateSliceOffsets(info);

View file

@ -77,6 +77,10 @@ struct ImageBase {
void CheckBadOverlapState();
void CheckAliasState();
bool HasScaled() {
return scale_count > 0;
}
ImageInfo info;
u32 guest_size_bytes = 0;
@ -84,6 +88,7 @@ struct ImageBase {
u32 converted_size_bytes = 0;
u32 scale_rating = 0;
u64 scale_tick = 0;
u32 scale_count = 0;
ImageFlagBits flags = ImageFlagBits::CpuModified;
GPUVAddr gpu_addr = 0;

View file

@ -854,8 +854,8 @@ void TextureCache<P>::InvalidateScale(Image& image) {
}
template <class P>
u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) {
const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f;
u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
const f32 add_to_size = Settings::values.resolution_info.up_factor;
const bool sign = std::signbit(add_to_size);
const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
const u64 tentative_size = image_size_bytes * static_cast<u64>(std::abs(add_to_size));
@ -865,11 +865,14 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) {
template <class P>
bool TextureCache<P>::ScaleUp(Image& image) {
const bool has_copy = image.HasScaled();
const bool rescaled = image.ScaleUp();
if (!rescaled) {
return false;
}
total_used_memory += GetScaledImageSizeBytes(image);
if (!has_copy) {
total_used_memory += GetScaledImageSizeBytes(image);
}
InvalidateScale(image);
return true;
}
@ -880,7 +883,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
if (!rescaled) {
return false;
}
total_used_memory -= GetScaledImageSizeBytes(image);
const bool has_copy = image.HasScaled();
if (!has_copy) {
total_used_memory -= GetScaledImageSizeBytes(image);
}
InvalidateScale(image);
return true;
}
@ -1391,13 +1397,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
"Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered;
image.flags &= ~ImageFlagBits::BadOverlap;
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
lru_cache.Free(image.lru_index);
const auto& clear_page_table =
[this, image_id](
@ -1478,6 +1477,16 @@ template <class P>
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked;
if (image.HasScaled()) {
total_used_memory -= GetScaledImageSizeBytes(image);
}
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
if (False(image.flags & ImageFlagBits::Sparse)) {
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
return;

View file

@ -331,7 +331,7 @@ private:
void InvalidateScale(Image& image);
bool ScaleUp(Image& image);
bool ScaleDown(Image& image);
u64 GetScaledImageSizeBytes(Image& image);
u64 GetScaledImageSizeBytes(ImageBase& image);
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;