From 96fd1348aea9d70cb502a94cbd0412be6edb0189 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Dec 2023 09:50:04 +0100 Subject: [PATCH] GPU SMMU: Expand to 34 bits --- src/core/hle/service/nvdrv/core/nvmap.cpp | 38 +++++++++++++++++-- .../nvdrv/devices/nvhost_nvdec_common.cpp | 1 - src/video_core/gpu.cpp | 1 + src/video_core/host1x/codecs/h264.cpp | 9 ++--- src/video_core/host1x/codecs/vp8.cpp | 4 +- src/video_core/host1x/codecs/vp9.cpp | 6 +-- .../host1x/gpu_device_memory_manager.h | 2 +- src/video_core/host1x/host1x.cpp | 4 +- src/video_core/host1x/host1x.h | 20 ++++++++++ src/video_core/host1x/vic.cpp | 10 ++--- src/video_core/memory_manager.cpp | 18 +++++---- src/video_core/memory_manager.h | 2 + 12 files changed, 86 insertions(+), 29 deletions(-) diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index e4168a37c..0b2ddd980 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -80,6 +80,15 @@ void NvMap::UnmapHandle(Handle& handle_description) { handle_description.unmap_queue_entry.reset(); } + // Free and unmap the handle from Host1x GMMU + if (handle_description.pin_virt_address) { + host1x.GMMU().Unmap(static_cast(handle_description.pin_virt_address), + handle_description.aligned_size); + host1x.Allocator().Free(handle_description.pin_virt_address, + static_cast(handle_description.aligned_size)); + handle_description.pin_virt_address = 0; + } + // Free and unmap the handle from the SMMU auto& smmu = host1x.MemoryManager(); smmu.Unmap(handle_description.d_address, handle_description.aligned_size); @@ -141,6 +150,17 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are } std::scoped_lock lock(handle_description->mutex); + const auto map_low_area = [&] { + if (handle_description->pin_virt_address == 0) { + auto& gmmu_allocator = host1x.Allocator(); + auto& gmmu = host1x.GMMU(); + u32 address = + gmmu_allocator.Allocate(static_cast(handle_description->aligned_size)); + gmmu.Map(static_cast(address), handle_description->d_address, + handle_description->aligned_size); + handle_description->pin_virt_address = address; + } + }; if (!handle_description->pins) { // If we're in the unmap queue we can just remove ourselves and return since we're already // mapped @@ -152,6 +172,12 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are unmap_queue.erase(*handle_description->unmap_queue_entry); handle_description->unmap_queue_entry.reset(); + if (low_area_pin) { + map_low_area(); + handle_description->pins++; + return static_cast(handle_description->pin_virt_address); + } + handle_description->pins++; return handle_description->d_address; } @@ -162,10 +188,7 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are DAddr address{}; auto& smmu = host1x.MemoryManager(); auto* session = core.GetSession(session_id); - - auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1); - //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1); - while ((address = allocate(static_cast(handle_description->aligned_size))) == 0) { + while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) { // Free handles until the allocation succeeds std::scoped_lock queueLock(unmap_queue_lock); if (auto freeHandleDesc{unmap_queue.front()}) { @@ -185,7 +208,14 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are session->smmu_id); } + if (low_area_pin) { + map_low_area(); + } + handle_description->pins++; + if (low_area_pin) { + return static_cast(handle_description->pin_virt_address); + } return handle_description->d_address; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 78bc5f3c4..0b6aa9993 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -95,7 +95,6 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span data, De offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); auto& gpu = system.GPU(); - //auto& device_memory = system.Host1x().MemoryManager(); auto* session = core.GetSession(sessions[fd]); if (gpu.UseNvdec()) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 5f780507b..6ad3b94f8 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -88,6 +88,7 @@ struct GPU::Impl { renderer = std::move(renderer_); rasterizer = renderer->ReadRasterizer(); host1x.MemoryManager().BindInterface(rasterizer); + host1x.GMMU().BindRasterizer(rasterizer); } /// Flush all current written commands into the host GPU for execution. diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 309a7f1d5..994591c8d 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -32,13 +32,12 @@ H264::~H264() = default; std::span H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, size_t* out_configuration_size, bool is_first_frame) { H264DecoderContext context; - host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, - sizeof(H264DecoderContext)); + host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { frame.resize_destructive(context.stream_len); - host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); + host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); *out_configuration_size = 0; return frame; } @@ -159,8 +158,8 @@ std::span H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); *out_configuration_size = encoded_header.size(); - host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, - frame.data() + encoded_header.size(), context.stream_len); + host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(), + context.stream_len); return frame; } diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp index ee6392ff9..be97e3b00 100644 --- a/src/video_core/host1x/codecs/vp8.cpp +++ b/src/video_core/host1x/codecs/vp8.cpp @@ -14,7 +14,7 @@ VP8::~VP8() = default; std::span VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { VP8PictureInfo info; - host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); + host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); const bool is_key_frame = info.key_frame == 1u; const auto bitstream_size = static_cast(info.vld_buffer_size); @@ -45,7 +45,7 @@ std::span VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& frame[9] = static_cast(((info.frame_height >> 8) & 0x3f)); } const u64 bitstream_offset = state.frame_bitstream_offset; - host1x.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); + host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); return frame; } diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index 306c3d0e8..e2ae1f76d 100644 --- a/src/video_core/host1x/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp @@ -358,7 +358,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { PictureInfo picture_info; - host1x.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); + host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); @@ -373,7 +373,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { EntropyProbs entropy; - host1x.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); + host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } @@ -383,7 +383,7 @@ Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters // gpu.SyncGuestHost(); epic, why? current_frame.info = GetVp9PictureInfo(state); current_frame.bit_stream.resize(current_frame.info.bitstream_size); - host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, + host1x.GMMU().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), current_frame.info.bitstream_size); } diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h index a406ce965..6c7858848 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.h +++ b/src/video_core/host1x/gpu_device_memory_manager.h @@ -15,7 +15,7 @@ struct MaxwellDeviceMethods; struct MaxwellDeviceTraits { static constexpr bool supports_pinning = false; - static constexpr size_t device_virtual_bits = 32; + static constexpr size_t device_virtual_bits = 34; using DeviceInterface = typename VideoCore::RasterizerInterface; using DeviceMethods = typename MaxwellDeviceMethods; }; diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index d05bcaf26..b7f9a08cf 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp @@ -9,7 +9,9 @@ namespace Tegra { namespace Host1x { Host1x::Host1x(Core::System& system_) - : system{system_}, syncpoint_manager{}, memory_manager(system.DeviceMemory()) {} + : system{system_}, syncpoint_manager{}, + memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, + allocator{std::make_unique>(1 << 12)} {} } // namespace Host1x diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 18f7389f6..13c37e6b4 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -5,8 +5,10 @@ #include "common/common_types.h" +#include "common/address_space.h" #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/host1x/syncpoint_manager.h" +#include "video_core/memory_manager.h" namespace Core { class System; @@ -36,10 +38,28 @@ public: return memory_manager; } + Tegra::MemoryManager& GMMU() { + return gmmu_manager; + } + + const Tegra::MemoryManager& GMMU() const { + return gmmu_manager; + } + + Common::FlatAllocator& Allocator() { + return *allocator; + } + + const Common::FlatAllocator& Allocator() const { + return *allocator; + } + private: Core::System& system; SyncpointManager syncpoint_manager; Tegra::MaxwellDeviceMemoryManager memory_manager; + Tegra::MemoryManager gmmu_manager; + std::unique_ptr> allocator; }; } // namespace Host1x diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 2a5eba415..1826211a1 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -81,7 +81,7 @@ void Vic::Execute() { LOG_ERROR(Service_NVDRV, "VIC Luma address not set."); return; } - const VicConfig config{host1x.MemoryManager().Read(config_struct_address + 0x20)}; + const VicConfig config{host1x.GMMU().Read(config_struct_address + 0x20)}; auto frame = nvdec_processor->GetFrame(); if (!frame) { return; @@ -162,11 +162,11 @@ void Vic::WriteRGBFrame(std::unique_ptr frame, const VicConfig& c Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, block_height, 0, width * 4); - host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); + host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); } else { // send pitch linear frame const size_t linear_size = width * height * 4; - host1x.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, + host1x.GMMU().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, linear_size); } } @@ -193,7 +193,7 @@ void Vic::WriteYUVFrame(std::unique_ptr frame, const VicConfig& c const std::size_t dst = y * aligned_width; std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width); } - host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), + host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), luma_buffer.size()); // Chroma @@ -233,7 +233,7 @@ void Vic::WriteYUVFrame(std::unique_ptr frame, const VicConfig& c ASSERT(false); break; } - host1x.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), + host1x.GMMU().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), chroma_buffer.size()); } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 82f7a1c3b..ac1417fbc 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -16,18 +16,17 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" - namespace Tegra { using Tegra::Memory::GuestMemoryFlags; std::atomic MemoryManager::unique_identifier_generator{}; -MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, - u64 page_bits_) - : system{system_}, memory{system.Host1x().MemoryManager()}, - address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, - entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, - page_bits != big_page_bits ? page_bits : 0}, +MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, + u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_) + : system{system_}, memory{memory_}, address_space_bits{address_space_bits_}, + page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{}, + page_table{address_space_bits, address_space_bits + page_bits - 38, + page_bits != big_page_bits ? page_bits : 0}, kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( 1, std::memory_order_acq_rel)}, accumulator{std::make_unique()} { @@ -49,6 +48,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 entries.resize(page_table_size / 32, 0); } +MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, + u64 page_bits_) + : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_, + page_bits_) {} + MemoryManager::~MemoryManager() = default; template diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index e2912a73f..6b2cd7efb 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -38,6 +38,8 @@ class MemoryManager final { public: explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, u64 big_page_bits_ = 16, u64 page_bits_ = 12); + explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, u64 address_space_bits_ = 40, + u64 big_page_bits_ = 16, u64 page_bits_ = 12); ~MemoryManager(); size_t GetID() const {