From b8c906f9d1acbd59c264bd3ad335c6d75f92da5c Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 24 Jun 2023 19:58:45 -0400 Subject: [PATCH 1/5] scratch_buffer: Add member types to ScratchBuffer Allows for implicit conversion to std::span. --- src/common/scratch_buffer.h | 46 +++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index 6fe907953..d5961b020 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -5,7 +5,6 @@ #include -#include "common/concepts.h" #include "common/make_unique_for_overwrite.h" namespace Common { @@ -19,15 +18,22 @@ namespace Common { template class ScratchBuffer { public: - using iterator = T*; - using const_iterator = const T*; - using value_type = T; using element_type = T; - using iterator_category = std::contiguous_iterator_tag; + using value_type = T; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using iterator = pointer; + using const_iterator = const_pointer; + using iterator_category = std::random_access_iterator_tag; + using iterator_concept = std::contiguous_iterator_tag; ScratchBuffer() = default; - explicit ScratchBuffer(size_t initial_capacity) + explicit ScratchBuffer(size_type initial_capacity) : last_requested_size{initial_capacity}, buffer_capacity{initial_capacity}, buffer{Common::make_unique_for_overwrite(initial_capacity)} {} @@ -39,7 +45,7 @@ public: /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will remain intact. - void resize(size_t size) { + void resize(size_type size) { if (size > buffer_capacity) { auto new_buffer = Common::make_unique_for_overwrite(size); std::move(buffer.get(), buffer.get() + buffer_capacity, new_buffer.get()); @@ -51,7 +57,7 @@ public: /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will be destroyed if a reallocation occurs. - void resize_destructive(size_t size) { + void resize_destructive(size_type size) { if (size > buffer_capacity) { buffer_capacity = size; buffer = Common::make_unique_for_overwrite(buffer_capacity); @@ -59,43 +65,43 @@ public: last_requested_size = size; } - [[nodiscard]] T* data() noexcept { + [[nodiscard]] pointer data() noexcept { return buffer.get(); } - [[nodiscard]] const T* data() const noexcept { + [[nodiscard]] const_pointer data() const noexcept { return buffer.get(); } - [[nodiscard]] T* begin() noexcept { + [[nodiscard]] iterator begin() noexcept { return data(); } - [[nodiscard]] const T* begin() const noexcept { + [[nodiscard]] const_iterator begin() const noexcept { return data(); } - [[nodiscard]] T* end() noexcept { + [[nodiscard]] iterator end() noexcept { return data() + last_requested_size; } - [[nodiscard]] const T* end() const noexcept { + [[nodiscard]] const_iterator end() const noexcept { return data() + last_requested_size; } - [[nodiscard]] T& operator[](size_t i) { + [[nodiscard]] reference operator[](size_type i) { return buffer[i]; } - [[nodiscard]] const T& operator[](size_t i) const { + [[nodiscard]] const_reference operator[](size_type i) const { return buffer[i]; } - [[nodiscard]] size_t size() const noexcept { + [[nodiscard]] size_type size() const noexcept { return last_requested_size; } - [[nodiscard]] size_t capacity() const noexcept { + [[nodiscard]] size_type capacity() const noexcept { return buffer_capacity; } @@ -106,8 +112,8 @@ public: } private: - size_t last_requested_size{}; - size_t buffer_capacity{}; + size_type last_requested_size{}; + size_type buffer_capacity{}; std::unique_ptr buffer{}; }; From fbd85417ffdfe23dd4b4d3d13518244bd00be361 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 24 Jun 2023 20:31:20 -0400 Subject: [PATCH 2/5] ring_buffer: Fix const usage on std::span --- src/common/ring_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 416680d44..5c961b202 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h @@ -54,7 +54,7 @@ public: return push_count; } - std::size_t Push(const std::span input) { + std::size_t Push(std::span input) { return Push(input.data(), input.size()); } From 310b6cf4af940fa07666400426bbcca815c5375c Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 24 Jun 2023 21:58:23 -0400 Subject: [PATCH 3/5] general: Use ScratchBuffer where possible --- src/core/hle/service/audio/audin_u.cpp | 16 ++++++------ src/core/hle/service/audio/audout_u.cpp | 20 ++++++++------- src/core/hle/service/audio/audren_u.cpp | 23 +++++++++-------- src/core/hle/service/audio/hwopus.cpp | 9 ++++--- .../hle/service/nvdrv/nvdrv_interface.cpp | 25 ++++++++++--------- src/core/hle/service/nvdrv/nvdrv_interface.h | 5 ++-- src/video_core/host1x/codecs/codec.cpp | 2 +- src/video_core/host1x/codecs/h264.cpp | 14 +++++------ src/video_core/host1x/codecs/h264.h | 12 ++++++--- src/video_core/host1x/codecs/vp8.cpp | 2 +- src/video_core/host1x/codecs/vp8.h | 7 +++--- src/video_core/host1x/codecs/vp9.cpp | 1 + src/video_core/host1x/codecs/vp9.h | 8 +++--- src/video_core/host1x/codecs/vp9_types.h | 1 + 14 files changed, 81 insertions(+), 64 deletions(-) diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index c8d574993..526a39130 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp @@ -5,7 +5,7 @@ #include "audio_core/renderer/audio_device.h" #include "common/common_funcs.h" #include "common/logging/log.h" -#include "common/settings.h" +#include "common/scratch_buffer.h" #include "common/string_util.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -124,12 +124,15 @@ private: void GetReleasedAudioInBuffer(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements(); - tmp_buffer.resize_destructive(write_buffer_size); - tmp_buffer[0] = 0; + released_buffer.resize_destructive(write_buffer_size); + released_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(tmp_buffer); + const auto count = impl->GetReleasedBuffers(released_buffer); - ctx.WriteBuffer(tmp_buffer); + LOG_TRACE(Service_Audio, "called. Session {} released {} buffers", + impl->GetSystem().GetSessionId(), count); + + ctx.WriteBuffer(released_buffer); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); @@ -155,7 +158,6 @@ private: LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count); IPC::ResponseBuilder rb{ctx, 3}; - rb.Push(ResultSuccess); rb.Push(buffer_count); } @@ -195,7 +197,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr impl; - Common::ScratchBuffer tmp_buffer; + Common::ScratchBuffer released_buffer; }; AudInU::AudInU(Core::System& system_) diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 032c8c11f..23f84a29f 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -9,6 +9,7 @@ #include "audio_core/renderer/audio_device.h" #include "common/common_funcs.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "common/string_util.h" #include "common/swap.h" #include "core/core.h" @@ -102,8 +103,8 @@ private: AudioOutBuffer buffer{}; std::memcpy(&buffer, in_buffer.data(), sizeof(AudioOutBuffer)); - [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()}; - LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}", sessionid, tag); + LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}", + impl->GetSystem().GetSessionId(), tag); auto result = impl->AppendBuffer(buffer, tag); @@ -123,12 +124,15 @@ private: void GetReleasedAudioOutBuffers(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements(); - tmp_buffer.resize_destructive(write_buffer_size); - tmp_buffer[0] = 0; + released_buffer.resize_destructive(write_buffer_size); + released_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(tmp_buffer); + const auto count = impl->GetReleasedBuffers(released_buffer); - ctx.WriteBuffer(tmp_buffer); + ctx.WriteBuffer(released_buffer); + + LOG_TRACE(Service_Audio, "called. Session {} released {} buffers", + impl->GetSystem().GetSessionId(), count); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); @@ -154,7 +158,6 @@ private: LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count); IPC::ResponseBuilder rb{ctx, 3}; - rb.Push(ResultSuccess); rb.Push(buffer_count); } @@ -165,7 +168,6 @@ private: LOG_DEBUG(Service_Audio, "called. Played samples={}", samples_played); IPC::ResponseBuilder rb{ctx, 4}; - rb.Push(ResultSuccess); rb.Push(samples_played); } @@ -205,7 +207,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr impl; - Common::ScratchBuffer tmp_buffer; + Common::ScratchBuffer released_buffer; }; AudOutU::AudOutU(Core::System& system_) diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 12845c23a..003870176 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -15,6 +15,7 @@ #include "common/common_funcs.h" #include "common/logging/log.h" #include "common/polyfill_ranges.h" +#include "common/scratch_buffer.h" #include "common/string_util.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -119,23 +120,23 @@ private: auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0}; if (is_buffer_b) { const auto buffersB{ctx.BufferDescriptorB()}; - tmp_output.resize_destructive(buffersB[0].Size()); - tmp_performance.resize_destructive(buffersB[1].Size()); + output_buffer.resize_destructive(buffersB[0].Size()); + performance_buffer.resize_destructive(buffersB[1].Size()); } else { const auto buffersC{ctx.BufferDescriptorC()}; - tmp_output.resize_destructive(buffersC[0].Size()); - tmp_performance.resize_destructive(buffersC[1].Size()); + output_buffer.resize_destructive(buffersC[0].Size()); + performance_buffer.resize_destructive(buffersC[1].Size()); } - auto result = impl->RequestUpdate(input, tmp_performance, tmp_output); + auto result = impl->RequestUpdate(input, performance_buffer, output_buffer); if (result.IsSuccess()) { if (is_buffer_b) { - ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0); - ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1); + ctx.WriteBufferB(output_buffer.data(), output_buffer.size(), 0); + ctx.WriteBufferB(performance_buffer.data(), performance_buffer.size(), 1); } else { - ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0); - ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1); + ctx.WriteBufferC(output_buffer.data(), output_buffer.size(), 0); + ctx.WriteBufferC(performance_buffer.data(), performance_buffer.size(), 1); } } else { LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description); @@ -233,8 +234,8 @@ private: Kernel::KEvent* rendered_event; Manager& manager; std::unique_ptr impl; - Common::ScratchBuffer tmp_output; - Common::ScratchBuffer tmp_performance; + Common::ScratchBuffer output_buffer; + Common::ScratchBuffer performance_buffer; }; class IAudioDevice final : public ServiceFramework { diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index c835f6cb7..fa77007f3 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -11,6 +11,7 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "core/hle/service/audio/hwopus.h" #include "core/hle/service/ipc_helpers.h" @@ -68,13 +69,13 @@ private: ExtraBehavior extra_behavior) { u32 consumed = 0; u32 sample_count = 0; - tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements()); + samples.resize_destructive(ctx.GetWriteBufferNumElements()); if (extra_behavior == ExtraBehavior::ResetContext) { ResetDecoderContext(); } - if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) { + if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { LOG_ERROR(Audio, "Failed to decode opus data"); IPC::ResponseBuilder rb{ctx, 2}; // TODO(ogniK): Use correct error code @@ -90,7 +91,7 @@ private: if (performance) { rb.Push(*performance); } - ctx.WriteBuffer(tmp_samples); + ctx.WriteBuffer(samples); } bool DecodeOpusData(u32& consumed, u32& sample_count, std::span input, @@ -154,7 +155,7 @@ private: OpusDecoderPtr decoder; u32 sample_rate; u32 channel_count; - Common::ScratchBuffer tmp_samples; + Common::ScratchBuffer samples; }; class IHardwareOpusDecoderManager final : public ServiceFramework { diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index 348207e25..c8a880e84 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: 2021 Skyline Team and Contributors // SPDX-License-Identifier: GPL-3.0-or-later -#include #include "common/logging/log.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -63,12 +62,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) { } // Check device - tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); + output_buffer.resize_destructive(ctx.GetWriteBufferSize(0)); const auto input_buffer = ctx.ReadBuffer(0); - const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output); + const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output); + ctx.WriteBuffer(output_buffer); } IPC::ResponseBuilder rb{ctx, 3}; @@ -90,12 +89,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) { const auto input_buffer = ctx.ReadBuffer(0); const auto input_inlined_buffer = ctx.ReadBuffer(1); - tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); + output_buffer.resize_destructive(ctx.GetWriteBufferSize(0)); const auto nv_result = - nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output); + nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output); + ctx.WriteBuffer(output_buffer); } IPC::ResponseBuilder rb{ctx, 3}; @@ -116,12 +115,14 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) { } const auto input_buffer = ctx.ReadBuffer(0); - tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); - tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1)); - const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline); + output_buffer.resize_destructive(ctx.GetWriteBufferSize(0)); + inline_output_buffer.resize_destructive(ctx.GetWriteBufferSize(1)); + + const auto nv_result = + nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, inline_output_buffer); if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output, 0); - ctx.WriteBuffer(tmp_output_inline, 1); + ctx.WriteBuffer(output_buffer, 0); + ctx.WriteBuffer(inline_output_buffer, 1); } IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h index 4b593ff90..6e98115dc 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.h +++ b/src/core/hle/service/nvdrv/nvdrv_interface.h @@ -4,6 +4,7 @@ #pragma once #include + #include "common/scratch_buffer.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/service.h" @@ -34,8 +35,8 @@ private: u64 pid{}; bool is_initialized{}; - Common::ScratchBuffer tmp_output; - Common::ScratchBuffer tmp_output_inline; + Common::ScratchBuffer output_buffer; + Common::ScratchBuffer inline_output_buffer; }; } // namespace Service::Nvidia diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index cd6a3a9b8..da07a556f 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -290,7 +290,7 @@ void Codec::Decode() { return vp9_decoder->GetFrameBytes(); default: ASSERT(false); - return std::vector{}; + return std::span{}; } }(); AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index ce827eb6c..862904e39 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -29,15 +29,15 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {} H264::~H264() = default; -const std::vector& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, - bool is_first_frame) { +std::span H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, + bool is_first_frame) { H264DecoderContext context; host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.stream_len); + frame.resize_destructive(context.stream_len); host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); return frame; } @@ -135,14 +135,14 @@ const std::vector& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegist for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); std::span matrix{context.weight_scale}; - writer.WriteScalingList(matrix, index * 16, 16); + writer.WriteScalingList(scan, matrix, index * 16, 16); } if (context.h264_parameter_set.transform_8x8_mode_flag) { for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); std::span matrix{context.weight_scale_8x8}; - writer.WriteScalingList(matrix, index * 64, 64); + writer.WriteScalingList(scan, matrix, index * 64, 64); } } @@ -188,8 +188,8 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(std::span list, s32 start, s32 count) { - static Common::ScratchBuffer scan{}; +void H264BitWriter::WriteScalingList(Common::ScratchBuffer& scan, std::span list, + s32 start, s32 count) { scan.resize_destructive(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h index 5cc86454e..d6b556322 100644 --- a/src/video_core/host1x/codecs/h264.h +++ b/src/video_core/host1x/codecs/h264.h @@ -5,9 +5,11 @@ #include #include + #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { @@ -37,7 +39,8 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(std::span list, s32 start, s32 count); + void WriteScalingList(Common::ScratchBuffer& scan, std::span list, s32 start, + s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector& GetByteArray(); @@ -63,11 +66,12 @@ public: ~H264(); /// Compose the H264 frame for FFmpeg decoding - [[nodiscard]] const std::vector& ComposeFrame( - const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); + [[nodiscard]] std::span ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, + bool is_first_frame = false); private: - std::vector frame; + Common::ScratchBuffer frame; + Common::ScratchBuffer scan; Host1x::Host1x& host1x; struct H264ParameterSet { diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp index 28fb12cb8..ee6392ff9 100644 --- a/src/video_core/host1x/codecs/vp8.cpp +++ b/src/video_core/host1x/codecs/vp8.cpp @@ -12,7 +12,7 @@ VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {} VP8::~VP8() = default; -const std::vector& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { +std::span VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { VP8PictureInfo info; host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h index 5bf07ecab..7926b73f3 100644 --- a/src/video_core/host1x/codecs/vp8.h +++ b/src/video_core/host1x/codecs/vp8.h @@ -4,10 +4,11 @@ #pragma once #include -#include +#include #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { @@ -24,11 +25,11 @@ public: ~VP8(); /// Compose the VP8 frame for FFmpeg decoding - [[nodiscard]] const std::vector& ComposeFrame( + [[nodiscard]] std::span ComposeFrame( const Host1x::NvdecCommon::NvdecRegisters& state); private: - std::vector frame; + Common::ScratchBuffer frame; Host1x::Host1x& host1x; struct VP8PictureInfo { diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index cf40c9012..306c3d0e8 100644 --- a/src/video_core/host1x/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp @@ -3,6 +3,7 @@ #include // for std::copy #include + #include "common/assert.h" #include "video_core/host1x/codecs/vp9.h" #include "video_core/host1x/host1x.h" diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h index d4083e8d3..f1ed19508 100644 --- a/src/video_core/host1x/codecs/vp9.h +++ b/src/video_core/host1x/codecs/vp9.h @@ -4,9 +4,11 @@ #pragma once #include +#include #include #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "common/stream.h" #include "video_core/host1x/codecs/vp9_types.h" #include "video_core/host1x/nvdec_common.h" @@ -128,8 +130,8 @@ public: return !current_frame_info.show_frame; } - /// Returns a const reference to the composed frame data. - [[nodiscard]] const std::vector& GetFrameBytes() const { + /// Returns a const span to the composed frame data. + [[nodiscard]] std::span GetFrameBytes() const { return frame; } @@ -181,7 +183,7 @@ private: [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader(); Host1x::Host1x& host1x; - std::vector frame; + Common::ScratchBuffer frame; std::array loop_filter_ref_deltas{}; std::array loop_filter_mode_deltas{}; diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h index adad8ed7e..cc9b25690 100644 --- a/src/video_core/host1x/codecs/vp9_types.h +++ b/src/video_core/host1x/codecs/vp9_types.h @@ -5,6 +5,7 @@ #include #include + #include "common/common_funcs.h" #include "common/common_types.h" From 5a09fa50122af7c56ca3a05b18a1d2ab1e6b0e8b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 24 Jun 2023 22:25:35 -0400 Subject: [PATCH 4/5] maxwell_dma: Specify dst_operand.pitch instead of a temp var --- src/video_core/engines/maxwell_dma.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a290d6ea7..f8598fd98 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -174,8 +174,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_operand.address = regs.offset_in; DMA::BufferOperand dst_operand; - u32 abs_pitch_out = std::abs(static_cast(regs.pitch_out)); - dst_operand.pitch = abs_pitch_out; + dst_operand.pitch = static_cast(std::abs(regs.pitch_out)); dst_operand.width = regs.line_length_in; dst_operand.height = regs.line_count; dst_operand.address = regs.offset_out; @@ -222,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t dst_size = static_cast(abs_pitch_out) * regs.line_count; + const size_t dst_size = dst_operand.pitch * regs.line_count; read_buffer.resize_destructive(src_size); write_buffer.resize_destructive(dst_size); @@ -231,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, - abs_pitch_out); + dst_operand.pitch); memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } From 1a46823ec59b0ad4556e494c0488d5a8160aa19b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 30 Jun 2023 21:50:44 -0400 Subject: [PATCH 5/5] parcel: Optimize small_vector sizes --- src/core/hle/service/nvnflinger/parcel.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h index 23ba315a0..e2c9bbd50 100644 --- a/src/core/hle/service/nvnflinger/parcel.h +++ b/src/core/hle/service/nvnflinger/parcel.h @@ -6,6 +6,7 @@ #include #include #include + #include #include "common/alignment.h" @@ -148,9 +149,9 @@ public: this->WriteImpl(0U, m_object_buffer); } - std::vector Serialize() const { - std::vector output_buffer(sizeof(ParcelHeader) + m_data_buffer.size() + - m_object_buffer.size()); + std::span Serialize() { + m_output_buffer.resize(sizeof(ParcelHeader) + m_data_buffer.size() + + m_object_buffer.size()); ParcelHeader header{}; header.data_size = static_cast(m_data_buffer.size()); @@ -158,17 +159,17 @@ public: header.objects_size = static_cast(m_object_buffer.size()); header.objects_offset = header.data_offset + header.data_size; - std::memcpy(output_buffer.data(), &header, sizeof(header)); - std::ranges::copy(m_data_buffer, output_buffer.data() + header.data_offset); - std::ranges::copy(m_object_buffer, output_buffer.data() + header.objects_offset); + std::memcpy(m_output_buffer.data(), &header, sizeof(ParcelHeader)); + std::ranges::copy(m_data_buffer, m_output_buffer.data() + header.data_offset); + std::ranges::copy(m_object_buffer, m_output_buffer.data() + header.objects_offset); - return output_buffer; + return m_output_buffer; } private: - template + template requires(std::is_trivially_copyable_v) - void WriteImpl(const T& val, boost::container::small_vector& buffer) { + void WriteImpl(const T& val, boost::container::small_vector& buffer) { const size_t aligned_size = Common::AlignUp(sizeof(T), 4); const size_t old_size = buffer.size(); buffer.resize(old_size + aligned_size); @@ -177,8 +178,9 @@ private: } private: - boost::container::small_vector m_data_buffer; - boost::container::small_vector m_object_buffer; + boost::container::small_vector m_data_buffer; + boost::container::small_vector m_object_buffer; + boost::container::small_vector m_output_buffer; }; } // namespace Service::android