VideoCore: implement channels on gpu caches.

This commit is contained in:
Fernando Sahmkow 2021-11-05 15:52:31 +01:00
parent c77b8df12e
commit 139ea93512
50 changed files with 1469 additions and 817 deletions

View file

@ -10,13 +10,17 @@
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "video_core/control/channel_state.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Service::Nvidia::Devices {
nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, NvCore::Container& core)
: nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
: nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
nvhost_as_gpu::~nvhost_as_gpu() = default;
NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@ -102,9 +106,9 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
params.offset = *(gmmu->AllocateFixed(params.offset, size));
} else {
params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
params.offset = gmmu->Allocate(size, params.align);
}
auto result = NvResult::Success;
@ -124,8 +128,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
params.pages, params.page_size);
system.GPU().MemoryManager().Unmap(params.offset,
static_cast<std::size_t>(params.pages) * params.page_size);
gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
@ -148,7 +151,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
// If nvmap handle is null, we should unmap instead.
const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
const auto size{static_cast<u64>(entry.pages) << 0x10};
system.GPU().MemoryManager().Unmap(offset, size);
gmmu->Unmap(offset, size);
continue;
}
@ -162,8 +165,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
const auto size{static_cast<u64>(entry.pages) << 0x10};
const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
const auto addr{
system.GPU().MemoryManager().Map(object->address + map_offset, offset, size)};
const auto addr{gmmu->Map(object->address + map_offset, offset, size)};
if (!addr) {
LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
@ -186,13 +188,12 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
params.offset);
auto& gpu = system.GPU();
if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
LOG_CRITICAL(Service_NVDRV,
"remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
"mapping_size = {}, offset={}",
@ -238,9 +239,9 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
if (is_alloc) {
params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
params.offset = gmmu->MapAllocate(physical_address, size, page_size);
} else {
params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
params.offset = gmmu->Map(physical_address, params.offset, size);
}
auto result = NvResult::Success;
@ -262,7 +263,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
if (const auto size{RemoveBufferMap(params.offset)}; size) {
system.GPU().MemoryManager().Unmap(params.offset, *size);
gmmu->Unmap(params.offset, *size);
} else {
LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
}
@ -274,9 +275,10 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlBindChannel params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
channel = params.fd;
auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
gpu_channel_device->channel_state->memory_manager = gmmu;
return NvResult::Success;
}

View file

@ -13,6 +13,14 @@
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Tegra {
class MemoryManager;
} // namespace Tegra
namespace Service::Nvidia {
class Module;
}
namespace Service::Nvidia::NvCore {
class Container;
class NvMap;
@ -34,7 +42,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
class nvhost_as_gpu final : public nvdevice {
public:
explicit nvhost_as_gpu(Core::System& system_, NvCore::Container& core);
explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
~nvhost_as_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@ -187,9 +195,13 @@ private:
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
Module& module;
NvCore::Container& container;
NvCore::NvMap& nvmap;
std::shared_ptr<Tegra::MemoryManager> gmmu;
// This is expected to be ordered, therefore we must use a map, not unordered_map
std::map<GPUVAddr, BufferMap> buffer_mappings;
};

View file

@ -11,12 +11,14 @@
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/memory.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/puller.h"
#include "video_core/gpu.h"
namespace Service::Nvidia::Devices {
namespace {
Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
Tegra::GPU::FenceAction result{};
Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
Tegra::Engines::Puller::FenceAction result{};
result.op.Assign(op);
result.syncpoint_id.Assign(syncpoint_id);
return {result.raw};
@ -26,7 +28,8 @@ Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoi
nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
NvCore::Container& core_)
: nvdevice{system_}, events_interface{events_interface_}, core{core_},
syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {
syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
channel_state{system.GPU().AllocateChannel()} {
channel_fence.id = syncpoint_manager.AllocateSyncpoint();
channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
sm_exception_breakpoint_int_report_event =
@ -180,6 +183,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
params.unk3);
if (channel_state->initiated) {
LOG_CRITICAL(Service_NVDRV, "Already allocated!");
return NvResult::AlreadyAllocated;
}
system.GPU().InitChannel(*channel_state);
channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
params.fence_out = channel_fence;
@ -206,7 +215,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
{fence.value},
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
Tegra::SubmissionMode::Increasing),
BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
};
}
@ -220,7 +229,8 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
for (u32 count = 0; count < add_increment; ++count) {
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
Tegra::SubmissionMode::Increasing));
result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
result.emplace_back(
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
}
return result;
@ -247,11 +257,13 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
auto& gpu = system.GPU();
const auto bind_id = channel_state->bind_id;
params.fence_out.id = channel_fence.id;
if (params.flags.add_wait.Value() &&
!syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
}
if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
@ -262,15 +274,15 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
}
gpu.PushGPUEntries(std::move(entries));
gpu.PushGPUEntries(bind_id, std::move(entries));
if (params.flags.add_increment.Value()) {
if (params.flags.suppress_wfi) {
gpu.PushGPUEntries(Tegra::CommandList{
BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList(
params.fence_out, params.AddIncrementValue())});
} else {
gpu.PushGPUEntries(Tegra::CommandList{
BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList(
params.fence_out, params.AddIncrementValue())});
}
}

View file

@ -13,6 +13,12 @@
#include "core/hle/service/nvdrv/nvdata.h"
#include "video_core/dma_pusher.h"
namespace Tegra {
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace Service::Nvidia {
namespace NvCore {
@ -26,6 +32,7 @@ class EventInterface;
namespace Service::Nvidia::Devices {
class nvhost_as_gpu;
class nvmap;
class nvhost_gpu final : public nvdevice {
public:
@ -46,6 +53,7 @@ public:
Kernel::KEvent* QueryEvent(u32 event_id) override;
private:
friend class nvhost_as_gpu;
enum class CtxObjects : u32_le {
Ctx2D = 0x902D,
Ctx3D = 0xB197,
@ -204,6 +212,7 @@ private:
NvCore::Container& core;
NvCore::SyncpointManager& syncpoint_manager;
NvCore::NvMap& nvmap;
std::shared_ptr<Tegra::Control::ChannelState> channel_state;
NvFence channel_fence;
// Events

View file

@ -168,7 +168,7 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
IocFromIdParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, id:{}");
LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);
// Handles and IDs are always the same value in nvmap however IDs can be used globally given the
// right permissions.

View file

@ -74,7 +74,7 @@ Module::Module(Core::System& system)
: service_context{system, "nvdrv"}, events_interface{*this}, container{system.GPU()} {
builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvhost_as_gpu>(system, container);
std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {

View file

@ -35,6 +35,12 @@ add_library(video_core STATIC
command_classes/vic.h
compatible_formats.cpp
compatible_formats.h
control/channel_state.cpp
control/channel_state.h
control/channel_state_cache.cpp
control/channel_state_cache.h
control/scheduler.cpp
control/scheduler.h
delayed_destruction_ring.h
dirty_flags.cpp
dirty_flags.h
@ -54,6 +60,8 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/puller.cpp
engines/puller.h
framebuffer_config.h
macro/macro.cpp
macro/macro.h

View file

@ -5,7 +5,6 @@
#include <algorithm>
#include <array>
#include <deque>
#include <memory>
#include <mutex>
#include <numeric>
@ -23,6 +22,7 @@
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
template <typename P>
class BufferCache {
class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
// Page size for caching purposes.
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
@ -116,10 +116,7 @@ public:
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_);
Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
void TickFrame();
@ -367,9 +364,6 @@ private:
void ClearDownload(IntervalType subtract_interval);
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
SlotVector<Buffer> slot_buffers;
@ -444,12 +438,8 @@ private:
template <class P>
BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_)
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
: runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
common_ranges.clear();
@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
template <class P>
bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
if (!cpu_src_address || !cpu_dest_address) {
return false;
}
@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
template <class P>
bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
if (!cpu_dst_address) {
return false;
}
@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
template <class P>
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
const Binding binding{
.cpu_addr = *cpu_addr,
.size = size,
@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
if (is_indexed) {
BindHostIndexBuffer();
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
}
@ -733,7 +723,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
enabled_storage_buffers[stage] |= 1U << ssbo_index;
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
const auto& cbufs = maxwell3d.state.shader_stages[stage];
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
}
@ -770,7 +760,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
enabled_compute_storage_buffers |= 1U << ssbo_index;
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
const auto& launch_desc = kepler_compute.launch_description;
const auto& launch_desc = kepler_compute->launch_description;
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
const auto& cbufs = launch_desc.const_buffer_config;
@ -991,19 +981,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const u32 new_offset = offset + maxwell3d.regs.index_array.first *
maxwell3d.regs.index_array.FormatSizeInBytes();
const u32 new_offset = offset + maxwell3d->regs.index_array.first *
maxwell3d->regs.index_array.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
buffer, offset, size);
runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
maxwell3d->regs.index_array.first,
maxwell3d->regs.index_array.count, buffer, offset, size);
}
}
template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
auto& flags = maxwell3d.dirty.flags;
auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@ -1014,7 +1004,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
}
flags[Dirty::VertexBuffer0 + index] = false;
const u32 stride = maxwell3d.regs.vertex_array[index].stride;
const u32 stride = maxwell3d->regs.vertex_array[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
}
@ -1154,7 +1144,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@ -1262,8 +1252,8 @@ template <class P>
void BufferCache<P>::UpdateIndexBuffer() {
// We have to check for the dirty flags and index count
// The index count is currently changed without updating the dirty flags
const auto& index_array = maxwell3d.regs.index_array;
auto& flags = maxwell3d.dirty.flags;
const auto& index_array = maxwell3d->regs.index_array;
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
return;
}
@ -1272,7 +1262,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_begin = index_array.StartAddress();
const GPUVAddr gpu_addr_end = index_array.EndAddress();
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
@ -1289,8 +1279,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
template <class P>
void BufferCache<P>::UpdateVertexBuffers() {
auto& flags = maxwell3d.dirty.flags;
if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
auto& flags = maxwell3d->dirty.flags;
if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
return;
}
flags[Dirty::VertexBuffers] = false;
@ -1302,28 +1292,15 @@ void BufferCache<P>::UpdateVertexBuffers() {
template <class P>
void BufferCache<P>::UpdateVertexBuffer(u32 index) {
if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
return;
}
const auto& array = maxwell3d.regs.vertex_array[index];
const auto& limit = maxwell3d.regs.vertex_array_limit[index];
const auto& array = maxwell3d->regs.vertex_array[index];
const auto& limit = maxwell3d->regs.vertex_array_limit[index];
const GPUVAddr gpu_addr_begin = array.StartAddress();
const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
if (address_size >= 64_MiB) {
// Reported vertex buffer size is very large, cap to mapped buffer size
GPUVAddr submapped_addr_end = gpu_addr_begin;
const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
if (ranges.size() > 0) {
const auto& [addr, size] = *ranges.begin();
submapped_addr_end = addr + size;
}
address_size =
std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
}
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 size = address_size; // TODO: Analyze stride and number of vertices
if (array.enable == 0 || size == 0 || !cpu_addr) {
vertex_buffers[index] = NULL_BINDING;
@ -1382,7 +1359,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@ -1392,10 +1369,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
const auto& binding = maxwell3d.regs.tfb_bindings[index];
const auto& binding = maxwell3d->regs.tfb_bindings[index];
const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
const u32 size = binding.buffer_size;
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
transform_feedback_buffers[index] = NULL_BINDING;
return;
@ -1414,10 +1391,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = compute_uniform_buffers[index];
binding = NULL_BINDING;
const auto& launch_desc = kepler_compute.launch_description;
const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
const auto& cbuf = launch_desc.const_buffer_config[index];
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
if (cpu_addr) {
binding.cpu_addr = *cpu_addr;
binding.size = cbuf.size;
@ -1831,7 +1808,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
dirty_uniform_buffers.fill(~u32{0});
uniform_buffer_binding_sizes.fill({});
}
auto& flags = maxwell3d.dirty.flags;
auto& flags = maxwell3d->dirty.flags;
flags[Dirty::IndexBuffer] = true;
flags[Dirty::VertexBuffers] = true;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@ -1842,9 +1819,9 @@ void BufferCache<P>::NotifyBufferDeletion() {
template <class P>
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
@ -1859,7 +1836,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
template <class P>
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
TextureBufferBinding binding;
if (!cpu_addr || size == 0) {
binding.cpu_addr = 0;

View file

@ -0,0 +1,44 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "video_core/control/channel_state.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/puller.h"
#include "video_core/memory_manager.h"
namespace Tegra::Control {
ChannelState::ChannelState(s32 bind_id_) {
bind_id = bind_id_;
initiated = false;
}
void ChannelState::Init(Core::System& system, GPU& gpu) {
ASSERT(memory_manager);
dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>();
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
initiated = true;
}
void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
dma_pusher->BindRasterizer(rasterizer);
memory_manager->BindRasterizer(rasterizer);
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_memory->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
maxwell_dma->BindRasterizer(rasterizer);
}
} // namespace Tegra::Control

View file

@ -0,0 +1,69 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra {
class GPU;
namespace Engines {
class Puller;
class Fermi2D;
class Maxwell3D;
class MaxwellDMA;
class KeplerCompute;
class KeplerMemory;
} // namespace Engines
class MemoryManager;
class DmaPusher;
namespace Control {
struct ChannelState {
ChannelState(s32 bind_id);
ChannelState(const ChannelState& state) = delete;
ChannelState& operator=(const ChannelState&) = delete;
ChannelState(ChannelState&& other) noexcept = default;
ChannelState& operator=(ChannelState&& other) noexcept = default;
void Init(Core::System& system, GPU& gpu);
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
s32 bind_id = -1;
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
/// 2D engine
std::unique_ptr<Engines::Fermi2D> fermi_2d;
/// Compute engine
std::unique_ptr<Engines::KeplerCompute> kepler_compute;
/// DMA engine
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
std::shared_ptr<MemoryManager> memory_manager;
std::unique_ptr<DmaPusher> dma_pusher;
bool initiated{};
};
} // namespace Control
} // namespace Tegra

View file

@ -0,0 +1,5 @@
#include "video_core/control/channel_state_cache.inc"
namespace VideoCommon {
template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
}

View file

@ -0,0 +1,68 @@
#pragma once
#include <deque>
#include <limits>
#include <unordered_map>
#include "common/common_types.h"
namespace Tegra {
namespace Engines {
class Maxwell3D;
class KeplerCompute;
} // namespace Engines
class MemoryManager;
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace VideoCommon {
class ChannelInfo {
public:
ChannelInfo() = delete;
ChannelInfo(Tegra::Control::ChannelState& state);
ChannelInfo(const ChannelInfo& state) = delete;
ChannelInfo& operator=(const ChannelInfo&) = delete;
ChannelInfo(ChannelInfo&& other) = default;
ChannelInfo& operator=(ChannelInfo&& other) = default;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
};
template <class P>
class ChannelSetupCaches {
public:
/// Operations for seting the channel of execution.
/// Create channel state.
void CreateChannel(Tegra::Control::ChannelState& channel);
/// Bind a channel for execution.
void BindToChannel(s32 id);
/// Erase channel's state.
void EraseChannel(s32 id);
protected:
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
std::deque<P> channel_storage;
std::deque<size_t> free_channel_ids;
std::unordered_map<s32, size_t> channel_map;
P* channel_state;
size_t current_channel_id{UNSET_CHANNEL};
Tegra::Engines::Maxwell3D* maxwell3d;
Tegra::Engines::KeplerCompute* kepler_compute;
Tegra::MemoryManager* gpu_memory;
};
} // namespace VideoCommon

View file

@ -0,0 +1,64 @@
#include "video_core/control/channel_state.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
namespace VideoCommon {
ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
: maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
gpu_memory{*channel_state.memory_manager} {}
template <class P>
void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
auto new_id = [this, &channel]() {
if (!free_channel_ids.empty()) {
auto id = free_channel_ids.front();
free_channel_ids.pop_front();
new (&channel_storage[id]) ChannelInfo(channel);
return id;
}
channel_storage.emplace_back(channel);
return channel_storage.size() - 1;
}();
channel_map.emplace(channel.bind_id, new_id);
if (current_channel_id != UNSET_CHANNEL) {
channel_state = &channel_storage[current_channel_id];
}
}
/// Bind a channel for execution.
template <class P>
void ChannelSetupCaches<P>::BindToChannel(s32 id) {
auto it = channel_map.find(id);
ASSERT(it != channel_map.end() && id >= 0);
current_channel_id = it->second;
channel_state = &channel_storage[current_channel_id];
maxwell3d = &channel_state->maxwell3d;
kepler_compute = &channel_state->kepler_compute;
gpu_memory = &channel_state->gpu_memory;
}
/// Erase channel's channel_state.
template <class P>
void ChannelSetupCaches<P>::EraseChannel(s32 id) {
const auto it = channel_map.find(id);
ASSERT(it != channel_map.end() && id >= 0);
const auto this_id = it->second;
free_channel_ids.push_back(this_id);
channel_map.erase(it);
if (this_id == current_channel_id) {
current_channel_id = UNSET_CHANNEL;
channel_state = nullptr;
maxwell3d = nullptr;
kepler_compute = nullptr;
gpu_memory = nullptr;
} else if (current_channel_id != UNSET_CHANNEL) {
channel_state = &channel_storage[current_channel_id];
}
}
} // namespace VideoCommon

View file

@ -0,0 +1,31 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"
#include "video_core/gpu.h"
namespace Tegra::Control {
Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
Scheduler::~Scheduler() = default;
void Scheduler::Push(s32 channel, CommandList&& entries) {
std::unique_lock<std::mutex> lk(scheduling_guard);
auto it = channels.find(channel);
auto channel_state = it->second;
gpu.BindChannel(channel_state->bind_id);
channel_state->dma_pusher->Push(std::move(entries));
channel_state->dma_pusher->DispatchCalls();
}
void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
s32 channel = new_channel->bind_id;
std::unique_lock<std::mutex> lk(scheduling_guard);
channels.emplace(channel, new_channel);
}
} // namespace Tegra::Control

View file

@ -0,0 +1,38 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <mutex>
#include <unordered_map>
#include "video_core/dma_pusher.h"
namespace Tegra {
class GPU;
namespace Control {
struct ChannelState;
class Scheduler {
public:
Scheduler(GPU& gpu_);
~Scheduler();
void Push(s32 channel, CommandList&& entries);
void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
private:
std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
std::mutex scheduling_guard;
GPU& gpu;
};
} // namespace Control
} // namespace Tegra

View file

@ -12,7 +12,10 @@
namespace Tegra {
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
Control::ChannelState& channel_state_)
: gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
*this, channel_state_} {}
DmaPusher::~DmaPusher() = default;
@ -76,11 +79,11 @@ bool DmaPusher::Step() {
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
if (Settings::IsGPULevelHigh()) {
gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
memory_manager.ReadBlock(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
} else {
gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
}
}
for (std::size_t index = 0; index < command_headers.size();) {
@ -154,7 +157,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
void DmaPusher::CallMethod(u32 argument) const {
if (dma_state.method < non_puller_methods) {
gpu.CallMethod(GPU::MethodCall{
puller.CallPullerMethod(Engines::Puller::MethodCall{
dma_state.method,
argument,
dma_state.subchannel,
@ -168,12 +171,16 @@ void DmaPusher::CallMethod(u32 argument) const {
void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
if (dma_state.method < non_puller_methods) {
gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
dma_state.method_count);
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
dma_state.method_count);
} else {
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
num_methods, dma_state.method_count);
}
}
void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
puller.BindRasterizer(rasterizer);
}
} // namespace Tegra

View file

@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/puller.h"
namespace Core {
class System;
@ -17,7 +18,12 @@ class System;
namespace Tegra {
namespace Control {
struct ChannelState;
}
class GPU;
class MemoryManager;
enum class SubmissionMode : u32 {
IncreasingOld = 0,
@ -102,7 +108,8 @@ struct CommandList final {
*/
class DmaPusher final {
public:
explicit DmaPusher(Core::System& system_, GPU& gpu_);
explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
Control::ChannelState& channel_state_);
~DmaPusher();
void Push(CommandList&& entries) {
@ -115,6 +122,8 @@ public:
subchannels[subchannel_id] = engine;
}
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
private:
static constexpr u32 non_puller_methods = 0x40;
static constexpr u32 max_subchannels = 8;
@ -148,6 +157,8 @@ private:
GPU& gpu;
Core::System& system;
MemoryManager& memory_manager;
mutable Engines::Puller puller;
};
} // namespace Tegra

View file

@ -0,0 +1,297 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/puller.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra::Engines {
Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
Control::ChannelState& channel_state_)
: gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
channel_state_} {}
Puller::~Puller() = default;
void Puller::ProcessBindMethod(const MethodCall& method_call) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
const auto engine_id = static_cast<EngineID>(method_call.argument);
bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
switch (engine_id) {
case EngineID::FERMI_TWOD_A:
dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
break;
case EngineID::MAXWELL_B:
dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
break;
case EngineID::KEPLER_COMPUTE_B:
dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
break;
case EngineID::MAXWELL_DMA_COPY_A:
dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
}
}
void Puller::ProcessFenceActionMethod() {
switch (regs.fence_action.op) {
case Puller::FenceOperation::Acquire:
// UNIMPLEMENTED_MSG("Channel Scheduling pending.");
// WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
break;
case Puller::FenceOperation::Increment:
rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
}
}
void Puller::ProcessWaitForInterruptMethod() {
// TODO(bunnei) ImplementMe
LOG_WARNING(HW_GPU, "(STUBBED) called");
}
void Puller::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
struct Block {
u32 sequence;
u32 zeros = 0;
u64 timestamp;
};
Block block{};
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = gpu.GetTicks();
memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
} else {
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
// Nothing to do in this case
} else {
regs.acquire_source = true;
regs.acquire_value = regs.semaphore_sequence;
if (op == GpuSemaphoreOperation::AcquireEqual) {
regs.acquire_active = true;
regs.acquire_mode = false;
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
regs.acquire_active = true;
regs.acquire_mode = true;
} else if (op == GpuSemaphoreOperation::AcquireMask) {
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
// semaphore_sequence, gives a non-0 result
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
} else {
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
}
}
}
}
void Puller::ProcessSemaphoreRelease() {
memory_manager.Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
}
void Puller::ProcessSemaphoreAcquire() {
const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
}
}
/// Calls a GPU puller method.
void Puller::CallPullerMethod(const MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
const auto method = static_cast<BufferMethods>(method_call.method);
switch (method) {
case BufferMethods::BindObject: {
ProcessBindMethod(method_call);
break;
}
case BufferMethods::Nop:
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
break;
case BufferMethods::RefCnt:
rasterizer->SignalReference();
break;
case BufferMethods::FenceAction:
ProcessFenceActionMethod();
break;
case BufferMethods::WaitForInterrupt:
ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
break;
}
case BufferMethods::NotifyIntr: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
break;
}
case BufferMethods::Unk28: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
break;
}
case BufferMethods::SemaphoreAcquire: {
ProcessSemaphoreAcquire();
break;
}
case BufferMethods::SemaphoreRelease: {
ProcessSemaphoreRelease();
break;
}
case BufferMethods::Yield: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
break;
}
default:
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
break;
}
}
/// Calls a GPU engine method.
void Puller::CallEngineMethod(const MethodCall& method_call) {
const EngineID engine = bound_engines[method_call.subchannel];
switch (engine) {
case EngineID::FERMI_TWOD_A:
channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::MAXWELL_B:
channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::KEPLER_COMPUTE_B:
channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::MAXWELL_DMA_COPY_A:
channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
/// Calls a GPU engine multivalue method.
void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending) {
const EngineID engine = bound_engines[subchannel];
switch (engine) {
case EngineID::FERMI_TWOD_A:
channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_B:
channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_COMPUTE_B:
channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_DMA_COPY_A:
channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
/// Calls a GPU method.
void Puller::CallMethod(const MethodCall& method_call) {
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
method_call.subchannel);
ASSERT(method_call.subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method_call.method)) {
CallEngineMethod(method_call);
} else {
CallPullerMethod(method_call);
}
}
/// Calls a GPU multivalue method.
void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending) {
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
ASSERT(subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method)) {
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
} else {
for (std::size_t i = 0; i < amount; i++) {
CallPullerMethod(MethodCall{
method,
base_start[i],
subchannel,
methods_pending - static_cast<u32>(i),
});
}
}
}
void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
/// Determines where the method should be executed.
[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
const auto buffer_method = static_cast<BufferMethods>(method);
return buffer_method >= BufferMethods::NonPullerMethods;
}
} // namespace Tegra::Engines

View file

@ -0,0 +1,179 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
namespace Core {
class System;
}
namespace Tegra {
class MemoryManager;
class DmaPusher;
enum class EngineID {
FERMI_TWOD_A = 0x902D, // 2D Engine
MAXWELL_B = 0xB197, // 3D Engine
KEPLER_COMPUTE_B = 0xB1C0,
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
MAXWELL_DMA_COPY_A = 0xB0B5,
};
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
class Puller final {
public:
struct MethodCall {
u32 method{};
u32 argument{};
u32 subchannel{};
u32 method_count{};
explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
: method(method_), argument(argument_), subchannel(subchannel_),
method_count(method_count_) {}
[[nodiscard]] bool IsLastCall() const {
return method_count <= 1;
}
};
enum class FenceOperation : u32 {
Acquire = 0,
Increment = 1,
};
union FenceAction {
u32 raw;
BitField<0, 1, FenceOperation> op;
BitField<8, 24, u32> syncpoint_id;
};
explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
Control::ChannelState& channel_state);
~Puller();
void CallMethod(const MethodCall& method_call);
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending);
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
void CallPullerMethod(const MethodCall& method_call);
void CallEngineMethod(const MethodCall& method_call);
void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending);
private:
Tegra::GPU& gpu;
MemoryManager& memory_manager;
DmaPusher& dma_pusher;
Control::ChannelState& channel_state;
VideoCore::RasterizerInterface* rasterizer = nullptr;
static constexpr std::size_t NUM_REGS = 0x800;
struct Regs {
static constexpr size_t NUM_REGS = 0x40;
union {
struct {
INSERT_PADDING_WORDS_NOINIT(0x4);
struct {
u32 address_high;
u32 address_low;
[[nodiscard]] GPUVAddr SemaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} semaphore_address;
u32 semaphore_sequence;
u32 semaphore_trigger;
INSERT_PADDING_WORDS_NOINIT(0xC);
// The pusher and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
INSERT_PADDING_WORDS_NOINIT(0x5);
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
FenceAction fence_action;
INSERT_PADDING_WORDS_NOINIT(0xE2);
// Puller state
u32 acquire_mode;
u32 acquire_source;
u32 acquire_active;
u32 acquire_timeout;
u32 acquire_value;
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
void ProcessBindMethod(const MethodCall& method_call);
void ProcessFenceActionMethod();
void ProcessSemaphoreAcquire();
void ProcessSemaphoreRelease();
void ProcessSemaphoreTriggerMethod();
void ProcessWaitForInterruptMethod();
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines{};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
AcquireGequal = 0x4,
AcquireMask = 0x8,
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(semaphore_address, 0x4);
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
ASSERT_REG_POSITION(reference_count, 0x14);
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
ASSERT_REG_POSITION(semaphore_release, 0x1B);
ASSERT_REG_POSITION(fence_value, 0x1C);
ASSERT_REG_POSITION(fence_action, 0x1D);
ASSERT_REG_POSITION(acquire_mode, 0x100);
ASSERT_REG_POSITION(acquire_source, 0x101);
ASSERT_REG_POSITION(acquire_active, 0x102);
ASSERT_REG_POSITION(acquire_timeout, 0x103);
ASSERT_REG_POSITION(acquire_value, 0x104);
#undef ASSERT_REG_POSITION
};
} // namespace Tegra::Engines

View file

@ -4,12 +4,13 @@
#pragma once
#include <algorithm>
#include <cstring>
#include <memory>
#include <queue>
#include "common/common_types.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
@ -19,10 +20,10 @@ public:
explicit FenceBase(u32 payload_, bool is_stubbed_)
: address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
: address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
GPUVAddr GetAddress() const {
u8* GetAddress() const {
return address;
}
@ -35,7 +36,7 @@ public:
}
private:
GPUVAddr address;
u8* address;
u32 payload;
bool is_semaphore;
@ -57,7 +58,7 @@ public:
buffer_cache.AccumulateFlushes();
}
void SignalSemaphore(GPUVAddr addr, u32 value) {
void SignalSemaphore(u8* addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
@ -91,8 +92,9 @@ public:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
gpu_memory.template Write<u32>(current_fence->GetAddress(),
current_fence->GetPayload());
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
auto payload = current_fence->GetPayload();
std::memcpy(address, &payload, sizeof(payload));
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
@ -104,8 +106,8 @@ protected:
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
TQueryCache& query_cache_)
: rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
: rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
virtual ~FenceManager() = default;
@ -113,7 +115,7 @@ protected:
/// true
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
/// Queues a fence into the backend if the fence isn't stubbed.
virtual void QueueFence(TFence& fence) = 0;
/// Notifies that the backend fence has been signaled/reached in host GPU.
@ -123,7 +125,6 @@ protected:
VideoCore::RasterizerInterface& rasterizer;
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
@ -137,8 +138,9 @@ private:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
gpu_memory.template Write<u32>(current_fence->GetAddress(),
current_fence->GetPayload());
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
const auto payload = current_fence->GetPayload();
std::memcpy(address, &payload, sizeof(payload));
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}

View file

@ -18,6 +18,8 @@
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/perf_stats.h"
#include "video_core/cdma_pusher.h"
#include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
struct GPU::Impl {
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
: gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
system)},
dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
fermi_2d{std::make_unique<Engines::Fermi2D>()},
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
: gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
gpu_thread{system_, is_async_} {}
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
~Impl() = default;
std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
channels.emplace(channel_id, channel_state);
scheduler->DeclareChannel(channel_state);
return channel_state;
}
void BindChannel(s32 channel_id) {
if (bound_channel == channel_id) {
return;
}
auto it = channels.find(channel_id);
ASSERT(it != channels.end());
bound_channel = channel_id;
current_channel = it->second.get();
rasterizer->BindChannel(*current_channel);
}
std::shared_ptr<Control::ChannelState> AllocateChannel() {
return CreateChannel(new_channel_id++);
}
void InitChannel(Control::ChannelState& to_init) {
to_init.Init(system, gpu);
to_init.BindRasterizer(rasterizer);
rasterizer->InitializeChannel(to_init);
}
void ReleaseChannel(Control::ChannelState& to_release) {
UNIMPLEMENTED();
}
void CreateHost1xChannel() {
if (host1x_channel) {
return;
}
host1x_channel = CreateChannel(0);
host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
InitChannel(*host1x_channel);
}
/// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
renderer = std::move(renderer_);
rasterizer = renderer->ReadRasterizer();
memory_manager->BindRasterizer(rasterizer);
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
kepler_memory->BindRasterizer(rasterizer);
maxwell_dma->BindRasterizer(rasterizer);
}
/// Calls a GPU method.
void CallMethod(const GPU::MethodCall& method_call) {
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
method_call.subchannel);
ASSERT(method_call.subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method_call.method)) {
CallEngineMethod(method_call);
} else {
CallPullerMethod(method_call);
}
}
/// Calls a GPU multivalue method.
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending) {
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
ASSERT(subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method)) {
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
} else {
for (std::size_t i = 0; i < amount; i++) {
CallPullerMethod(GPU::MethodCall{
method,
base_start[i],
subchannel,
methods_pending - static_cast<u32>(i),
});
}
}
}
/// Flush all current written commands into the host GPU for execution.
@ -146,42 +141,44 @@ struct GPU::Impl {
/// Returns a reference to the Maxwell3D GPU engine.
[[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
return *maxwell_3d;
ASSERT(current_channel);
return *current_channel->maxwell_3d;
}
/// Returns a const reference to the Maxwell3D GPU engine.
[[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
return *maxwell_3d;
ASSERT(current_channel);
return *current_channel->maxwell_3d;
}
/// Returns a reference to the KeplerCompute GPU engine.
[[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
return *kepler_compute;
ASSERT(current_channel);
return *current_channel->kepler_compute;
}
/// Returns a reference to the KeplerCompute GPU engine.
[[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
return *kepler_compute;
ASSERT(current_channel);
return *current_channel->kepler_compute;
}
/// Returns a reference to the GPU memory manager.
[[nodiscard]] Tegra::MemoryManager& MemoryManager() {
return *memory_manager;
}
/// Returns a const reference to the GPU memory manager.
[[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
return *memory_manager;
CreateHost1xChannel();
return *host1x_channel->memory_manager;
}
/// Returns a reference to the GPU DMA pusher.
[[nodiscard]] Tegra::DmaPusher& DmaPusher() {
return *dma_pusher;
ASSERT(current_channel);
return *current_channel->dma_pusher;
}
/// Returns a const reference to the GPU DMA pusher.
[[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
return *dma_pusher;
ASSERT(current_channel);
return *current_channel->dma_pusher;
}
/// Returns a reference to the underlying renderer.
@ -306,7 +303,7 @@ struct GPU::Impl {
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
void Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}
@ -328,8 +325,8 @@ struct GPU::Impl {
}
/// Push GPU command entries to be processed
void PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
gpu_thread.SubmitList(channel, std::move(entries));
}
/// Push GPU command buffer entries to be processed
@ -381,303 +378,16 @@ struct GPU::Impl {
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
void ProcessBindMethod(const GPU::MethodCall& method_call) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
const auto engine_id = static_cast<EngineID>(method_call.argument);
bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
switch (engine_id) {
case EngineID::FERMI_TWOD_A:
dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
break;
case EngineID::MAXWELL_B:
dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
break;
case EngineID::KEPLER_COMPUTE_B:
dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
break;
case EngineID::MAXWELL_DMA_COPY_A:
dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
}
}
void ProcessFenceActionMethod() {
switch (regs.fence_action.op) {
case GPU::FenceOperation::Acquire:
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
break;
case GPU::FenceOperation::Increment:
IncrementSyncPoint(regs.fence_action.syncpoint_id);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
}
}
void ProcessWaitForInterruptMethod() {
// TODO(bunnei) ImplementMe
LOG_WARNING(HW_GPU, "(STUBBED) called");
}
void ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
struct Block {
u32 sequence;
u32 zeros = 0;
u64 timestamp;
};
Block block{};
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
// Nothing to do in this case
} else {
regs.acquire_source = true;
regs.acquire_value = regs.semaphore_sequence;
if (op == GpuSemaphoreOperation::AcquireEqual) {
regs.acquire_active = true;
regs.acquire_mode = false;
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
regs.acquire_active = true;
regs.acquire_mode = true;
} else if (op == GpuSemaphoreOperation::AcquireMask) {
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
// semaphore_sequence, gives a non-0 result
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
} else {
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
}
}
}
}
void ProcessSemaphoreRelease() {
memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
regs.semaphore_release);
}
void ProcessSemaphoreAcquire() {
const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
}
}
/// Calls a GPU puller method.
void CallPullerMethod(const GPU::MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
const auto method = static_cast<BufferMethods>(method_call.method);
switch (method) {
case BufferMethods::BindObject: {
ProcessBindMethod(method_call);
break;
}
case BufferMethods::Nop:
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
break;
case BufferMethods::UnkCacheFlush:
rasterizer->SyncGuestHost();
break;
case BufferMethods::WrcacheFlush:
rasterizer->SignalReference();
break;
case BufferMethods::FenceValue:
break;
case BufferMethods::RefCnt:
rasterizer->SignalReference();
break;
case BufferMethods::FenceAction:
ProcessFenceActionMethod();
break;
case BufferMethods::WaitForInterrupt:
rasterizer->WaitForIdle();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
break;
}
case BufferMethods::NotifyIntr: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
break;
}
case BufferMethods::Unk28: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
break;
}
case BufferMethods::SemaphoreAcquire: {
ProcessSemaphoreAcquire();
break;
}
case BufferMethods::SemaphoreRelease: {
ProcessSemaphoreRelease();
break;
}
case BufferMethods::Yield: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
break;
}
default:
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
break;
}
}
/// Calls a GPU engine method.
void CallEngineMethod(const GPU::MethodCall& method_call) {
const EngineID engine = bound_engines[method_call.subchannel];
switch (engine) {
case EngineID::FERMI_TWOD_A:
fermi_2d->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::MAXWELL_B:
maxwell_3d->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::KEPLER_COMPUTE_B:
kepler_compute->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
kepler_memory->CallMethod(method_call.method, method_call.argument,
method_call.IsLastCall());
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
/// Calls a GPU engine multivalue method.
void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending) {
const EngineID engine = bound_engines[subchannel];
switch (engine) {
case EngineID::FERMI_TWOD_A:
fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_B:
maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_COMPUTE_B:
kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
/// Determines where the method should be executed.
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
const auto buffer_method = static_cast<BufferMethods>(method);
return buffer_method >= BufferMethods::NonPullerMethods;
}
struct Regs {
static constexpr size_t NUM_REGS = 0x40;
union {
struct {
INSERT_PADDING_WORDS_NOINIT(0x4);
struct {
u32 address_high;
u32 address_low;
[[nodiscard]] GPUVAddr SemaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} semaphore_address;
u32 semaphore_sequence;
u32 semaphore_trigger;
INSERT_PADDING_WORDS_NOINIT(0xC);
// The pusher and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
INSERT_PADDING_WORDS_NOINIT(0x5);
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
GPU::FenceAction fence_action;
INSERT_PADDING_WORDS_NOINIT(0xE2);
// Puller state
u32 acquire_mode;
u32 acquire_source;
u32 acquire_active;
u32 acquire_timeout;
u32 acquire_value;
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
GPU& gpu;
Core::System& system;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
std::unique_ptr<VideoCore::RendererBase> renderer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec;
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines{};
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
/// 2D engine
std::unique_ptr<Engines::Fermi2D> fermi_2d;
/// Compute engine
std::unique_ptr<Engines::KeplerCompute> kepler_compute;
/// DMA engine
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
std::shared_ptr<Control::ChannelState> host1x_channel;
s32 new_channel_id{1};
/// Shader build notifier
std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
/// When true, we are about to shut down emulation session, so terminate outstanding tasks
@ -710,33 +420,10 @@ struct GPU::Impl {
VideoCommon::GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(semaphore_address, 0x4);
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
ASSERT_REG_POSITION(reference_count, 0x14);
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
ASSERT_REG_POSITION(semaphore_release, 0x1B);
ASSERT_REG_POSITION(fence_value, 0x1C);
ASSERT_REG_POSITION(fence_action, 0x1D);
ASSERT_REG_POSITION(acquire_mode, 0x100);
ASSERT_REG_POSITION(acquire_source, 0x101);
ASSERT_REG_POSITION(acquire_active, 0x102);
ASSERT_REG_POSITION(acquire_timeout, 0x103);
ASSERT_REG_POSITION(acquire_value, 0x104);
#undef ASSERT_REG_POSITION
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
AcquireGequal = 0x4,
AcquireMask = 0x8,
};
std::unique_ptr<Tegra::Control::Scheduler> scheduler;
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
Tegra::Control::ChannelState* current_channel;
s32 bound_channel{-1};
};
GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@ -744,19 +431,26 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
GPU::~GPU() = default;
std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
return impl->AllocateChannel();
}
void GPU::InitChannel(Control::ChannelState& to_init) {
impl->InitChannel(to_init);
}
void GPU::BindChannel(s32 channel_id) {
impl->BindChannel(channel_id);
}
void GPU::ReleaseChannel(Control::ChannelState& to_release) {
impl->ReleaseChannel(to_release);
}
void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
impl->BindRenderer(std::move(renderer));
}
void GPU::CallMethod(const MethodCall& method_call) {
impl->CallMethod(method_call);
}
void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending) {
impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
}
void GPU::FlushCommands() {
impl->FlushCommands();
}
@ -881,8 +575,8 @@ void GPU::ReleaseContext() {
impl->ReleaseContext();
}
void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
impl->PushGPUEntries(std::move(entries));
void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
impl->PushGPUEntries(channel, std::move(entries));
}
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {

View file

@ -89,57 +89,20 @@ class Maxwell3D;
class KeplerCompute;
} // namespace Engines
enum class EngineID {
FERMI_TWOD_A = 0x902D, // 2D Engine
MAXWELL_B = 0xB197, // 3D Engine
KEPLER_COMPUTE_B = 0xB1C0,
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
MAXWELL_DMA_COPY_A = 0xB0B5,
};
namespace Control {
struct ChannelState;
}
class MemoryManager;
class GPU final {
public:
struct MethodCall {
u32 method{};
u32 argument{};
u32 subchannel{};
u32 method_count{};
explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
: method(method_), argument(argument_), subchannel(subchannel_),
method_count(method_count_) {}
[[nodiscard]] bool IsLastCall() const {
return method_count <= 1;
}
};
enum class FenceOperation : u32 {
Acquire = 0,
Increment = 1,
};
union FenceAction {
u32 raw;
BitField<0, 1, FenceOperation> op;
BitField<8, 24, u32> syncpoint_id;
};
explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
~GPU();
/// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
/// Calls a GPU multivalue method.
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
u32 methods_pending);
/// Flush all current written commands into the host GPU for execution.
void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory.
@ -147,6 +110,14 @@ public:
/// Signal the ending of command list.
void OnCommandListEnd();
std::shared_ptr<Control::ChannelState> AllocateChannel();
void InitChannel(Control::ChannelState& to_init);
void BindChannel(s32 channel_id);
void ReleaseChannel(Control::ChannelState& to_release);
/// Request a host GPU memory flush from the CPU.
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@ -226,7 +197,7 @@ public:
void ReleaseContext();
/// Push GPU command entries to be processed
void PushGPUEntries(Tegra::CommandList&& entries);
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
@ -248,7 +219,7 @@ public:
private:
struct Impl;
std::unique_ptr<Impl> impl;
mutable std::unique_ptr<Impl> impl;
};
} // namespace Tegra

View file

@ -8,6 +8,7 @@
#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@ -18,7 +19,7 @@ namespace VideoCommon::GPUThread {
/// Runs the GPU thread
static void RunThread(std::stop_token stop_token, Core::System& system,
VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher, SynchState& state) {
Tegra::Control::Scheduler& scheduler, SynchState& state) {
std::string name = "GPU";
MicroProfileOnThreadCreate(name.c_str());
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
@ -36,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
break;
}
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
scheduler.Push(submit_list->channel, std::move(submit_list->entries));
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@ -68,14 +68,14 @@ ThreadManager::~ThreadManager() = default;
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher) {
Tegra::Control::Scheduler& scheduler) {
rasterizer = renderer.ReadRasterizer();
thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
std::ref(dma_pusher), std::ref(state));
std::ref(scheduler), std::ref(state));
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(std::move(entries)));
void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(channel, std::move(entries)));
}
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {

View file

@ -15,7 +15,9 @@
namespace Tegra {
struct FramebufferConfig;
class DmaPusher;
namespace Control {
class Scheduler;
}
} // namespace Tegra
namespace Core {
@ -34,8 +36,10 @@ namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
: channel{channel_}, entries{std::move(entries_)} {}
s32 channel;
Tegra::CommandList entries;
};
@ -112,10 +116,10 @@ public:
/// Creates and starts the GPU thread.
void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher);
Tegra::Control::Scheduler& scheduler);
/// Push GPU command entries to be processed
void SubmitList(Tegra::CommandList&& entries);
void SubmitList(s32 channel, Tegra::CommandList&& entries);
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

View file

@ -133,11 +133,6 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
// TryLockPage(page_entry, size);
auto& current_page = page_table[PageEntryIndex(gpu_addr)];
if ((!current_page.IsValid() && page_entry.IsValid()) ||
current_page.ToAddress() != page_entry.ToAddress()) {
rasterizer->ModifyGPUMemory(gpu_addr, size);
}
current_page = page_entry;
}

View file

@ -17,6 +17,7 @@
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@ -90,13 +91,10 @@ private:
};
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase {
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_)
: rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
: rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
@ -117,13 +115,13 @@ public:
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
ASSERT_OR_EXECUTE(cpu_addr, return;);
u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
u8* const host_ptr = gpu_memory->GetPointer(gpu_addr);
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
@ -137,7 +135,7 @@ public:
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
}
@ -264,8 +262,6 @@ private:
static constexpr unsigned YUZU_PAGEBITS = 12;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;

View file

@ -16,6 +16,9 @@ class MemoryManager;
namespace Engines {
class AccelerateDMAInterface;
}
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace VideoCore {
@ -137,5 +140,11 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const DiskResourceLoadCallback& callback) {}
virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {}
virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
virtual void ReleaseChannel(s32 channel_id) {}
};
} // namespace VideoCore

View file

@ -12,7 +12,7 @@ namespace OpenGL {
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
@ -52,7 +52,7 @@ Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
}
Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
}

View file

@ -17,7 +17,7 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
~GLInnerFence();
void Queue();
@ -41,7 +41,7 @@ public:
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;

View file

@ -26,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_)
: QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
: QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;

View file

@ -28,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_);
explicit QueryCache(RasterizerOpenGL& rasterizer_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);

View file

@ -60,12 +60,11 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@ -392,7 +391,8 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
auto paddr = gpu_memory.GetPointer(addr);
fence_manager.SignalSemaphore(paddr, value);
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {

View file

@ -151,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
} // Anonymous namespace
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
VideoCore::ShaderNotify& shader_notify_)
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
const Device& device_, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
: VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
state_tracker{state_tracker_}, shader_notify{shader_notify_},
use_asynchronous_shaders{device.UseAsynchronousShaders()},
profile{
.supported_spirv = 0x00010000,
@ -310,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
const auto& regs{maxwell3d.regs};
const auto& regs{maxwell3d->regs};
graphics_key.raw = 0;
graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
@ -351,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
}
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
if (maxwell3d.regs.zeta_enable) {
if (maxwell3d->regs.zeta_enable) {
return nullptr;
}
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
return pipeline;
}
return nullptr;
@ -368,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() {
if (!shader) {
return nullptr;
}
const auto& qmd{kepler_compute.launch_description};
const auto& qmd{kepler_compute->launch_description};
const ComputePipelineKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
@ -481,8 +478,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
}
auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
return std::make_unique<GraphicsPipeline>(
device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
thread_worker, &shader_notify, sources, sources_spirv, infos, key);
device, texture_cache, buffer_cache, *gpu_memory, *maxwell3d, program_manager,
state_tracker, thread_worker, &shader_notify, sources, sources_spirv, infos, key);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
@ -491,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
const auto& qmd{kepler_compute.launch_description};
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
const auto& qmd{kepler_compute->launch_description};
ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();
@ -536,8 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
break;
}
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
kepler_compute, program_manager, program.info, code,
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, *gpu_memory,
*kepler_compute, program_manager, program.info, code,
code_spirv);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());

View file

@ -30,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
class ShaderCache : public VideoCommon::ShaderCache {
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
VideoCore::ShaderNotify& shader_notify_);
const Device& device_, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
~ShaderCache();
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,

View file

@ -95,20 +95,25 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
: RendererBase(emu_window, std::move(context_)),
telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_),
gpu(gpu_),
library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
state_tracker(gpu), scheduler(device, state_tracker),
device(CreateDevice(instance, dld, *surface)),
memory_allocator(device, false),
state_tracker(gpu),
scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
memory_allocator, state_tracker, scheduler) {
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());

View file

@ -14,7 +14,7 @@ namespace Vulkan {
InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
: FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
@ -52,7 +52,7 @@ Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
}
Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
}

View file

@ -26,7 +26,7 @@ class Scheduler;
class InnerFence : public VideoCommon::FenceBase {
public:
explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
~InnerFence();
void Queue();
@ -51,7 +51,7 @@ public:
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;

View file

@ -259,17 +259,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
return std::memcmp(&rhs, this, Size()) == 0;
}
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
UpdateDescriptorQueue& update_descriptor_queue_,
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
: VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
texture_cache{texture_cache_}, shader_notify{shader_notify_},
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
device.IsExtVertexInputDynamicStateSupported());
if (current_pipeline) {
@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
if (!shader) {
return nullptr;
}
const auto& qmd{kepler_compute.launch_description};
const auto& qmd{kepler_compute->launch_description};
const ComputePipelineCacheKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
}
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
if (maxwell3d.regs.zeta_enable) {
if (maxwell3d->regs.zeta_enable) {
return nullptr;
}
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
return pipeline;
}
return nullptr;
@ -558,7 +556,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<GraphicsPipeline>(
maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
*maxwell3d, *gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
std::move(modules), infos);
@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
const auto& qmd{kepler_compute.launch_description};
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
const auto& qmd{kepler_compute->launch_description};
ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();

View file

@ -100,10 +100,8 @@ struct ShaderPools {
class PipelineCache : public VideoCommon::ShaderCache {
public:
explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, const Device& device,
Scheduler& scheduler, DescriptorPool& descriptor_pool,
explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue,
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);

View file

@ -65,10 +65,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
const Device& device_, Scheduler& scheduler_)
: QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Scheduler& scheduler_)
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}

View file

@ -52,9 +52,8 @@ private:
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
const Device& device_, Scheduler& scheduler_);
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Scheduler& scheduler_);
~QueryCache();
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);

View file

@ -11,6 +11,7 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/blit_image.h"
@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
} // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MemoryManager& gpu_memory_,
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
const Device& device_, MemoryAllocator& memory_allocator_,
StateTracker& state_tracker_, Scheduler& scheduler_)
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
state_tracker{state_tracker_}, scheduler{scheduler_},
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
update_descriptor_queue(device, scheduler),
blit_image(device, scheduler, state_tracker, descriptor_pool),
@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
memory_allocator, staging_pool,
blit_image, astc_decoder_pass,
render_pass_cache},
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
texture_cache(texture_cache_runtime, *this),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
update_descriptor_queue, descriptor_pool),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
texture_cache, gpu.ShaderNotify()),
query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
@ -199,8 +196,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
UpdateDynamicStates();
const auto& regs{maxwell3d.regs};
const u32 num_instances{maxwell3d.mme_draw.instance_count};
const auto& regs{maxwell3d->regs};
const u32 num_instances{maxwell3d->mme_draw.instance_count};
const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
@ -218,14 +215,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
if (!maxwell3d.ShouldExecute()) {
if (!maxwell3d->ShouldExecute()) {
return;
}
FlushWork();
query_cache.UpdateCounters();
auto& regs = maxwell3d.regs;
auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@ -339,9 +336,9 @@ void RasterizerVulkan::DispatchCompute() {
return;
}
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
const auto& qmd{kepler_compute.launch_description};
const auto& qmd{kepler_compute->launch_description};
const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
@ -451,10 +448,11 @@ void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
gpu_memory->Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
auto paddr = gpu_memory->GetPointer(addr);
fence_manager.SignalSemaphore(paddr, value);
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
@ -553,12 +551,12 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<u8> memory) {
auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
gpu_memory.WriteBlock(address, memory.data(), copy_size);
gpu_memory->WriteBlock(address, memory.data(), copy_size);
return;
}
gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
@ -627,7 +625,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
}
void RasterizerVulkan::UpdateDynamicStates() {
auto& regs = maxwell3d.regs;
auto& regs = maxwell3d->regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@ -651,7 +649,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
}
void RasterizerVulkan::BeginTransformFeedback() {
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
if (regs.tfb_enabled == 0) {
return;
}
@ -667,7 +665,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
}
void RasterizerVulkan::EndTransformFeedback() {
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
if (regs.tfb_enabled == 0) {
return;
}
@ -917,7 +915,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
}
void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
auto& dirty{maxwell3d.dirty.flags};
auto& dirty{maxwell3d->dirty.flags};
if (!dirty[Dirty::VertexInput]) {
return;
}
@ -974,4 +972,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
});
}
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
CreateChannel(channel);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CreateChannel(channel);
buffer_cache.CreateChannel(channel);
}
pipeline_cache.CreateChannel(channel);
query_cache.CreateChannel(channel);
state_tracker.SetupTables(channel);
}
void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
const s32 channel_id = channel.bind_id;
BindToChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.BindToChannel(channel_id);
buffer_cache.BindToChannel(channel_id);
}
pipeline_cache.BindToChannel(channel_id);
query_cache.BindToChannel(channel_id);
state_tracker.ChangeChannel(channel);
scheduler.InvalidateState();
}
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
EraseChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.EraseChannel(channel_id);
buffer_cache.EraseChannel(channel_id);
}
pipeline_cache.EraseChannel(channel_id);
query_cache.EraseChannel(channel_id);
}
} // namespace Vulkan

View file

@ -8,6 +8,7 @@
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
@ -54,13 +55,13 @@ private:
BufferCache& buffer_cache;
};
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
ScreenInfo& screen_info_, const Device& device_,
MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
Scheduler& scheduler_);
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
const Device& device_, MemoryAllocator& memory_allocator_,
StateTracker& state_tracker_, Scheduler& scheduler_);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
@ -99,6 +100,12 @@ public:
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
void BindChannel(Tegra::Control::ChannelState& channel) override;
void ReleaseChannel(s32 channel_id) override;
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
@ -134,9 +141,6 @@ private:
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
ScreenInfo& screen_info;
const Device& device;

View file

@ -7,6 +7,7 @@
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
@ -174,9 +175,8 @@ void SetupDirtyVertexBindings(Tables& tables) {
}
} // Anonymous namespace
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
auto& tables{gpu.Maxwell3D().dirty.tables};
void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
auto& tables{channel_state.maxwell_3d->dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@ -199,4 +199,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyVertexBindings(tables);
}
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
flags = &channel_state.maxwell_3d->dirty.flags;
}
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{}, invalidation_flags{MakeInvalidationFlags()} {}
} // namespace Vulkan

View file

@ -10,6 +10,12 @@
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
namespace Tegra {
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace Vulkan {
namespace Dirty {
@ -56,16 +62,16 @@ public:
explicit StateTracker(Tegra::GPU& gpu);
void InvalidateCommandBufferState() {
flags |= invalidation_flags;
(*flags) |= invalidation_flags;
current_topology = INVALID_TOPOLOGY;
}
void InvalidateViewports() {
flags[Dirty::Viewports] = true;
(*flags)[Dirty::Viewports] = true;
}
void InvalidateScissors() {
flags[Dirty::Scissors] = true;
(*flags)[Dirty::Scissors] = true;
}
bool TouchViewports() {
@ -139,16 +145,20 @@ public:
return has_changed;
}
void SetupTables(Tegra::Control::ChannelState& channel_state);
void ChangeChannel(Tegra::Control::ChannelState& channel_state);
private:
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
bool Exchange(std::size_t id, bool new_value) const noexcept {
const bool is_dirty = flags[id];
flags[id] = new_value;
const bool is_dirty = (*flags)[id];
(*flags)[id] = new_value;
return is_dirty;
}
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};

View file

@ -8,6 +8,7 @@
#include "common/assert.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
@ -33,29 +34,25 @@ void ShaderCache::SyncGuestHost() {
RemovePendingShaders();
}
ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_)
: gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
rasterizer{rasterizer_} {}
ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
auto& dirty{maxwell3d.dirty.flags};
auto& dirty{maxwell3d->dirty.flags};
if (!dirty[VideoCommon::Dirty::Shaders]) {
return last_shaders_valid;
}
dirty[VideoCommon::Dirty::Shaders] = false;
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
unique_hashes[index] = 0;
continue;
}
const auto& shader_config{maxwell3d.regs.shader_config[index]};
const auto& shader_config{maxwell3d->regs.shader_config[index]};
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
const GPUVAddr shader_addr{base_addr + shader_config.offset};
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
last_shaders_valid = false;
@ -64,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
if (!shader_info) {
const u32 start_address{shader_config.offset};
GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
shader_info = MakeShaderInfo(env, *cpu_shader_addr);
}
shader_infos[index] = shader_info;
@ -75,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
}
const ShaderInfo* ShaderCache::ComputeShader() {
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
const auto& qmd{kepler_compute.launch_description};
const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
const auto& qmd{kepler_compute->launch_description};
const GPUVAddr shader_addr{program_base + qmd.program_start};
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
return nullptr;
@ -86,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() {
if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
return shader;
}
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
return MakeShaderInfo(env, *cpu_shader_addr);
}
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
size_t env_index{};
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
if (unique_hashes[index] == 0) {
continue;
}
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
auto& env{result.envs[index]};
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
const u32 start_address{maxwell3d->regs.shader_config[index].offset};
env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
env.SetCachedSize(shader_infos[index]->size_bytes);
result.env_ptrs[env_index++] = &env;
}

View file

@ -12,6 +12,7 @@
#include <vector>
#include "common/common_types.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/shader_environment.h"
@ -19,6 +20,10 @@ namespace Tegra {
class MemoryManager;
}
namespace Tegra::Control {
struct ChannelState;
}
namespace VideoCommon {
class GenericEnvironment;
@ -28,7 +33,7 @@ struct ShaderInfo {
size_t size_bytes{};
};
class ShaderCache {
class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
static constexpr u64 YUZU_PAGEBITS = 14;
static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS;
@ -71,9 +76,7 @@ protected:
}
};
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_);
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);
/// @brief Update the hashes and information of shader stages
/// @param unique_hashes Shader hashes to store into when a stage is enabled
@ -88,10 +91,6 @@ protected:
void GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes);
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
bool last_shaders_valid = false;

View file

@ -88,6 +88,9 @@ struct ImageBase {
u32 scale_rating = 0;
u64 scale_tick = 0;
bool has_scaled = false;
size_t channel = 0;
ImageFlagBits flags = ImageFlagBits::CpuModified;
GPUVAddr gpu_addr = 0;

View file

@ -7,6 +7,7 @@
#include "common/alignment.h"
#include "common/settings.h"
#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/texture_cache/image_view_base.h"
@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
template <class P>
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_)
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
: runtime{runtime_}, rasterizer{rasterizer_} {
// Configure null sampler
TSCEntry sampler_descriptor{};
sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
sampler_descriptor.cubemap_anisotropy.Assign(1);
// Setup channels
current_channel_id = UNSET_CHANNEL;
state = nullptr;
maxwell3d = nullptr;
kepler_compute = nullptr;
gpu_memory = nullptr;
// Make sure the first index is reserved for the null resources
// This way the null resource becomes a compile time constant
void(slot_images.insert(NullImageParams{}));
@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
template <class P>
template <bool has_blacklists>
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
views);
}
template <class P>
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
}
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
if (index > graphics_sampler_table.Limit()) {
if (index > state->graphics_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
SamplerId& id = graphics_sampler_ids[index];
const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
SamplerId& id = state->graphics_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
template <class P>
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
if (index > compute_sampler_table.Limit()) {
if (index > state->compute_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
const auto [descriptor, is_new] = compute_sampler_table.Read(index);
SamplerId& id = compute_sampler_ids[index];
const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
SamplerId& id = state->compute_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
template <class P>
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
const u32 tic_limit = maxwell3d.regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
const u32 tic_limit = maxwell3d->regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
}
template <class P>
void TextureCache<P>::SynchronizeComputeDescriptors() {
const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
const u32 tic_limit = kepler_compute.regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
const u32 tic_limit = kepler_compute->regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
}
template <class P>
bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
auto& flags = maxwell3d.dirty.flags;
auto& flags = maxwell3d->dirty.flags;
u32 scale_rating = 0;
bool rescaled = false;
std::array<ImageId, NUM_RT> tmp_color_images{};
@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
template <class P>
void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
using namespace VideoCommon::Dirty;
auto& flags = maxwell3d.dirty.flags;
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::RenderTargets]) {
for (size_t index = 0; index < NUM_RT; ++index) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
for (size_t index = 0; index < NUM_RT; ++index) {
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
}
u32 up_scale = 1;
u32 down_shift = 0;
@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
down_shift = Settings::values.resolution_info.down_shift;
}
render_targets.size = Extent2D{
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
(maxwell3d->regs.render_area.width * up_scale) >> down_shift,
(maxwell3d->regs.render_area.height * up_scale) >> down_shift,
};
flags[Dirty::DepthBiasGlobal] = true;
@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
}
@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
const GPUVAddr gpu_addr = image.gpu_addr;
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
const auto uploads = FullUploadSwizzles(image.info);
runtime.AccelerateImageUpload(image, staging, uploads);
} else if (True(image.flags & ImageFlagBits::Converted)) {
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
image.UploadMemory(staging, copies);
} else {
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
image.UploadMemory(staging, copies);
}
}
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
if (!IsValidEntry(gpu_memory, config)) {
if (!IsValidEntry(*gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
const auto [pair, is_new] = image_views.try_emplace(config);
const auto [pair, is_new] = state->image_views.try_emplace(config);
ImageViewId& image_view_id = pair->second;
if (is_new) {
image_view_id = CreateImageView(config);
@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
template <class P>
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
if (!cpu_addr) {
return ImageId{};
}
@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
image.scale_tick = frame_tick + 1;
}
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
auto& dirty = maxwell3d.dirty.flags;
auto& dirty = maxwell3d->dirty.flags;
dirty[Dirty::RenderTargets] = true;
dirty[Dirty::ZetaBuffer] = true;
for (size_t rt = 0; rt < NUM_RT; ++rt) {
@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
image.image_view_ids.clear();
image.image_view_infos.clear();
if constexpr (ENABLE_VALIDATION) {
std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
}
graphics_image_table.Invalidate();
compute_image_table.Invalidate();
state->graphics_image_table.Invalidate();
state->compute_image_table.Invalidate();
has_deleted_images = true;
}
@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
const auto size = CalculateGuestSizeInBytes(info);
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
if (!cpu_addr) {
const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
virtual_invalid_space += Common::AlignUp(size, 32);
@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
new_image.flags |= ImageFlagBits::Sparse;
}
@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
return NULL_SAMPLER_ID;
}
const auto [pair, is_new] = samplers.try_emplace(config);
const auto [pair, is_new] = state->samplers.try_emplace(config);
if (is_new) {
pair->second = slot_samplers.insert(runtime, config);
}
@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
template <class P>
ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
if (index >= regs.rt_control.count) {
return ImageViewId{};
}
@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
template <class P>
ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
if (!regs.zeta_enable) {
return ImageViewId{};
}
@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 8> images;
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
const auto it = gpu_page_table.find(page);
if (it == gpu_page_table.end()) {
const auto it = state->gpu_page_table.find(page);
if (it == state->gpu_page_table.end()) {
if constexpr (BOOL_BREAK) {
return false;
} else {
@ -1403,9 +1408,9 @@ template <typename Func>
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
for (const auto& [gpu_addr, size] : segments) {
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
if constexpr (RETURNS_BOOL) {
if (func(gpu_addr, *cpu_addr, size)) {
@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.lru_index = lru_cache.Insert(image_id, frame_tick);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
[this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
if (False(image.flags & ImageFlagBits::Sparse)) {
auto map_id =
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
}
image_ids.erase(vector_it);
};
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
[this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
clear_page_table(page, state->gpu_page_table);
});
if (False(image.flags & ImageFlagBits::Sparse)) {
const auto map_id = image.map_view_id;
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
// Mark render targets as dirty
auto& dirty = maxwell3d.dirty.flags;
auto& dirty = maxwell3d->dirty.flags;
dirty[Dirty::RenderTargets] = true;
dirty[Dirty::ZetaBuffer] = true;
for (size_t rt = 0; rt < NUM_RT; ++rt) {
@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
if (alloc_images.empty()) {
image_allocs_table.erase(alloc_it);
}
if constexpr (ENABLE_VALIDATION) {
std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
for (auto& this_state : channel_storage) {
if constexpr (ENABLE_VALIDATION) {
std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
}
this_state.graphics_image_table.Invalidate();
this_state.compute_image_table.Invalidate();
}
graphics_image_table.Invalidate();
compute_image_table.Invalidate();
has_deleted_images = true;
}
template <class P>
void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
auto it = image_views.begin();
while (it != image_views.end()) {
auto it = state->image_views.begin();
while (it != state->image_views.end()) {
const auto found = std::ranges::find(removed_views, it->second);
if (found != removed_views.end()) {
it = image_views.erase(it);
it = state->image_views.erase(it);
} else {
++it;
}
@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
const ImageViewBase& image_view = slot_image_views[id];
const ImageBase& image = slot_images[image_view.image_id];
const Extent3D size = image_view.size;
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
const auto& scissor = regs.scissor_test[0];
if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
// Images with multiple resources can't be cleared in a single call
@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
scissor.max_y >= size.height;
}
template <class P>
TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
: maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
gpu_memory} {}
template <class P>
void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
auto new_id = [this, &channel]() {
if (!free_channel_ids.empty()) {
auto id = free_channel_ids.front();
free_channel_ids.pop_front();
new (&channel_storage[id]) ChannelInfo(channel);
return id;
}
channel_storage.emplace_back(channel);
return channel_storage.size() - 1;
}();
channel_map.emplace(channel.bind_id, new_id);
if (current_channel_id != UNSET_CHANNEL) {
state = &channel_storage[current_channel_id];
}
}
/// Bind a channel for execution.
template <class P>
void TextureCache<P>::BindToChannel(s32 id) {
auto it = channel_map.find(id);
ASSERT(it != channel_map.end() && id >= 0);
current_channel_id = it->second;
state = &channel_storage[current_channel_id];
maxwell3d = &state->maxwell3d;
kepler_compute = &state->kepler_compute;
gpu_memory = &state->gpu_memory;
}
/// Erase channel's state.
template <class P>
void TextureCache<P>::EraseChannel(s32 id) {
const auto it = channel_map.find(id);
ASSERT(it != channel_map.end() && id >= 0);
const auto this_id = it->second;
free_channel_ids.push_back(this_id);
channel_map.erase(it);
if (this_id == current_channel_id) {
current_channel_id = UNSET_CHANNEL;
state = nullptr;
maxwell3d = nullptr;
kepler_compute = nullptr;
gpu_memory = nullptr;
} else if (current_channel_id != UNSET_CHANNEL) {
state = &channel_storage[current_channel_id];
}
}
} // namespace VideoCommon

View file

@ -3,6 +3,8 @@
#pragma once
#include <deque>
#include <limits>
#include <mutex>
#include <span>
#include <type_traits>
@ -26,6 +28,10 @@
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
namespace Tegra::Control {
struct ChannelState;
}
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
@ -58,6 +64,8 @@ class TextureCache {
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
@ -85,8 +93,7 @@ class TextureCache {
};
public:
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
/// Notify the cache that a new frame has been queued
void TickFrame();
@ -171,6 +178,15 @@ public:
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
/// Create channel state.
void CreateChannel(struct Tegra::Control::ChannelState& channel);
/// Bind a channel for execution.
void BindToChannel(s32 id);
/// Erase channel's state.
void EraseChannel(s32 id);
std::mutex mutex;
private:
@ -338,31 +354,52 @@ private:
u64 GetScaledImageSizeBytes(ImageBase& image);
Runtime& runtime;
struct ChannelInfo {
ChannelInfo() = delete;
ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
ChannelInfo(const ChannelInfo& state) = delete;
ChannelInfo& operator=(const ChannelInfo&) = delete;
ChannelInfo(ChannelInfo&& other) noexcept = default;
ChannelInfo& operator=(ChannelInfo&& other) noexcept = default;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
};
std::deque<ChannelInfo> channel_storage;
std::deque<size_t> free_channel_ids;
std::unordered_map<s32, size_t> channel_map;
ChannelInfo* state;
size_t current_channel_id{UNSET_CHANNEL};
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
Tegra::Engines::Maxwell3D* maxwell3d;
Tegra::Engines::KeplerCompute* kepler_compute;
Tegra::MemoryManager* gpu_memory;
RenderTargets render_targets;
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};