// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <compare>
#include <optional>
#include <queue>
#include <ranges>

#include <boost/container/flat_set.hpp>
#include <boost/container/small_vector.hpp>

#include "common/alignment.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {
namespace {
/// Address in constant buffers to the storage buffer descriptor
struct StorageBufferAddr {
    auto operator<=>(const StorageBufferAddr&) const noexcept = default;

    u32 index;
    u32 offset;
};

/// Block iterator to a global memory instruction and the storage buffer it uses
struct StorageInst {
    StorageBufferAddr storage_buffer;
    IR::Inst* inst;
    IR::Block* block;
};

/// Bias towards a certain range of constant buffers when looking for storage buffers
struct Bias {
    u32 index;
    u32 offset_begin;
    u32 offset_end;
};

using boost::container::flat_set;
using boost::container::small_vector;
using StorageBufferSet =
    flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
using StorageInstVector = small_vector<StorageInst, 24>;
using StorageWritesSet =
    flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
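
// Note: a flat_set backed by a small_vector keeps the discovered storage buffers sorted in
// contiguous storage, with no heap allocation in the common case (up to 16 buffers). The
// stable sorted order is what lets the pass use index_of() as the binding index later on.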

struct StorageInfo {
    StorageBufferSet set;
    StorageInstVector to_replace;
    StorageWritesSet writes;
};

/// Returns true when the instruction is a global memory instruction
bool IsGlobalMemory(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadGlobalS8:
    case IR::Opcode::LoadGlobalU8:
    case IR::Opcode::LoadGlobalS16:
    case IR::Opcode::LoadGlobalU16:
    case IR::Opcode::LoadGlobal32:
    case IR::Opcode::LoadGlobal64:
    case IR::Opcode::LoadGlobal128:
    case IR::Opcode::WriteGlobalS8:
    case IR::Opcode::WriteGlobalU8:
    case IR::Opcode::WriteGlobalS16:
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobal32:
    case IR::Opcode::WriteGlobal64:
    case IR::Opcode::WriteGlobal128:
    case IR::Opcode::GlobalAtomicIAdd32:
    case IR::Opcode::GlobalAtomicSMin32:
    case IR::Opcode::GlobalAtomicUMin32:
    case IR::Opcode::GlobalAtomicSMax32:
    case IR::Opcode::GlobalAtomicUMax32:
    case IR::Opcode::GlobalAtomicInc32:
    case IR::Opcode::GlobalAtomicDec32:
    case IR::Opcode::GlobalAtomicAnd32:
    case IR::Opcode::GlobalAtomicOr32:
    case IR::Opcode::GlobalAtomicXor32:
    case IR::Opcode::GlobalAtomicExchange32:
    case IR::Opcode::GlobalAtomicIAdd64:
    case IR::Opcode::GlobalAtomicSMin64:
    case IR::Opcode::GlobalAtomicUMin64:
    case IR::Opcode::GlobalAtomicSMax64:
    case IR::Opcode::GlobalAtomicUMax64:
    case IR::Opcode::GlobalAtomicAnd64:
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
    case IR::Opcode::GlobalAtomicMinF16x2:
    case IR::Opcode::GlobalAtomicMinF32x2:
    case IR::Opcode::GlobalAtomicMaxF16x2:
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return true;
    default:
        return false;
    }
}

/// Returns true when the instruction is a global memory write (stores and atomics)
bool IsGlobalMemoryWrite(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::WriteGlobalS8:
    case IR::Opcode::WriteGlobalU8:
    case IR::Opcode::WriteGlobalS16:
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobal32:
    case IR::Opcode::WriteGlobal64:
    case IR::Opcode::WriteGlobal128:
    case IR::Opcode::GlobalAtomicIAdd32:
    case IR::Opcode::GlobalAtomicSMin32:
    case IR::Opcode::GlobalAtomicUMin32:
    case IR::Opcode::GlobalAtomicSMax32:
    case IR::Opcode::GlobalAtomicUMax32:
    case IR::Opcode::GlobalAtomicInc32:
    case IR::Opcode::GlobalAtomicDec32:
    case IR::Opcode::GlobalAtomicAnd32:
    case IR::Opcode::GlobalAtomicOr32:
    case IR::Opcode::GlobalAtomicXor32:
    case IR::Opcode::GlobalAtomicExchange32:
    case IR::Opcode::GlobalAtomicIAdd64:
    case IR::Opcode::GlobalAtomicSMin64:
    case IR::Opcode::GlobalAtomicUMin64:
    case IR::Opcode::GlobalAtomicSMax64:
    case IR::Opcode::GlobalAtomicUMax64:
    case IR::Opcode::GlobalAtomicAnd64:
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
    case IR::Opcode::GlobalAtomicMinF16x2:
    case IR::Opcode::GlobalAtomicMinF32x2:
    case IR::Opcode::GlobalAtomicMaxF16x2:
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return true;
    default:
        return false;
    }
}

/// Converts a global memory opcode to its storage buffer equivalent
IR::Opcode GlobalToStorage(IR::Opcode opcode) {
    switch (opcode) {
    case IR::Opcode::LoadGlobalS8:
        return IR::Opcode::LoadStorageS8;
    case IR::Opcode::LoadGlobalU8:
        return IR::Opcode::LoadStorageU8;
    case IR::Opcode::LoadGlobalS16:
        return IR::Opcode::LoadStorageS16;
    case IR::Opcode::LoadGlobalU16:
        return IR::Opcode::LoadStorageU16;
    case IR::Opcode::LoadGlobal32:
        return IR::Opcode::LoadStorage32;
    case IR::Opcode::LoadGlobal64:
        return IR::Opcode::LoadStorage64;
    case IR::Opcode::LoadGlobal128:
        return IR::Opcode::LoadStorage128;
    case IR::Opcode::WriteGlobalS8:
        return IR::Opcode::WriteStorageS8;
    case IR::Opcode::WriteGlobalU8:
        return IR::Opcode::WriteStorageU8;
    case IR::Opcode::WriteGlobalS16:
        return IR::Opcode::WriteStorageS16;
    case IR::Opcode::WriteGlobalU16:
        return IR::Opcode::WriteStorageU16;
    case IR::Opcode::WriteGlobal32:
        return IR::Opcode::WriteStorage32;
    case IR::Opcode::WriteGlobal64:
        return IR::Opcode::WriteStorage64;
    case IR::Opcode::WriteGlobal128:
        return IR::Opcode::WriteStorage128;
    case IR::Opcode::GlobalAtomicIAdd32:
        return IR::Opcode::StorageAtomicIAdd32;
    case IR::Opcode::GlobalAtomicSMin32:
        return IR::Opcode::StorageAtomicSMin32;
    case IR::Opcode::GlobalAtomicUMin32:
        return IR::Opcode::StorageAtomicUMin32;
    case IR::Opcode::GlobalAtomicSMax32:
        return IR::Opcode::StorageAtomicSMax32;
    case IR::Opcode::GlobalAtomicUMax32:
        return IR::Opcode::StorageAtomicUMax32;
    case IR::Opcode::GlobalAtomicInc32:
        return IR::Opcode::StorageAtomicInc32;
    case IR::Opcode::GlobalAtomicDec32:
        return IR::Opcode::StorageAtomicDec32;
    case IR::Opcode::GlobalAtomicAnd32:
        return IR::Opcode::StorageAtomicAnd32;
    case IR::Opcode::GlobalAtomicOr32:
        return IR::Opcode::StorageAtomicOr32;
    case IR::Opcode::GlobalAtomicXor32:
        return IR::Opcode::StorageAtomicXor32;
    case IR::Opcode::GlobalAtomicIAdd64:
        return IR::Opcode::StorageAtomicIAdd64;
    case IR::Opcode::GlobalAtomicSMin64:
        return IR::Opcode::StorageAtomicSMin64;
    case IR::Opcode::GlobalAtomicUMin64:
        return IR::Opcode::StorageAtomicUMin64;
    case IR::Opcode::GlobalAtomicSMax64:
        return IR::Opcode::StorageAtomicSMax64;
    case IR::Opcode::GlobalAtomicUMax64:
        return IR::Opcode::StorageAtomicUMax64;
    case IR::Opcode::GlobalAtomicAnd64:
        return IR::Opcode::StorageAtomicAnd64;
    case IR::Opcode::GlobalAtomicOr64:
        return IR::Opcode::StorageAtomicOr64;
    case IR::Opcode::GlobalAtomicXor64:
        return IR::Opcode::StorageAtomicXor64;
    case IR::Opcode::GlobalAtomicExchange32:
        return IR::Opcode::StorageAtomicExchange32;
    case IR::Opcode::GlobalAtomicExchange64:
        return IR::Opcode::StorageAtomicExchange64;
    case IR::Opcode::GlobalAtomicAddF32:
        return IR::Opcode::StorageAtomicAddF32;
    case IR::Opcode::GlobalAtomicAddF16x2:
        return IR::Opcode::StorageAtomicAddF16x2;
    case IR::Opcode::GlobalAtomicMinF16x2:
        return IR::Opcode::StorageAtomicMinF16x2;
    case IR::Opcode::GlobalAtomicMaxF16x2:
        return IR::Opcode::StorageAtomicMaxF16x2;
    case IR::Opcode::GlobalAtomicAddF32x2:
        return IR::Opcode::StorageAtomicAddF32x2;
    case IR::Opcode::GlobalAtomicMinF32x2:
        return IR::Opcode::StorageAtomicMinF32x2;
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return IR::Opcode::StorageAtomicMaxF32x2;
    default:
        throw InvalidArgument("Invalid global memory opcode {}", opcode);
    }
}

/// Returns true when a storage buffer address satisfies a bias
bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
           storage_buffer.offset < bias.offset_end;
}

struct LowAddrInfo {
    IR::U32 value;
    s32 imm_offset;
};

/// Tries to track the first 32-bits of a global memory instruction
std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
    // The first argument is the low level GPU pointer to the global memory instruction
    const IR::U64 addr{inst->Arg(0)};
    if (addr.IsImmediate()) {
        // Not much we can do if it's an immediate
        return std::nullopt;
    }
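    // A sketch of the IR pattern this function walks backwards, assuming the canonical
    // form produced by the frontend (the IAdd64 node is optional):
    //   %low  = ...                          ; 32-bit low half of the address
    //   %high = ...                          ; 32-bit high half of the address
    //   %vec  = CompositeConstructU32x2 %low, %high
    //   %addr = PackUint2x32 %vec
    //   %ptr  = IAdd64 %addr, #imm_offset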
    // This address is expected to either be a PackUint2x32 or an IAdd64
    IR::Inst* addr_inst{addr.InstRecursive()};
    s32 imm_offset{0};
    if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
        // If it's an IAdd64, grab the immediate offset it applies and the address
        // instruction. This expects the instruction to be canonicalized, with the address
        // in the first argument and the immediate offset in the second.
        const IR::U64 imm_offset_value{addr_inst->Arg(1)};
        if (!imm_offset_value.IsImmediate()) {
            return std::nullopt;
        }
        imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
        const IR::U64 iadd_addr{addr_inst->Arg(0)};
        if (iadd_addr.IsImmediate()) {
            return std::nullopt;
        }
        addr_inst = iadd_addr.Inst();
    }
    // With IAdd64 handled, a PackUint2x32 is now strictly expected
    if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) {
        return std::nullopt;
    }
    // PackUint2x32 is expected to be generated from a vector
    const IR::Value vector{addr_inst->Arg(0)};
    if (vector.IsImmediate()) {
        return std::nullopt;
    }
    // This vector is expected to be a CompositeConstructU32x2
    IR::Inst* const vector_inst{vector.InstRecursive()};
    if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
        return std::nullopt;
    }
    // Grab the first argument from the CompositeConstructU32x2; this is the low address
    return LowAddrInfo{
        .value{IR::U32{vector_inst->Arg(0)}},
        .imm_offset = imm_offset,
    };
}

/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
    const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
        if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
            return std::nullopt;
        }
        const IR::Value index{inst->Arg(0)};
        const IR::Value offset{inst->Arg(1)};
        if (!index.IsImmediate()) {
            // Definitely not a storage buffer if it's read from a non-immediate index
            return std::nullopt;
        }
        if (!offset.IsImmediate()) {
            // TODO: Support SSBO arrays
            return std::nullopt;
        }
        const StorageBufferAddr storage_buffer{
            .index{index.U32()},
            .offset{offset.U32()},
        };
        if (!Common::IsAligned(storage_buffer.offset, 16)) {
            // The SSBO pointer has to be aligned
            return std::nullopt;
        }
        if (bias && !MeetsBias(storage_buffer, *bias)) {
            // Reject addresses outside the biased range to avoid false positives
            return std::nullopt;
        }
        return storage_buffer;
    }};
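    // BreadthFirstSearch walks backwards through the operands that define 'value' and
    // returns the result of the first visited instruction for which 'pred' succeeds,
    // i.e. the constant buffer read that supplies the storage buffer address.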
    return BreadthFirstSearch(value, pred);
}

/// Collects the storage buffer used by a global memory instruction and the instruction itself
void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
    // NVN puts storage buffers in a specific range, we have to bias towards these addresses
    // to avoid getting false positives
    static constexpr Bias nvn_bias{
        .index = 0,
        .offset_begin = 0x110,
        .offset_end = 0x610,
    };
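    // An assumption worth making explicit: this window corresponds to where the NVN driver
    // is known to place its storage buffer address table in constant buffer 0. Addresses
    // found anywhere else are only accepted by the unbiased retry below.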
    // Track the low address of the instruction
    const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
    if (!low_addr_info) {
        // Failed to track the low address, use NVN fallbacks
        return;
    }
    // First try to find storage buffers in the NVN address
    const IR::U32 low_addr{low_addr_info->value};
    std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
    if (!storage_buffer) {
        // If it fails, track without a bias
        storage_buffer = Track(low_addr, nullptr);
        if (!storage_buffer) {
            // If that also fails, use NVN fallbacks
            return;
        }
    }
    // Collect storage buffer and the instruction
    if (IsGlobalMemoryWrite(inst)) {
        info.writes.insert(*storage_buffer);
    }
    info.set.insert(*storage_buffer);
    info.to_replace.push_back(StorageInst{
        .storage_buffer{*storage_buffer},
        .inst = &inst,
        .block = &block,
    });
}

/// Returns the offset in bytes for an equivalent storage instruction
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    IR::U32 offset;
    if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
        offset = low_addr->value;
        if (low_addr->imm_offset != 0) {
            offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
        }
    } else {
        offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
    }
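    // Working in 32 bits is fine here: both the guest address and the SSBO base get
    // truncated to their low words, so the subtraction below still yields the correct
    // byte offset modulo 2^32, i.e. as long as the access stays within 4 GiB of the base.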
    // Subtract the least significant 32 bits from the guest offset. The result is the
    // storage buffer offset in bytes.
    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
    return ir.ISub(offset, low_cbuf);
}

/// Replace a global memory load instruction with its storage buffer equivalent
void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                 const IR::U32& offset) {
    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
    inst.ReplaceUsesWith(value);
}

/// Replace a global memory write instruction with its storage buffer equivalent
void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                  const IR::U32& offset) {
    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
    inst.Invalidate();
}

/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                   const IR::U32& offset) {
    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::Value value{
        &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
    inst.ReplaceUsesWith(value);
}

/// Replace a global memory instruction with its storage buffer equivalent
void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
             const IR::U32& offset) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadGlobalS8:
    case IR::Opcode::LoadGlobalU8:
    case IR::Opcode::LoadGlobalS16:
    case IR::Opcode::LoadGlobalU16:
    case IR::Opcode::LoadGlobal32:
    case IR::Opcode::LoadGlobal64:
    case IR::Opcode::LoadGlobal128:
        return ReplaceLoad(block, inst, storage_index, offset);
    case IR::Opcode::WriteGlobalS8:
    case IR::Opcode::WriteGlobalU8:
    case IR::Opcode::WriteGlobalS16:
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobal32:
    case IR::Opcode::WriteGlobal64:
    case IR::Opcode::WriteGlobal128:
        return ReplaceWrite(block, inst, storage_index, offset);
    case IR::Opcode::GlobalAtomicIAdd32:
    case IR::Opcode::GlobalAtomicSMin32:
    case IR::Opcode::GlobalAtomicUMin32:
    case IR::Opcode::GlobalAtomicSMax32:
    case IR::Opcode::GlobalAtomicUMax32:
    case IR::Opcode::GlobalAtomicInc32:
    case IR::Opcode::GlobalAtomicDec32:
    case IR::Opcode::GlobalAtomicAnd32:
    case IR::Opcode::GlobalAtomicOr32:
    case IR::Opcode::GlobalAtomicXor32:
    case IR::Opcode::GlobalAtomicExchange32:
    case IR::Opcode::GlobalAtomicIAdd64:
    case IR::Opcode::GlobalAtomicSMin64:
    case IR::Opcode::GlobalAtomicUMin64:
    case IR::Opcode::GlobalAtomicSMax64:
    case IR::Opcode::GlobalAtomicUMax64:
    case IR::Opcode::GlobalAtomicAnd64:
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
    case IR::Opcode::GlobalAtomicMinF16x2:
    case IR::Opcode::GlobalAtomicMinF32x2:
    case IR::Opcode::GlobalAtomicMaxF16x2:
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return ReplaceAtomic(block, inst, storage_index, offset);
    default:
        throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
    }
}
} // Anonymous namespace

void GlobalMemoryToStorageBufferPass(IR::Program& program) {
    StorageInfo info;
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (!IsGlobalMemory(inst)) {
                continue;
            }
            CollectStorageBuffers(*block, inst, info);
        }
    }
    for (const StorageBufferAddr& storage_buffer : info.set) {
        program.info.storage_buffers_descriptors.push_back({
            .cbuf_index = storage_buffer.index,
            .cbuf_offset = storage_buffer.offset,
            .count = 1,
            .is_written{info.writes.contains(storage_buffer)},
        });
    }
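    // Each storage buffer's binding index is its position in the sorted set, which is
    // exactly the order descriptors were pushed in the loop above, so index_of() below
    // yields an index that matches the descriptor list.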
    for (const StorageInst& storage_inst : info.to_replace) {
        const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
        const auto it{info.set.find(storage_inst.storage_buffer)};
        const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
        IR::Block* const block{storage_inst.block};
        IR::Inst* const inst{storage_inst.inst};
        const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
        Replace(*block, *inst, index, offset);
    }
}

template <typename Descriptors, typename Descriptor, typename Func>
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
    // TODO: Handle arrays
    const auto it{std::ranges::find_if(descriptors, pred)};
    if (it != descriptors.end()) {
        return static_cast<u32>(std::distance(descriptors.begin(), it));
    }
    descriptors.push_back(desc);
    return static_cast<u32>(descriptors.size()) - 1;
}
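
// A hypothetical usage sketch for Add, not part of the original file: deduplicate a
// descriptor while recording it and get back its binding index. 'info' and 'desc' are
// illustrative names.
//   const u32 binding{Add(info.storage_buffers_descriptors, desc, [&desc](const auto& e) {
//       return e.cbuf_index == desc.cbuf_index && e.cbuf_offset == desc.cbuf_offset;
//   })};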

void JoinStorageInfo(Info& base, Info& source) {
    auto& descriptors = base.storage_buffers_descriptors;
    for (auto& desc : source.storage_buffers_descriptors) {
        auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) {
            return desc.cbuf_index == existing.cbuf_index &&
                   desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count;
        })};
        if (it != descriptors.end()) {
            it->is_written |= desc.is_written;
            continue;
        }
        descriptors.push_back(desc);
    }
}

} // namespace Shader::Optimization