From 926b80102f1c00675a9f3956258a066bfe0c3642 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 11:10:45 -0400 Subject: [PATCH] shader_ir: Decompile Flow Stack --- src/video_core/shader/control_flow.cpp | 167 +++++++++++++++++++++++-- src/video_core/shader/control_flow.h | 1 + src/video_core/shader/decode.cpp | 46 +++++++ src/video_core/shader/shader_ir.h | 3 + 4 files changed, 206 insertions(+), 11 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index a9de8f814..3af4c6190 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include @@ -16,12 +16,80 @@ using Tegra::Shader::OpCode; constexpr s32 unassigned_branch = -2; +struct ControlStack { + std::array stack; + u32 index{}; + + ControlStack() {} + + ControlStack(const ControlStack& cp) { + index = cp.index; + std::memcpy(stack.data(), cp.stack.data(), index * sizeof(u32)); + } + + bool Compare(const ControlStack& cs) const { + if (index != cs.index) { + return false; + } + return std::memcmp(stack.data(), cs.stack.data(), index * sizeof(u32)) == 0; + } + + bool SoftCompare(const ControlStack& cs) const { + if (index == 0 || cs.index == 0) { + return index == cs.index; + } + return Top() == cs.Top(); + } + + u32 Size() const { + return index; + } + + u32 Top() const { + return stack[index - 1]; + } + + bool Push(u32 address) { + if (index >= 20) { + return false; + } + stack[index] = address; + index++; + return true; + } + + bool Pop() { + if (index == 0) { + return false; + } + index--; + return true; + } +}; + +struct Query { + Query() {} + Query(const Query& q) : address{q.address}, ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + u32 address; + ControlStack ssy_stack{}; + ControlStack pbk_stack{}; +}; + +struct BlockStack { + BlockStack() = default; + BlockStack(const BlockStack& b) : ssy_stack{b.ssy_stack}, pbk_stack{b.pbk_stack} {} + BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + ControlStack ssy_stack{}; + ControlStack pbk_stack{}; +}; + struct BlockBranchInfo { Condition condition{}; s32 address{exit_branch}; bool kill{}; bool is_sync{}; bool is_brk{}; + bool ignore{}; }; struct BlockInfo { @@ -64,19 +132,21 @@ struct CFGRebuildState { // queries.clear(); block_info.clear(); labels.clear(); - visited_address.clear(); + registered.clear(); ssy_labels.clear(); pbk_labels.clear(); inspect_queries.clear(); + queries.clear(); } std::vector block_info{}; std::list inspect_queries{}; - // std::list queries{}; - std::unordered_set visited_address{}; + std::list queries{}; + std::unordered_map registered{}; std::unordered_set labels{}; std::set ssy_labels; std::set pbk_labels; + std::unordered_map stacks{}; const ProgramCode& program_code; const std::size_t program_size; }; @@ -107,7 +177,8 @@ BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { auto& it = state.block_info.emplace_back(); it.start = start; it.end = end; - state.visited_address.insert(start); + u32 index = state.block_info.size() - 1; + state.registered.insert({start, index}); return ⁢ } @@ -136,10 +207,12 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info while (true) { if (offset >= end_address) { parse_info.branch_info.address = exit_branch; + parse_info.branch_info.ignore = false; break; } - if (state.visited_address.count(offset) != 0) { + if (state.registered.count(offset) != 0) { parse_info.branch_info.address = offset; + parse_info.branch_info.ignore = true; break; } const Instruction instr = {state.program_code[offset]}; @@ -168,6 +241,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -199,6 +273,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -222,6 +297,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = true; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -245,6 +321,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = false; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = true; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -268,6 +345,7 @@ ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info parse_info.branch_info.kill = true; parse_info.branch_info.is_sync = false; parse_info.branch_info.is_brk = false; + parse_info.branch_info.ignore = false; parse_info.end_address = offset; return ParseResult::ControlCaught; @@ -322,6 +400,7 @@ bool TryInspectAddress(CFGRebuildState& state) { block_info->branch = it->branch; BlockBranchInfo forward_branch{}; forward_branch.address = address; + forward_branch.ignore = true; it->branch = forward_branch; return true; break; @@ -348,6 +427,58 @@ bool TryInspectAddress(CFGRebuildState& state) { return true; } +bool TryQuery(CFGRebuildState& state) { + auto gather_labels = ([](ControlStack& cc, std::set labels, BlockInfo& block) { + Stamp start{block.start, 0}; + Stamp end{block.end, 0}; + auto gather_start = labels.lower_bound(start); + auto gather_end = labels.upper_bound(end); + while (gather_start != gather_end) { + cc.Push(gather_start->target); + gather_start++; + } + }); + if (state.queries.empty()) { + return false; + } + Query& q = state.queries.front(); + u32 block_index = state.registered[q.address]; + BlockInfo& block = state.block_info[block_index]; + if (block.visited) { + BlockStack& stack = state.stacks[q.address]; + bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack); + state.queries.pop_front(); + return all_okay; + } + block.visited = true; + BlockStack bs{q}; + state.stacks[q.address] = bs; + Query q2(q); + state.queries.pop_front(); + gather_labels(q2.ssy_stack, state.ssy_labels, block); + gather_labels(q2.pbk_stack, state.pbk_labels, block); + if (!block.branch.condition.IsUnconditional()) { + q2.address = block.end + 1; + state.queries.push_back(q2); + } + Query conditional_query{q2}; + if (block.branch.is_sync) { + if (block.branch.address == unassigned_branch) { + block.branch.address = conditional_query.ssy_stack.Top(); + } + conditional_query.ssy_stack.Pop(); + } + if (block.branch.is_brk) { + if (block.branch.address == unassigned_branch) { + block.branch.address = conditional_query.pbk_stack.Top(); + } + conditional_query.pbk_stack.Pop(); + } + conditional_query.address = block.branch.address; + state.queries.push_back(conditional_query); + return true; +} + bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, ShaderCharacteristics& result_out) { CFGRebuildState state{program_code, program_size}; @@ -360,20 +491,34 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre return false; } } + // Decompile Stacks + Query start_query{}; + start_query.address = start_address; + state.queries.push_back(start_query); + bool decompiled = true; + while (!state.queries.empty()) { + if (!TryQuery(state)) { + decompiled = false; + break; + } + } + // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - // Remove unvisited blocks result_out.blocks.clear(); - result_out.decompilable = false; + result_out.decompilable = decompiled; result_out.start = start_address; result_out.end = start_address; for (auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; - new_block.branch.cond = block.branch.condition; - new_block.branch.kills = block.branch.kill; - new_block.branch.address = block.branch.address; + new_block.ignore_branch = block.branch.ignore; + if (!new_block.ignore_branch) { + new_block.branch.cond = block.branch.condition; + new_block.branch.kills = block.branch.kill; + new_block.branch.address = block.branch.address; + } result_out.end = std::max(result_out.end, block.end); result_out.blocks.push_back(new_block); } diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 16736d57a..f5d37a231 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -29,6 +29,7 @@ struct ShaderBlock { ShaderBlock(const ShaderBlock& sb) = default; u32 start{}; u32 end{}; + bool ignore_branch{}; struct Branch { Condition cond{}; bool kills{}; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 09f55bd21..1a74b70cb 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -44,6 +44,17 @@ void ShaderIR::Decode() { coverage_begin = shader_info.start; coverage_end = shader_info.end; if (shader_info.decompilable) { + std::list& blocks = shader_info.blocks; + for (auto& block : blocks) { + NodeBlock nodes; + if (!block.ignore_branch) { + nodes = DecodeRange(block.start, block.end); + InsertControlFlow(nodes, block); + } else { + nodes = DecodeRange(block.start, block.end + 1); + } + basic_blocks.insert({block.start, nodes}); + } return; } // we can't decompile it, fallback to standard method @@ -73,6 +84,41 @@ NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { return basic_block; } +void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { + auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { + Node result = n; + if (cond.cc != ConditionCode::T) { + result = Conditional(GetConditionCode(cond.cc), {result}); + } + if (cond.predicate != Pred::UnusedIndex) { + u32 pred = static_cast(cond.predicate); + bool is_neg = pred > 7; + if (is_neg) + pred -= 8; + result = Conditional(GetPredicate(pred, is_neg), {result}); + } + return result; + }); + if (block.branch.address < 0) { + if (block.branch.kills) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); +} + u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { // Ignore sched instructions when generating code. if (IsSchedInstruction(pc, main_offset)) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index a67d4f390..a6729064b 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -22,6 +22,8 @@ namespace VideoCommon::Shader { +struct ShaderBlock; + using ProgramCode = std::vector; constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; @@ -125,6 +127,7 @@ private: void Decode(); NodeBlock DecodeRange(u32 begin, u32 end); + void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); /** * Decodes a single instruction from Tegra to IR.