mirror of
https://github.com/NaC-L/Mergen.git
synced 2026-05-12 09:40:34 +00:00
Recover structured loop lifting safely
This commit is contained in:
@@ -65,7 +65,7 @@ Use `simple/protected381/*`.
|
||||
- These are the best local VMP-style performance targets.
|
||||
- Continue profiling semantics/memory/folder helpers there.
|
||||
- For large control-flow/semantics/inlining changes, run `python test.py vmp` from repo root. That command now fails required targets on diagnostics errors **or** `blocks_completed == 0`, while still reporting the older VMP 3.6 sample as best-effort only.
|
||||
- The current safe configuration keeps loop-header generalization disabled and relies on a higher basic-block budget for the stable 3.8.x samples.
|
||||
- The current safe configuration allows structured loop-header reuse only for reducible conditional/direct-jump headers (including short trampoline chains into a conditional header); indirect-jump and `ret` dispatcher-style loops remain blocked, and required 3.8.x samples must still validate via `python test.py vmp`.
|
||||
|
||||
### If the goal is older protected/VMP 3.6 support
|
||||
Use `simple/protected/simple_target_protected.vmp.exe`, but do not treat it as a normal instruction-semantics-only problem.
|
||||
|
||||
@@ -131,11 +131,11 @@ Samples without a `semantic` field are not tested. The `semantic` field is optio
|
||||
|
||||
### Coverage summary
|
||||
|
||||
Current active quick-gate semantic coverage is **30 samples / 165 cases**.
|
||||
Current active quick-gate semantic coverage is **33 samples / 177 cases**.
|
||||
|
||||
Notable current state:
|
||||
- `dummy_vm_loop` and `bytecode_vm_loop` remain active VM-shaped control-flow samples.
|
||||
- `stack_vm_loop` and `calc_sum_to_n` are currently marked `skip` because the safe VMP configuration disables loop-header generalization and these samples still exceed the block budget without it.
|
||||
- `dummy_vm_loop`, `bytecode_vm_loop`, and `stack_vm_loop` are active VM-shaped control-flow samples.
|
||||
- `calc_sum_to_n` is active again under the safe structured-loop recovery path.
|
||||
- `calc_cout` remains `ci_skip` because its C++ codegen is toolchain-dependent on CI.
|
||||
|
||||
## Call-boundary ABI framework
|
||||
|
||||
+1
-1
@@ -33,7 +33,7 @@ Mergen is a function-level LLVM IR lifting engine for deobfuscation and devirtua
|
||||
|
||||
## Quality Contract
|
||||
- Handler coverage: 112/115 handlers with oracle-backed verification
|
||||
- Active regression corpus: 30 semantic samples / 165 runtime semantic cases; `stack_vm_loop` and `calc_sum_to_n` are tracked known limitations, and `calc_cout` remains CI-skipped because its C++ codegen is toolchain-dependent
|
||||
- Active regression corpus: 33 semantic samples / 177 runtime semantic cases; structured loop recovery now keeps `calc_sum_to_n` and `stack_vm_loop` active, and `calc_cout` remains CI-skipped because its C++ codegen is toolchain-dependent
|
||||
- Determinism: golden IR hashes are enforced for tracked outputs
|
||||
- CI gates: register/flag correctness, rewrite baseline, semantic regression, and Windows build lanes
|
||||
- Targeted VMP gate: `python test.py vmp` must keep required 3.8.x targets at `blocks_completed > 0`; VMP 3.6 remains best-effort only
|
||||
|
||||
@@ -59,24 +59,24 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(PATH_info)::solvePath(
|
||||
visitedAddresses.contains(target) &&
|
||||
target <= blockInfo.block_address;
|
||||
auto it = addrToBB.find(target);
|
||||
const bool generalizedHeaderLooksSimple =
|
||||
it == addrToBB.end() || !it->second || llvm::pred_size(it->second) <= 1;
|
||||
const bool pendingGeneralization =
|
||||
pendingLoopGeneralizationAddresses.contains(target);
|
||||
const bool wantsGeneralization =
|
||||
currentPathSolveAllowsLoopGeneralization() &&
|
||||
generalizedHeaderLooksSimple &&
|
||||
!generalizedLoopAddresses.contains(target) &&
|
||||
backwardVisitedTarget;
|
||||
pendingGeneralization ||
|
||||
(backwardVisitedTarget && canGeneralizeStructuredLoopHeader(target));
|
||||
if (wantsGeneralization) {
|
||||
if (currentPathSolveContext == PathSolveContext::DirectJump) {
|
||||
stackBypassGeneralizedLoopAddresses.insert(target);
|
||||
}
|
||||
const bool generalizedBackup =
|
||||
stackBypassGeneralizedLoopAddresses.contains(target);
|
||||
if (pendingLoopGeneralizationAddresses.contains(target) &&
|
||||
it != addrToBB.end() && it->second && it->second->empty()) {
|
||||
if (pendingGeneralization && it != addrToBB.end() && it->second &&
|
||||
it->second->empty()) {
|
||||
return {it->second, false, generalizedBackup};
|
||||
}
|
||||
pendingLoopGeneralizationAddresses.insert(target);
|
||||
if (!pendingGeneralization) {
|
||||
pendingLoopGeneralizationAddresses.insert(target);
|
||||
}
|
||||
if (it != addrToBB.end() && it->second && !it->second->empty()) {
|
||||
return {replaceWithGeneralizedLoopBlock(target, name), false,
|
||||
generalizedBackup};
|
||||
|
||||
@@ -503,11 +503,49 @@ public:
|
||||
bool currentBlockUsesGeneralizedLoopState() const {
|
||||
return currentBlockRestoreMode == BlockRestoreMode::GeneralizedLoop;
|
||||
}
|
||||
bool currentPathSolveAllowsLoopGeneralization() const {
|
||||
// Disabled until the generalized loop heuristics can preserve real VMP exit
|
||||
// paths without collapsing required 3.8.x targets into non-terminating IR.
|
||||
bool currentPathSolveAllowsStructuredLoopGeneralization() const {
|
||||
return currentPathSolveContext == PathSolveContext::ConditionalBranch ||
|
||||
currentPathSolveContext == PathSolveContext::DirectJump;
|
||||
}
|
||||
// Decides whether `block` has the shape of a structured loop header: either
// it ends in a conditional branch itself, or it reaches a block that does
// through a short chain of unconditional branches (trampolines).
//
// Returns true as soon as a conditional branch terminator is found.
// Returns false when:
//   - `block` is null, or the walk revisits a block or reaches an empty one;
//   - the first block has more than two predecessors, or any later block in
//     the chain has more than one;
//   - a block has no terminator yet, or its terminator is not a branch;
//   - no conditional branch is found within kMaxChainDepth blocks.
bool isStructuredLoopHeaderShape(BasicBlock* block) const {
  // Upper bound on how many blocks the trampoline walk may inspect.
  constexpr unsigned kMaxChainDepth = 8;

  std::set<BasicBlock*> seenBlocks;
  auto* current = block;
  for (unsigned depth = 0; current && depth < kMaxChainDepth; ++depth) {
    // A repeated block means the chain loops back on itself without ever
    // reaching a conditional branch.
    if (!seenBlocks.insert(current).second || current->empty()) {
      return false;
    }

    // The candidate header itself may have two predecessors; blocks further
    // down the chain may only have one.
    const size_t maxPreds = depth == 0 ? 2 : 1;
    if (llvm::pred_size(current) > maxPreds) {
      return false;
    }

    // A non-empty block that is still being built has no terminator, so
    // getTerminator() returns nullptr. dyn_cast_or_null tolerates that,
    // whereas plain dyn_cast must not be handed a null pointer.
    auto* branch =
        llvm::dyn_cast_or_null<llvm::BranchInst>(current->getTerminator());
    if (!branch) {
      return false;
    }
    if (branch->isConditional()) {
      return true;
    }
    if (branch->getNumSuccessors() != 1) {
      return false;
    }

    // Unconditional branch: keep following the chain.
    current = branch->getSuccessor(0);
  }
  return false;
}
|
||||
bool canGeneralizeStructuredLoopHeader(uint64_t addr) {
|
||||
if (getControlFlow() != ControlFlow::Unflatten ||
|
||||
!currentPathSolveAllowsStructuredLoopGeneralization() ||
|
||||
addr > blockInfo.block_address || !visitedAddresses.contains(addr) ||
|
||||
pendingLoopGeneralizationAddresses.contains(addr) ||
|
||||
generalizedLoopAddresses.contains(addr)) {
|
||||
return false;
|
||||
}
|
||||
auto it = addrToBB.find(addr);
|
||||
if (it == addrToBB.end() || !it->second || it->second->empty()) {
|
||||
return false;
|
||||
}
|
||||
return isStructuredLoopHeaderShape(it->second);
|
||||
}
|
||||
|
||||
void liftBasicBlockFromAddress(uint64_t addr) {
|
||||
++liftStats.blocks_attempted;
|
||||
@@ -757,7 +795,7 @@ public:
|
||||
|
||||
BasicBlock* getLiftedBackedgeBB(uint64_t addr) {
|
||||
if (getControlFlow() != ControlFlow::Unflatten ||
|
||||
!currentPathSolveAllowsLoopGeneralization()) {
|
||||
!currentPathSolveAllowsStructuredLoopGeneralization()) {
|
||||
return nullptr;
|
||||
}
|
||||
if (addr > blockInfo.block_address ||
|
||||
|
||||
+195
-8
@@ -412,11 +412,24 @@ private:
|
||||
}
|
||||
|
||||
|
||||
bool runLoopGeneralizationDirectJumpBlocked(std::string& details) {
|
||||
// Verifies that a lifter in the ConditionalBranch path-solve context reports
// structured loop generalization as allowed. On failure an explanation is
// written to `details` and false is returned.
bool runLoopGeneralizationConditionalBranchAllowed(std::string& details) {
  LifterUnderTest lifter;
  lifter.currentPathSolveContext =
      LifterUnderTest::PathSolveContext::ConditionalBranch;

  const bool allowed =
      lifter.currentPathSolveAllowsStructuredLoopGeneralization();
  if (allowed) {
    return true;
  }

  details =
      " conditional-branch loop context should allow structured loop-header reuse\n";
  return false;
}
|
||||
|
||||
bool runLoopGeneralizationDirectJumpAllowed(std::string& details) {
|
||||
LifterUnderTest lifter;
|
||||
lifter.currentPathSolveContext = LifterUnderTest::PathSolveContext::DirectJump;
|
||||
if (lifter.currentPathSolveAllowsLoopGeneralization()) {
|
||||
details = " direct-jump loop context must stay disabled until VMP-safe generalization exists\n";
|
||||
if (!lifter.currentPathSolveAllowsStructuredLoopGeneralization()) {
|
||||
details =
|
||||
" direct-jump latch context should allow structured loop-header reuse\n";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -424,9 +437,170 @@ private:
|
||||
|
||||
bool runLoopGeneralizationIndirectJumpBlocked(std::string& details) {
|
||||
LifterUnderTest lifter;
|
||||
lifter.currentPathSolveContext = LifterUnderTest::PathSolveContext::IndirectJump;
|
||||
if (lifter.currentPathSolveAllowsLoopGeneralization()) {
|
||||
details = " indirect-jump dispatcher context must not generalize loop state\n";
|
||||
lifter.currentPathSolveContext =
|
||||
LifterUnderTest::PathSolveContext::IndirectJump;
|
||||
if (lifter.currentPathSolveAllowsStructuredLoopGeneralization()) {
|
||||
details =
|
||||
" indirect-jump dispatcher context must not generalize loop state\n";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Verifies that a lifter in the Ret path-solve context does not allow
// structured loop generalization. On failure an explanation is written to
// `details` and false is returned.
bool runLoopGeneralizationRetBlocked(std::string& details) {
  LifterUnderTest lifter;
  lifter.currentPathSolveContext = LifterUnderTest::PathSolveContext::Ret;

  const bool allowed =
      lifter.currentPathSolveAllowsStructuredLoopGeneralization();
  if (!allowed) {
    return true;
  }

  details = " return-path loop context must not generalize loop state\n";
  return false;
}
|
||||
|
||||
bool runStructuredLoopHeaderAllowsConditionalBackedge(std::string& details) {
|
||||
LifterUnderTest lifter;
|
||||
lifter.currentPathSolveContext =
|
||||
LifterUnderTest::PathSolveContext::ConditionalBranch;
|
||||
|
||||
auto* current = llvm::BasicBlock::Create(lifter.context, "current", lifter.fnc);
|
||||
auto* header = llvm::BasicBlock::Create(lifter.context, "loop_header", lifter.fnc);
|
||||
auto* body = llvm::BasicBlock::Create(lifter.context, "loop_body", lifter.fnc);
|
||||
auto* exit = llvm::BasicBlock::Create(lifter.context, "loop_exit", lifter.fnc);
|
||||
|
||||
llvm::IRBuilder<> currentBuilder(current);
|
||||
currentBuilder.CreateBr(header);
|
||||
|
||||
llvm::IRBuilder<> headerBuilder(header);
|
||||
headerBuilder.CreateCondBr(llvm::ConstantInt::getTrue(lifter.context), body, exit);
|
||||
|
||||
llvm::IRBuilder<> bodyBuilder(body);
|
||||
bodyBuilder.CreateRet(llvm::ConstantInt::get(llvm::Type::getInt64Ty(lifter.context), 0));
|
||||
|
||||
llvm::IRBuilder<> exitBuilder(exit);
|
||||
exitBuilder.CreateRet(llvm::ConstantInt::get(llvm::Type::getInt64Ty(lifter.context), 1));
|
||||
|
||||
lifter.blockInfo = BBInfo(0x2000, current);
|
||||
lifter.visitedAddresses.insert(0x1000);
|
||||
lifter.addrToBB[0x1000] = header;
|
||||
|
||||
if (!lifter.canGeneralizeStructuredLoopHeader(0x1000)) {
|
||||
details = " visited conditional loop header should be eligible for structured reuse\n";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool runStructuredLoopHeaderAllowsJumpChain(std::string& details) {
|
||||
LifterUnderTest lifter;
|
||||
lifter.currentPathSolveContext =
|
||||
LifterUnderTest::PathSolveContext::DirectJump;
|
||||
|
||||
auto* current = llvm::BasicBlock::Create(lifter.context, "current", lifter.fnc);
|
||||
auto* trampoline =
|
||||
llvm::BasicBlock::Create(lifter.context, "loop_trampoline", lifter.fnc);
|
||||
auto* header = llvm::BasicBlock::Create(lifter.context, "loop_header", lifter.fnc);
|
||||
auto* body = llvm::BasicBlock::Create(lifter.context, "loop_body", lifter.fnc);
|
||||
auto* exit = llvm::BasicBlock::Create(lifter.context, "loop_exit", lifter.fnc);
|
||||
|
||||
llvm::IRBuilder<> currentBuilder(current);
|
||||
currentBuilder.CreateBr(trampoline);
|
||||
|
||||
llvm::IRBuilder<> trampolineBuilder(trampoline);
|
||||
trampolineBuilder.CreateBr(header);
|
||||
|
||||
llvm::IRBuilder<> headerBuilder(header);
|
||||
headerBuilder.CreateCondBr(llvm::ConstantInt::getTrue(lifter.context), body, exit);
|
||||
|
||||
llvm::IRBuilder<> bodyBuilder(body);
|
||||
bodyBuilder.CreateRet(
|
||||
llvm::ConstantInt::get(llvm::Type::getInt64Ty(lifter.context), 0));
|
||||
|
||||
llvm::IRBuilder<> exitBuilder(exit);
|
||||
exitBuilder.CreateRet(
|
||||
llvm::ConstantInt::get(llvm::Type::getInt64Ty(lifter.context), 1));
|
||||
|
||||
lifter.blockInfo = BBInfo(0x2000, current);
|
||||
lifter.visitedAddresses.insert(0x1000);
|
||||
lifter.addrToBB[0x1000] = trampoline;
|
||||
|
||||
if (!lifter.canGeneralizeStructuredLoopHeader(0x1000)) {
|
||||
details =
|
||||
" direct-jump trampoline chain into a conditional header should be eligible for structured reuse\n";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool runStructuredLoopHeaderRejectsNonConditionalTerminator(
|
||||
std::string& details) {
|
||||
LifterUnderTest lifter;
|
||||
lifter.currentPathSolveContext =
|
||||
LifterUnderTest::PathSolveContext::ConditionalBranch;
|
||||
|
||||
auto* current = llvm::BasicBlock::Create(lifter.context, "current", lifter.fnc);
|
||||
auto* header = llvm::BasicBlock::Create(lifter.context, "loop_header", lifter.fnc);
|
||||
auto* exit = llvm::BasicBlock::Create(lifter.context, "loop_exit", lifter.fnc);
|
||||
|
||||
llvm::IRBuilder<> currentBuilder(current);
|
||||
currentBuilder.CreateBr(header);
|
||||
|
||||
llvm::IRBuilder<> headerBuilder(header);
|
||||
headerBuilder.CreateBr(exit);
|
||||
|
||||
llvm::IRBuilder<> exitBuilder(exit);
|
||||
exitBuilder.CreateRet(llvm::ConstantInt::get(llvm::Type::getInt64Ty(lifter.context), 0));
|
||||
|
||||
lifter.blockInfo = BBInfo(0x2000, current);
|
||||
lifter.visitedAddresses.insert(0x1000);
|
||||
lifter.addrToBB[0x1000] = header;
|
||||
|
||||
if (lifter.canGeneralizeStructuredLoopHeader(0x1000)) {
|
||||
details = " non-conditional header must not be treated as a structured loop header\n";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool runStructuredLoopHeaderRejectsMultiplePredecessors(std::string& details) {
|
||||
LifterUnderTest lifter;
|
||||
lifter.currentPathSolveContext =
|
||||
LifterUnderTest::PathSolveContext::ConditionalBranch;
|
||||
|
||||
auto* current = llvm::BasicBlock::Create(lifter.context, "current", lifter.fnc);
|
||||
auto* alternate = llvm::BasicBlock::Create(lifter.context, "alternate", lifter.fnc);
|
||||
auto* third = llvm::BasicBlock::Create(lifter.context, "third", lifter.fnc);
|
||||
auto* header = llvm::BasicBlock::Create(lifter.context, "loop_header", lifter.fnc);
|
||||
auto* body = llvm::BasicBlock::Create(lifter.context, "loop_body", lifter.fnc);
|
||||
auto* exit = llvm::BasicBlock::Create(lifter.context, "loop_exit", lifter.fnc);
|
||||
|
||||
llvm::IRBuilder<> currentBuilder(current);
|
||||
currentBuilder.CreateBr(header);
|
||||
|
||||
llvm::IRBuilder<> alternateBuilder(alternate);
|
||||
alternateBuilder.CreateBr(header);
|
||||
|
||||
llvm::IRBuilder<> thirdBuilder(third);
|
||||
thirdBuilder.CreateBr(header);
|
||||
|
||||
llvm::IRBuilder<> headerBuilder(header);
|
||||
headerBuilder.CreateCondBr(llvm::ConstantInt::getTrue(lifter.context), body, exit);
|
||||
|
||||
llvm::IRBuilder<> bodyBuilder(body);
|
||||
bodyBuilder.CreateRet(
|
||||
llvm::ConstantInt::get(llvm::Type::getInt64Ty(lifter.context), 0));
|
||||
|
||||
llvm::IRBuilder<> exitBuilder(exit);
|
||||
exitBuilder.CreateRet(
|
||||
llvm::ConstantInt::get(llvm::Type::getInt64Ty(lifter.context), 1));
|
||||
|
||||
lifter.blockInfo = BBInfo(0x2000, current);
|
||||
lifter.visitedAddresses.insert(0x1000);
|
||||
lifter.addrToBB[0x1000] = header;
|
||||
|
||||
if (lifter.canGeneralizeStructuredLoopHeader(0x1000)) {
|
||||
details =
|
||||
" header with more than two predecessors must not be generalized as a structured loop\n";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -537,10 +711,23 @@ private:
|
||||
&InstructionTester::runScasRepeatPrefixesRejected);
|
||||
runCustom("loop_addrsize_override_rejected",
|
||||
&InstructionTester::runLoopAddressSizeOverrideRejected);
|
||||
runCustom("loop_generalization_direct_jump_blocked",
|
||||
&InstructionTester::runLoopGeneralizationDirectJumpBlocked);
|
||||
runCustom("loop_generalization_conditional_branch_allowed",
|
||||
&InstructionTester::runLoopGeneralizationConditionalBranchAllowed);
|
||||
runCustom("loop_generalization_direct_jump_allowed",
|
||||
&InstructionTester::runLoopGeneralizationDirectJumpAllowed);
|
||||
runCustom("loop_generalization_indirect_jump_blocked",
|
||||
&InstructionTester::runLoopGeneralizationIndirectJumpBlocked);
|
||||
runCustom("loop_generalization_ret_blocked",
|
||||
&InstructionTester::runLoopGeneralizationRetBlocked);
|
||||
runCustom("structured_loop_header_allows_conditional_backedge",
|
||||
&InstructionTester::runStructuredLoopHeaderAllowsConditionalBackedge);
|
||||
runCustom("structured_loop_header_allows_jump_chain",
|
||||
&InstructionTester::runStructuredLoopHeaderAllowsJumpChain);
|
||||
|
||||
runCustom("structured_loop_header_rejects_non_conditional_terminator",
|
||||
&InstructionTester::runStructuredLoopHeaderRejectsNonConditionalTerminator);
|
||||
runCustom("structured_loop_header_rejects_multiple_predecessors",
|
||||
&InstructionTester::runStructuredLoopHeaderRejectsMultiplePredecessors);
|
||||
runCustom("generalized_loop_without_bypass_tag_keeps_normal_restore",
|
||||
&InstructionTester::runGeneralizedLoopWithoutBypassTagKeepsNormalRestore);
|
||||
runCustom("generalized_loop_with_bypass_tag_uses_generalized_restore",
|
||||
|
||||
@@ -100,9 +100,9 @@
|
||||
"patterns": [
|
||||
{ "line_all": ["and i32", ", 1"] },
|
||||
{ "line_all": ["and i32", ", 7"] },
|
||||
"phi i64",
|
||||
"select i1",
|
||||
"icmp ugt i32",
|
||||
"phi i32",
|
||||
"add i32",
|
||||
"sub i32",
|
||||
"br i1"
|
||||
],
|
||||
"semantic": [
|
||||
@@ -120,10 +120,10 @@
|
||||
"patterns": [
|
||||
{ "line_all": ["and i32", ", 1"] },
|
||||
{ "line_all": ["and i32", ", 7"] },
|
||||
"select i1",
|
||||
"phi i64",
|
||||
"icmp ugt i32",
|
||||
"br i1"
|
||||
"phi i32",
|
||||
"add i32",
|
||||
"sub i32",
|
||||
"ret i64 %common.ret.op"
|
||||
],
|
||||
"semantic": [
|
||||
{ "inputs": { "RCX": 0 }, "expected": 40, "label": "even program returns constant handler" },
|
||||
@@ -137,17 +137,14 @@
|
||||
{
|
||||
"name": "stack_vm_loop",
|
||||
"symbol": "stack_vm_loop_target",
|
||||
"skip": true,
|
||||
"skip_reason": "Safe VMP-mode lifting currently disables loop-header generalization; this stack-based VM loop still exceeds the block budget without it.",
|
||||
"patterns": [
|
||||
{ "line_all": ["and i32", ", 1"] },
|
||||
{ "line_all": ["and i32", ", 7"] },
|
||||
"switch i32",
|
||||
"phi i32",
|
||||
"sub i32",
|
||||
"select i1",
|
||||
"add i32",
|
||||
"ret i64"
|
||||
"add nsw i32",
|
||||
"mul i33",
|
||||
"lshr exact i33",
|
||||
"ret i64 %common.ret.op"
|
||||
],
|
||||
"semantic": [
|
||||
{ "inputs": { "RCX": 0 }, "expected": 40, "label": "even program returns constant handler" },
|
||||
@@ -265,8 +262,6 @@
|
||||
{
|
||||
"name": "calc_sum_to_n",
|
||||
"symbol": "calc_sum_to_n",
|
||||
"skip": true,
|
||||
"skip_reason": "Safe VMP-mode lifting currently disables loop-header generalization; this counted-loop sample still explodes the block budget without it.",
|
||||
"patterns": ["phi i32", "icmp slt i32", "add i32", "br i1"],
|
||||
"semantic": [
|
||||
{ "inputs": { "RCX": 0 }, "expected": 0, "label": "n=0" },
|
||||
|
||||
Reference in New Issue
Block a user