From 20f80b672ee67b096c22490e7e9c987d60f71715 Mon Sep 17 00:00:00 2001 From: naci Date: Sun, 19 Apr 2026 19:31:41 +0300 Subject: [PATCH] lifter: prefer file-backed queued targets (#105) The next Themida follow-up after PR #104 exposed another low-target alias problem: queued/control-flow destinations like 0x52532 were treated as already paged because the synthetic stack range covered them, so they never widened to their file-backed image RVA forms. This patch splits target normalization into two policies: PathSolver keeps the broad paged normalization it needs for resolved loop/control-flow work, while getOrCreateBB/getUnvisitedAddr use a stricter file-backed normalization that prefers image-backed addresses over low stack aliases. It also queues the fake indirect-call return targets in Semantics.ipp so those destinations actually enter the worklist. Observed effect on example2-virt.bin @ 0x140001000: 24 attempted / 1086 instructions -> 35 attempted / 1565 instructions, with new reached addresses in the 0x14001xxxx and 0x14002xxxx ranges. Verification: - build_iced lifter rewrite_microtests - rewrite_microtests.exe solve_path_widens_mapped_rva_target normalize_runtime_target_widens_mapped_rva_target solve_load_infers_concrete_base_from_tracked_load generalized_loop_restore_merges_backedge_register_state - python test.py quick - python test.py vmp - build_iced\lifter.exe ..\testthemida\example2-virt.bin 0x140001000 Co-authored-by: yusufcanislek --- lifter/analysis/PathSolver.ipp | 20 +------ lifter/core/LifterClass.hpp | 64 ++++++++++++++++++++++ lifter/semantics/Semantics.ipp | 11 ++-- lifter/semantics/Semantics_ControlFlow.ipp | 15 +++-- lifter/test/Tester.hpp | 20 +++++++ 5 files changed, 100 insertions(+), 30 deletions(-) diff --git a/lifter/analysis/PathSolver.ipp b/lifter/analysis/PathSolver.ipp index 3f7c5e5..7099662 100644 --- a/lifter/analysis/PathSolver.ipp +++ b/lifter/analysis/PathSolver.ipp @@ -27,25 +27,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(PATH_info)::solvePath( // (from different branch paths), so cached results don't carry over. pv_cache.clear(); auto normalizeTargetAddress = [&](uint64_t target) -> uint64_t { - if (isMemPaged(target)) { - return target; - } - - if (target <= std::numeric_limits::max() && - file.imageBase > std::numeric_limits::max()) { - const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL; - const uint64_t widenedLow32 = highBits | target; - if (isMemPaged(widenedLow32)) { - return widenedLow32; - } - - const uint64_t widenedRva = file.imageBase + target; - if (isMemPaged(widenedRva)) { - return widenedRva; - } - } - - return target; + return normalizeRuntimeTargetAddress(target); }; struct ResolvedTargetBlock { diff --git a/lifter/core/LifterClass.hpp b/lifter/core/LifterClass.hpp index 9d1dd58..ed13d4f 100644 --- a/lifter/core/LifterClass.hpp +++ b/lifter/core/LifterClass.hpp @@ -823,6 +823,13 @@ public: out = std::move(unvisitedBlocks.back()); unvisitedBlocks.pop_back(); + const uint64_t normalizedAddr = + normalizeFileBackedRuntimeTargetAddress(out.block_address); + if (normalizedAddr != out.block_address) { + addrToBB[normalizedAddr] = out.block; + out.block_address = normalizedAddr; + } + // In Basic mode, skip blocks that already have instructions // (they were processed in a previous iteration). if (getControlFlow() == ControlFlow::Basic && !out.block->empty() && @@ -1017,6 +1024,7 @@ public: BasicBlock* getOrCreateBB(uint64_t addr, std::string name) { + addr = normalizeFileBackedRuntimeTargetAddress(addr); if (getControlFlow() == ControlFlow::Basic) { auto it = addrToBB.find(addr); if (it != addrToBB.end()) { @@ -1024,6 +1032,14 @@ public: return it->second; } } + if (getControlFlow() == ControlFlow::Unflatten) { + auto it = addrToBB.find(addr); + if (it != addrToBB.end() && it->second && !it->second->empty() && + liftProgressDiagEnabled) { + std::cout << "[diag] overwriting existing bb for 0x" << std::hex << addr + << std::dec << " old=" << it->second->getName().str() << "\n"; + } + } auto bb = createBudgetedBasicBlock(name, addr); if (bb == liftAbortBlock) { return bb; @@ -1333,6 +1349,54 @@ public: --it; return address >= it->first && address < it->second; + + } + + bool isFileBackedRuntimeAddress(uint64_t address) { + uint64_t ignored = 0; + return file.readMemory(address, 1, ignored); + } + + uint64_t normalizeRuntimeTargetAddress(uint64_t target) { + if (isMemPaged(target)) { + return target; + } + + if (target <= std::numeric_limits::max() && + file.imageBase > std::numeric_limits::max()) { + const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL; + const uint64_t widenedLow32 = highBits | target; + const uint64_t widenedRva = file.imageBase + target; + if (isMemPaged(widenedLow32)) { + return widenedLow32; + } + if (isMemPaged(widenedRva)) { + return widenedRva; + } + } + + return target; + } + + uint64_t normalizeFileBackedRuntimeTargetAddress(uint64_t target) { + if (isFileBackedRuntimeAddress(target)) { + return target; + } + + if (target <= std::numeric_limits::max() && + file.imageBase > std::numeric_limits::max()) { + const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL; + const uint64_t widenedLow32 = highBits | target; + const uint64_t widenedRva = file.imageBase + target; + if (isFileBackedRuntimeAddress(widenedLow32)) { + return widenedLow32; + } + if (isFileBackedRuntimeAddress(widenedRva)) { + return widenedRva; + } + } + + return target; } std::set diff --git a/lifter/semantics/Semantics.ipp b/lifter/semantics/Semantics.ipp index b08342b..301d7ce 100644 --- a/lifter/semantics/Semantics.ipp +++ b/lifter/semantics/Semantics.ipp @@ -156,12 +156,14 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() { return; } auto RIP_value = cast(next_jump); - auto jump_address = RIP_value->getZExtValue(); + auto jump_address = + normalizeRuntimeTargetAddress(RIP_value->getZExtValue()); auto bb = getOrCreateBB(jump_address, "bb_call"); builder->CreateBr(bb); blockInfo = BBInfo(jump_address, bb); + addUnvisitedAddr(blockInfo); run = 0; return; } @@ -181,10 +183,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() { STACKP_VALUE) { printvalueforce2(jump_address); - // TODO: ideally remove this part - auto bb = getOrCreateBB(jump_address, "bb_indirectly_called"); // actually call the function first - auto functionName = file.getName(jump_address); debugging::doIfDebug([&]() { outs() << "calling : " << functionName @@ -198,11 +197,13 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() { // get [rsp], jump there auto RIP_value = cast(next_jump); - jump_address = RIP_value->getZExtValue(); + jump_address = normalizeRuntimeTargetAddress(RIP_value->getZExtValue()); + auto bb = getOrCreateBB(jump_address, "bb_indirectly_called"); builder->CreateBr(bb); blockInfo = BBInfo(jump_address, bb); + addUnvisitedAddr(blockInfo); run = 0; return; } diff --git a/lifter/semantics/Semantics_ControlFlow.ipp b/lifter/semantics/Semantics_ControlFlow.ipp index 168766a..676723d 100644 --- a/lifter/semantics/Semantics_ControlFlow.ipp +++ b/lifter/semantics/Semantics_ControlFlow.ipp @@ -219,12 +219,15 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() { break; } auto registerCValue = cast(registerValue); - if (inlinePolicy.isOutline(registerCValue->getZExtValue()) || - shouldOutlineCall(registerCValue->getZExtValue())) { + uint64_t rawTargetAddr = registerCValue->getZExtValue(); + uint64_t normalizedTargetAddr = normalizeRuntimeTargetAddress(rawTargetAddr); + auto* normalizedTargetValue = + builder->getIntN(registerCValue->getBitWidth(), normalizedTargetAddr); + if (inlinePolicy.isOutline(normalizedTargetAddr) || + shouldOutlineCall(normalizedTargetAddr)) { // --- Emit external call (outlined known-address target) --- - uint64_t targetAddr = registerCValue->getZExtValue(); - auto importName = resolveImportName(targetAddr); + auto importName = resolveImportName(normalizedTargetAddr); if (!importName.empty()) { // Named import: emit a proper LLVM function declaration. @@ -242,7 +245,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() { fx.target = CallTargetClass::UnknownDirect; auto idltvm = builder->CreateIntToPtr( - registerValue, PointerType::get(context, 0)); + normalizedTargetValue, PointerType::get(context, 0)); auto callResult = builder->CreateCall( parseArgsType(nullptr, context), idltvm, parseArgs(nullptr)); @@ -255,7 +258,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() { emittedExternalCall = true; break; } - jump_address = registerCValue->getZExtValue(); + jump_address = normalizedTargetAddr; break; } default: diff --git a/lifter/test/Tester.hpp b/lifter/test/Tester.hpp index 64e6544..9755e7f 100644 --- a/lifter/test/Tester.hpp +++ b/lifter/test/Tester.hpp @@ -1070,6 +1070,24 @@ private: } + bool runNormalizeRuntimeTargetWidensMappedRvaTarget(std::string& details) { + LifterUnderTest lifter; + lifter.file.imageBase = 0x140000000ULL; + lifter.markMemPaged(0x140052532ULL, 0x140052540ULL); + const uint64_t normalized = lifter.normalizeRuntimeTargetAddress(0x52532ULL); + if (normalized != 0x140052532ULL) { + std::ostringstream os; + os << " normalizeRuntimeTargetAddress widened to 0x" << std::hex + << normalized << " instead of mapped RVA target 0x140052532\n"; + details = os.str(); + return false; + } + return true; + } + + + + bool runGeneralizedLoopRestoreMergesBackedgeRegisterState( std::string& details) { LifterUnderTest lifter; @@ -1244,6 +1262,8 @@ private: &InstructionTester::runSolveLoadInfersConcreteBaseFromTrackedLoad); runCustom("solve_path_widens_mapped_rva_target", &InstructionTester::runSolvePathWidensMappedRvaTarget); + runCustom("normalize_runtime_target_widens_mapped_rva_target", + &InstructionTester::runNormalizeRuntimeTargetWidensMappedRvaTarget); return failures; }