lifter: prefer file-backed queued targets (#105)

The next Themida follow-up after PR #104 exposed another low-target alias problem: queued/control-flow destinations like 0x52532 were treated as already paged because the synthetic stack range covered them, so they were never widened to their file-backed image RVA forms.

This patch splits target normalization into two policies: PathSolver keeps the broad paged normalization it needs for resolved loop/control-flow work, while getOrCreateBB/getUnvisitedAddr use a stricter file-backed normalization that prefers image-backed addresses over low stack aliases. It also queues the fake indirect-call return targets in Semantics.ipp so those destinations actually enter the worklist.

Observed effect on example2-virt.bin @ 0x140001000: 24 attempted / 1086 instructions -> 35 attempted / 1565 instructions, with new reached addresses in the 0x14001xxxx and 0x14002xxxx ranges.

Verification:

- build_iced lifter rewrite_microtests

- rewrite_microtests.exe solve_path_widens_mapped_rva_target normalize_runtime_target_widens_mapped_rva_target solve_load_infers_concrete_base_from_tracked_load generalized_loop_restore_merges_backedge_register_state

- python test.py quick

- python test.py vmp

- build_iced\lifter.exe ..\testthemida\example2-virt.bin 0x140001000

Co-authored-by: yusufcanislek <yusuf.canislek@meetdandy.com>
This commit is contained in:
naci
2026-04-19 19:31:41 +03:00
committed by GitHub
parent 563c36c060
commit 20f80b672e
5 changed files with 100 additions and 30 deletions
+1 -19
View File
@@ -27,25 +27,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(PATH_info)::solvePath(
// (from different branch paths), so cached results don't carry over.
pv_cache.clear();
auto normalizeTargetAddress = [&](uint64_t target) -> uint64_t {
if (isMemPaged(target)) {
return target;
}
if (target <= std::numeric_limits<uint32_t>::max() &&
file.imageBase > std::numeric_limits<uint32_t>::max()) {
const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL;
const uint64_t widenedLow32 = highBits | target;
if (isMemPaged(widenedLow32)) {
return widenedLow32;
}
const uint64_t widenedRva = file.imageBase + target;
if (isMemPaged(widenedRva)) {
return widenedRva;
}
}
return target;
return normalizeRuntimeTargetAddress(target);
};
struct ResolvedTargetBlock {
+64
View File
@@ -823,6 +823,13 @@ public:
out = std::move(unvisitedBlocks.back());
unvisitedBlocks.pop_back();
const uint64_t normalizedAddr =
normalizeFileBackedRuntimeTargetAddress(out.block_address);
if (normalizedAddr != out.block_address) {
addrToBB[normalizedAddr] = out.block;
out.block_address = normalizedAddr;
}
// In Basic mode, skip blocks that already have instructions
// (they were processed in a previous iteration).
if (getControlFlow() == ControlFlow::Basic && !out.block->empty() &&
@@ -1017,6 +1024,7 @@ public:
BasicBlock* getOrCreateBB(uint64_t addr, std::string name) {
addr = normalizeFileBackedRuntimeTargetAddress(addr);
if (getControlFlow() == ControlFlow::Basic) {
auto it = addrToBB.find(addr);
if (it != addrToBB.end()) {
@@ -1024,6 +1032,14 @@ public:
return it->second;
}
}
if (getControlFlow() == ControlFlow::Unflatten) {
auto it = addrToBB.find(addr);
if (it != addrToBB.end() && it->second && !it->second->empty() &&
liftProgressDiagEnabled) {
std::cout << "[diag] overwriting existing bb for 0x" << std::hex << addr
<< std::dec << " old=" << it->second->getName().str() << "\n";
}
}
auto bb = createBudgetedBasicBlock(name, addr);
if (bb == liftAbortBlock) {
return bb;
@@ -1333,6 +1349,54 @@ public:
--it;
return address >= it->first && address < it->second;
}
// Reports whether `address` can be read through `file.readMemory`.
// The probe asks for a length of 1 (presumably a single byte — confirm against
// the readMemory contract) and discards whatever value is read; only the
// call's result is used.
bool isFileBackedRuntimeAddress(uint64_t address) {
  uint64_t probeValue = 0;
  const bool readable = file.readMemory(address, 1, probeValue);
  return readable;
}
// Shared widening policy behind the two runtime-target normalizers.
//
// `isAcceptable` decides whether a candidate address counts as a valid
// destination. Candidates are tried in this order and the first accepted one
// is returned:
//   1. `target` itself;
//   2. `target` with the upper 32 bits of `file.imageBase` OR-ed in;
//   3. `file.imageBase + target` (the target interpreted as an RVA).
// Steps 2 and 3 are only attempted when `target` fits in 32 bits and
// `file.imageBase` lies above the 32-bit range. If no candidate is accepted
// the original `target` is returned unchanged.
template <typename AddressPredicate>
uint64_t widenLowRuntimeTargetIf(uint64_t target,
                                 AddressPredicate&& isAcceptable) {
  if (isAcceptable(target)) {
    return target;
  }

  constexpr auto kLow32Max = std::numeric_limits<uint32_t>::max();
  if (target > kLow32Max || file.imageBase <= kLow32Max) {
    // Widening only makes sense for a low (32-bit) target under a high base.
    return target;
  }

  const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL;
  const uint64_t widenedLow32 = highBits | target;
  if (isAcceptable(widenedLow32)) {
    return widenedLow32;
  }

  const uint64_t widenedRva = file.imageBase + target;
  if (isAcceptable(widenedRva)) {
    return widenedRva;
  }

  return target;
}

// Broad normalization: a candidate is accepted as soon as `isMemPaged`
// reports it as paged.
// Returns the first accepted candidate, or `target` unchanged.
uint64_t normalizeRuntimeTargetAddress(uint64_t target) {
  return widenLowRuntimeTargetIf(
      target, [&](uint64_t candidate) { return isMemPaged(candidate); });
}

// Strict normalization: a candidate is accepted only when
// `isFileBackedRuntimeAddress` reports it as readable from the file, so a low
// address that is merely paged does not win over its file-backed widened form.
// Returns the first accepted candidate, or `target` unchanged.
uint64_t normalizeFileBackedRuntimeTargetAddress(uint64_t target) {
  return widenLowRuntimeTargetIf(target, [&](uint64_t candidate) {
    return isFileBackedRuntimeAddress(candidate);
  });
}
std::set<llvm::APInt, APIntComparator>
+6 -5
View File
@@ -156,12 +156,14 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() {
return;
}
auto RIP_value = cast<ConstantInt>(next_jump);
auto jump_address = RIP_value->getZExtValue();
auto jump_address =
normalizeRuntimeTargetAddress(RIP_value->getZExtValue());
auto bb = getOrCreateBB(jump_address, "bb_call");
builder->CreateBr(bb);
blockInfo = BBInfo(jump_address, bb);
addUnvisitedAddr(blockInfo);
run = 0;
return;
}
@@ -181,10 +183,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() {
STACKP_VALUE) {
printvalueforce2(jump_address);
// TODO: ideally remove this part
auto bb = getOrCreateBB(jump_address, "bb_indirectly_called");
// actually call the function first
auto functionName = file.getName(jump_address);
debugging::doIfDebug([&]() {
outs() << "calling : " << functionName
@@ -198,11 +197,13 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() {
// get [rsp], jump there
auto RIP_value = cast<ConstantInt>(next_jump);
jump_address = RIP_value->getZExtValue();
jump_address = normalizeRuntimeTargetAddress(RIP_value->getZExtValue());
auto bb = getOrCreateBB(jump_address, "bb_indirectly_called");
builder->CreateBr(bb);
blockInfo = BBInfo(jump_address, bb);
addUnvisitedAddr(blockInfo);
run = 0;
return;
}
+9 -6
View File
@@ -219,12 +219,15 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() {
break;
}
auto registerCValue = cast<ConstantInt>(registerValue);
if (inlinePolicy.isOutline(registerCValue->getZExtValue()) ||
shouldOutlineCall(registerCValue->getZExtValue())) {
uint64_t rawTargetAddr = registerCValue->getZExtValue();
uint64_t normalizedTargetAddr = normalizeRuntimeTargetAddress(rawTargetAddr);
auto* normalizedTargetValue =
builder->getIntN(registerCValue->getBitWidth(), normalizedTargetAddr);
if (inlinePolicy.isOutline(normalizedTargetAddr) ||
shouldOutlineCall(normalizedTargetAddr)) {
// --- Emit external call (outlined known-address target) ---
uint64_t targetAddr = registerCValue->getZExtValue();
auto importName = resolveImportName(targetAddr);
auto importName = resolveImportName(normalizedTargetAddr);
if (!importName.empty()) {
// Named import: emit a proper LLVM function declaration.
@@ -242,7 +245,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() {
fx.target = CallTargetClass::UnknownDirect;
auto idltvm = builder->CreateIntToPtr(
registerValue, PointerType::get(context, 0));
normalizedTargetValue, PointerType::get(context, 0));
auto callResult = builder->CreateCall(
parseArgsType(nullptr, context), idltvm, parseArgs(nullptr));
@@ -255,7 +258,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() {
emittedExternalCall = true;
break;
}
jump_address = registerCValue->getZExtValue();
jump_address = normalizedTargetAddr;
break;
}
default:
+20
View File
@@ -1070,6 +1070,24 @@ private:
}
// Microtest: with the image base set to 0x140000000 and only the range
// 0x140052532..0x140052540 marked as paged, normalizeRuntimeTargetAddress must
// turn the low target 0x52532 into 0x140052532.
// On failure, `details` receives a message with the address actually returned.
bool runNormalizeRuntimeTargetWidensMappedRvaTarget(std::string& details) {
  constexpr auto kImageBase = 0x140000000ULL;
  constexpr auto kLowTarget = 0x52532ULL;
  constexpr auto kExpectedTarget = 0x140052532ULL;
  constexpr auto kPagedRangeEnd = 0x140052540ULL;

  LifterUnderTest lifter;
  lifter.file.imageBase = kImageBase;
  lifter.markMemPaged(kExpectedTarget, kPagedRangeEnd);

  const uint64_t normalized = lifter.normalizeRuntimeTargetAddress(kLowTarget);
  if (normalized == kExpectedTarget) {
    return true;
  }

  std::ostringstream os;
  os << "  normalizeRuntimeTargetAddress widened to 0x" << std::hex
     << normalized << " instead of mapped RVA target 0x140052532\n";
  details = os.str();
  return false;
}
bool runGeneralizedLoopRestoreMergesBackedgeRegisterState(
std::string& details) {
LifterUnderTest lifter;
@@ -1244,6 +1262,8 @@ private:
&InstructionTester::runSolveLoadInfersConcreteBaseFromTrackedLoad);
runCustom("solve_path_widens_mapped_rva_target",
&InstructionTester::runSolvePathWidensMappedRvaTarget);
runCustom("normalize_runtime_target_widens_mapped_rva_target",
&InstructionTester::runNormalizeRuntimeTargetWidensMappedRvaTarget);
return failures;
}