mirror of
https://github.com/NaC-L/Mergen.git
synced 2026-05-12 09:40:34 +00:00
lifter: prefer file-backed queued targets (#105)
The next Themida follow-up after PR #104 exposed another low-target alias problem: queued/control-flow destinations such as 0x52532 were treated as already paged because the synthetic stack range covered them, so they were never widened to their file-backed image RVA forms.

This patch splits target normalization into two policies: PathSolver keeps the broad paged normalization it needs for resolved loop/control-flow work, while getOrCreateBB/getUnvisitedAddr use a stricter file-backed normalization that prefers image-backed addresses over low stack aliases. It also queues the fake indirect-call return targets in Semantics.ipp so that those destinations actually enter the worklist.

Observed effect on example2-virt.bin @ 0x140001000: 24 attempted / 1086 instructions -> 35 attempted / 1565 instructions, with newly reached addresses in the 0x14001xxxx and 0x14002xxxx ranges.

Verification:
- build_iced lifter rewrite_microtests
- rewrite_microtests.exe solve_path_widens_mapped_rva_target normalize_runtime_target_widens_mapped_rva_target solve_load_infers_concrete_base_from_tracked_load generalized_loop_restore_merges_backedge_register_state
- python test.py quick
- python test.py vmp
- build_iced\lifter.exe ..\testthemida\example2-virt.bin 0x140001000

Co-authored-by: yusufcanislek <yusuf.canislek@meetdandy.com>
This commit is contained in:
@@ -27,25 +27,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(PATH_info)::solvePath(
|
||||
// (from different branch paths), so cached results don't carry over.
|
||||
pv_cache.clear();
|
||||
auto normalizeTargetAddress = [&](uint64_t target) -> uint64_t {
|
||||
if (isMemPaged(target)) {
|
||||
return target;
|
||||
}
|
||||
|
||||
if (target <= std::numeric_limits<uint32_t>::max() &&
|
||||
file.imageBase > std::numeric_limits<uint32_t>::max()) {
|
||||
const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL;
|
||||
const uint64_t widenedLow32 = highBits | target;
|
||||
if (isMemPaged(widenedLow32)) {
|
||||
return widenedLow32;
|
||||
}
|
||||
|
||||
const uint64_t widenedRva = file.imageBase + target;
|
||||
if (isMemPaged(widenedRva)) {
|
||||
return widenedRva;
|
||||
}
|
||||
}
|
||||
|
||||
return target;
|
||||
return normalizeRuntimeTargetAddress(target);
|
||||
};
|
||||
|
||||
struct ResolvedTargetBlock {
|
||||
|
||||
@@ -823,6 +823,13 @@ public:
|
||||
out = std::move(unvisitedBlocks.back());
|
||||
unvisitedBlocks.pop_back();
|
||||
|
||||
const uint64_t normalizedAddr =
|
||||
normalizeFileBackedRuntimeTargetAddress(out.block_address);
|
||||
if (normalizedAddr != out.block_address) {
|
||||
addrToBB[normalizedAddr] = out.block;
|
||||
out.block_address = normalizedAddr;
|
||||
}
|
||||
|
||||
// In Basic mode, skip blocks that already have instructions
|
||||
// (they were processed in a previous iteration).
|
||||
if (getControlFlow() == ControlFlow::Basic && !out.block->empty() &&
|
||||
@@ -1017,6 +1024,7 @@ public:
|
||||
|
||||
|
||||
BasicBlock* getOrCreateBB(uint64_t addr, std::string name) {
|
||||
addr = normalizeFileBackedRuntimeTargetAddress(addr);
|
||||
if (getControlFlow() == ControlFlow::Basic) {
|
||||
auto it = addrToBB.find(addr);
|
||||
if (it != addrToBB.end()) {
|
||||
@@ -1024,6 +1032,14 @@ public:
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
if (getControlFlow() == ControlFlow::Unflatten) {
|
||||
auto it = addrToBB.find(addr);
|
||||
if (it != addrToBB.end() && it->second && !it->second->empty() &&
|
||||
liftProgressDiagEnabled) {
|
||||
std::cout << "[diag] overwriting existing bb for 0x" << std::hex << addr
|
||||
<< std::dec << " old=" << it->second->getName().str() << "\n";
|
||||
}
|
||||
}
|
||||
auto bb = createBudgetedBasicBlock(name, addr);
|
||||
if (bb == liftAbortBlock) {
|
||||
return bb;
|
||||
@@ -1333,6 +1349,54 @@ public:
|
||||
|
||||
--it;
|
||||
return address >= it->first && address < it->second;
|
||||
|
||||
}
|
||||
|
||||
bool isFileBackedRuntimeAddress(uint64_t address) {
|
||||
uint64_t ignored = 0;
|
||||
return file.readMemory(address, 1, ignored);
|
||||
}
|
||||
|
||||
uint64_t normalizeRuntimeTargetAddress(uint64_t target) {
|
||||
if (isMemPaged(target)) {
|
||||
return target;
|
||||
}
|
||||
|
||||
if (target <= std::numeric_limits<uint32_t>::max() &&
|
||||
file.imageBase > std::numeric_limits<uint32_t>::max()) {
|
||||
const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL;
|
||||
const uint64_t widenedLow32 = highBits | target;
|
||||
const uint64_t widenedRva = file.imageBase + target;
|
||||
if (isMemPaged(widenedLow32)) {
|
||||
return widenedLow32;
|
||||
}
|
||||
if (isMemPaged(widenedRva)) {
|
||||
return widenedRva;
|
||||
}
|
||||
}
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
uint64_t normalizeFileBackedRuntimeTargetAddress(uint64_t target) {
|
||||
if (isFileBackedRuntimeAddress(target)) {
|
||||
return target;
|
||||
}
|
||||
|
||||
if (target <= std::numeric_limits<uint32_t>::max() &&
|
||||
file.imageBase > std::numeric_limits<uint32_t>::max()) {
|
||||
const uint64_t highBits = file.imageBase & 0xFFFFFFFF00000000ULL;
|
||||
const uint64_t widenedLow32 = highBits | target;
|
||||
const uint64_t widenedRva = file.imageBase + target;
|
||||
if (isFileBackedRuntimeAddress(widenedLow32)) {
|
||||
return widenedLow32;
|
||||
}
|
||||
if (isFileBackedRuntimeAddress(widenedRva)) {
|
||||
return widenedRva;
|
||||
}
|
||||
}
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
std::set<llvm::APInt, APIntComparator>
|
||||
|
||||
@@ -156,12 +156,14 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() {
|
||||
return;
|
||||
}
|
||||
auto RIP_value = cast<ConstantInt>(next_jump);
|
||||
auto jump_address = RIP_value->getZExtValue();
|
||||
auto jump_address =
|
||||
normalizeRuntimeTargetAddress(RIP_value->getZExtValue());
|
||||
|
||||
auto bb = getOrCreateBB(jump_address, "bb_call");
|
||||
builder->CreateBr(bb);
|
||||
|
||||
blockInfo = BBInfo(jump_address, bb);
|
||||
addUnvisitedAddr(blockInfo);
|
||||
run = 0;
|
||||
return;
|
||||
}
|
||||
@@ -181,10 +183,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() {
|
||||
STACKP_VALUE) {
|
||||
printvalueforce2(jump_address);
|
||||
|
||||
// TODO: ideally remove this part
|
||||
auto bb = getOrCreateBB(jump_address, "bb_indirectly_called");
|
||||
// actually call the function first
|
||||
|
||||
auto functionName = file.getName(jump_address);
|
||||
debugging::doIfDebug([&]() {
|
||||
outs() << "calling : " << functionName
|
||||
@@ -198,11 +197,13 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::liftInstruction() {
|
||||
|
||||
// get [rsp], jump there
|
||||
auto RIP_value = cast<ConstantInt>(next_jump);
|
||||
jump_address = RIP_value->getZExtValue();
|
||||
jump_address = normalizeRuntimeTargetAddress(RIP_value->getZExtValue());
|
||||
auto bb = getOrCreateBB(jump_address, "bb_indirectly_called");
|
||||
|
||||
builder->CreateBr(bb);
|
||||
|
||||
blockInfo = BBInfo(jump_address, bb);
|
||||
addUnvisitedAddr(blockInfo);
|
||||
run = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -219,12 +219,15 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() {
|
||||
break;
|
||||
}
|
||||
auto registerCValue = cast<ConstantInt>(registerValue);
|
||||
if (inlinePolicy.isOutline(registerCValue->getZExtValue()) ||
|
||||
shouldOutlineCall(registerCValue->getZExtValue())) {
|
||||
uint64_t rawTargetAddr = registerCValue->getZExtValue();
|
||||
uint64_t normalizedTargetAddr = normalizeRuntimeTargetAddress(rawTargetAddr);
|
||||
auto* normalizedTargetValue =
|
||||
builder->getIntN(registerCValue->getBitWidth(), normalizedTargetAddr);
|
||||
if (inlinePolicy.isOutline(normalizedTargetAddr) ||
|
||||
shouldOutlineCall(normalizedTargetAddr)) {
|
||||
|
||||
// --- Emit external call (outlined known-address target) ---
|
||||
uint64_t targetAddr = registerCValue->getZExtValue();
|
||||
auto importName = resolveImportName(targetAddr);
|
||||
auto importName = resolveImportName(normalizedTargetAddr);
|
||||
|
||||
if (!importName.empty()) {
|
||||
// Named import: emit a proper LLVM function declaration.
|
||||
@@ -242,7 +245,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() {
|
||||
fx.target = CallTargetClass::UnknownDirect;
|
||||
|
||||
auto idltvm = builder->CreateIntToPtr(
|
||||
registerValue, PointerType::get(context, 0));
|
||||
normalizedTargetValue, PointerType::get(context, 0));
|
||||
auto callResult = builder->CreateCall(
|
||||
parseArgsType(nullptr, context), idltvm, parseArgs(nullptr));
|
||||
|
||||
@@ -255,7 +258,7 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(void)::lift_call() {
|
||||
emittedExternalCall = true;
|
||||
break;
|
||||
}
|
||||
jump_address = registerCValue->getZExtValue();
|
||||
jump_address = normalizedTargetAddr;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
||||
@@ -1070,6 +1070,24 @@ private:
|
||||
}
|
||||
|
||||
|
||||
// Microtest: with an image base of 0x140000000 and only
// [0x140052532, 0x140052540) marked as paged, normalizeRuntimeTargetAddress
// must turn the low target 0x52532 into 0x140052532.
// Returns true on success; on failure fills `details` with a description of
// the address that was produced instead and returns false.
bool runNormalizeRuntimeTargetWidensMappedRvaTarget(std::string& details) {
  constexpr uint64_t kImageBase = 0x140000000ULL;
  constexpr uint64_t kLowTarget = 0x52532ULL;
  constexpr uint64_t kMappedTarget = 0x140052532ULL;
  constexpr uint64_t kMappedEnd = 0x140052540ULL;

  LifterUnderTest lifter;
  lifter.file.imageBase = kImageBase;
  lifter.markMemPaged(kMappedTarget, kMappedEnd);

  const uint64_t actual = lifter.normalizeRuntimeTargetAddress(kLowTarget);
  if (actual == kMappedTarget) {
    return true;
  }

  std::ostringstream message;
  message << "  normalizeRuntimeTargetAddress widened to 0x" << std::hex
          << actual << " instead of mapped RVA target 0x140052532\n";
  details = message.str();
  return false;
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool runGeneralizedLoopRestoreMergesBackedgeRegisterState(
|
||||
std::string& details) {
|
||||
LifterUnderTest lifter;
|
||||
@@ -1244,6 +1262,8 @@ private:
|
||||
&InstructionTester::runSolveLoadInfersConcreteBaseFromTrackedLoad);
|
||||
runCustom("solve_path_widens_mapped_rva_target",
|
||||
&InstructionTester::runSolvePathWidensMappedRvaTarget);
|
||||
runCustom("normalize_runtime_target_widens_mapped_rva_target",
|
||||
&InstructionTester::runNormalizeRuntimeTargetWidensMappedRvaTarget);
|
||||
|
||||
return failures;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user