Fix structured loop recovery regressions

2026-05-12 09:40:34 +00:00 · 2026-04-05 23:33:30 +03:00
parent 825b29946d
commit 2eaa22ee63
10 changed files with 503 additions and 10 deletions
@@ -0,0 +1,25 @@
+## Summary
+<!-- What changed and why? Keep this to the user-visible or system-visible effect. -->
+
+## Changes
+| File | Description |
+|---|---|
+| | |
+
+## Correctness Notes
+<!-- Fill in only the sections relevant to this PR. -->
+- Custom passes / IR mutation:
+- Runtime image / memory mapping:
+- Call ABI / outlining behavior:
+- Rewrite manifest / oracle / semantic coverage:
+
+## Verification
+- [ ] Build (if applicable):
+- [ ] Targeted tests:
+- [ ] Full gate (if applicable):
+- [ ] Manual validation (if applicable):
+
+## Reviewer Pointers
+- Reviewer workflow and severity rubric: `docs/REVIEWER_RULES.md`
+- Quick subsystem checklist: `REVIEW_CHECKLIST.md`
+- Rewrite gate expectations: `docs/REWRITE_BASELINE.md`
@@ -78,6 +78,28 @@ jobs:
        run: cmd /c scripts\dev\build_iced.cmd

      - name: Run rewrite gate
-        env:
-          CLANG_CL_EXE: C:\Program Files\LLVM\bin\clang-cl.exe
        run: ${{ matrix.gate.test_command }}
+
+      - name: Collect full-handler coverage summary
+        if: ${{ matrix.gate.job_name == 'rewrite-strict-gate' }}
+        shell: pwsh
+        run: |
+          $coverageJson = "rewrite-full-handler-coverage.json"
+          $json = python test.py report --full --json
+          $json | Out-File -FilePath $coverageJson -Encoding utf8
+          $coverage = $json | ConvertFrom-Json
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "## Full-handler coverage"
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value ""
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Vector kind: $($coverage.summary.vector_kind)"
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Vectors file: $($coverage.summary.vectors_file)"
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Total handlers: $($coverage.summary.total_handlers)"
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Covered handlers: $($coverage.summary.covered_handlers)"
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Skipped handlers: $($coverage.summary.skipped_handlers)"
+          Add-Content -Path $env:GITHUB_STEP_SUMMARY -Value "- Missing handlers: $($coverage.summary.missing_handlers)"
+
+      - name: Upload full-handler coverage artifact
+        if: ${{ matrix.gate.job_name == 'rewrite-strict-gate' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: rewrite-full-handler-coverage
+          path: rewrite-full-handler-coverage.json
@@ -27,6 +27,7 @@ CMakeLists.txt.user
 output.ll
 output_finalnoopt.ll
 output_no_opts.ll
+/output_diagnostics.json
 /rewrite-regression-work/

 # generated test vectors (regenerated by scripts)
@@ -0,0 +1,117 @@
+# Repository Guidelines
+
+## Project Overview
+Mergen is a function-level x64 PE to LLVM IR lifter for deobfuscation and devirtualization. The active workflow is rewrite/regression driven: changes are expected to preserve lifted IR shape, runtime semantics, and deterministic outputs.
+
+Primary repo entry points:
+- `README.md` — project purpose and high-level entry links
+- `ARCHITECTURE.md` — current pipeline order and invariants
+- `docs/SCOPE.md` — support matrix and quality contract
+- `docs/REWRITE_BASELINE.md` — operational regression workflow
+
+## Architecture & Data Flow
+The core pipeline is:
+1. CLI entry in `lifter/core/Lifter.cpp`
+2. Runtime image validation in `lifter/core/RuntimeImageContext.hpp`
+3. Lifter setup / auto-outline in `lifter/core/LifterStages.hpp`
+4. Memory policy + paged memory setup in `lifter/memory/MemoryPolicySetup.hpp` and `lifter/core/LifterPipelineStages.hpp`
+5. Signature stage in `lifter/core/LifterPipelineStages.hpp`
+6. Lift loop in `lifter/core/LiftDriver.hpp`
+7. Fixpoint optimization in `lifter/core/MergenPB.hpp`
+8. Final post-passes and IR emission
+
+Important invariants:
+- `STACKP_VALUE` is fixed at `0x14FEA0` (`lifter/core/Includes.h`).
+- Stack reserve is clamped to `[0x1000, 0x100000]` (`lifter/memory/MemoryPolicySetup.hpp`).
+- Pass order is intentional: `GEPLoadPass -> ReplaceTruncWithLoadPass -> PromotePseudoStackPass -> PromotePseudoMemory`, then O2, then post-passes such as switch normalization and canonical naming (`ARCHITECTURE.md`, `lifter/core/MergenPB.hpp`). Do not reorder casually.
+- The disassembler boundary is normalized through `lifter/disasm/CommonDisassembler.hpp`; semantics should consume normalized operands, not backend-specific details.
+
+## Key Directories
+- `lifter/core/` — CLI, runtime image setup, pipeline orchestration, ABI/signature handling
+- `lifter/semantics/` — opcode dispatch and instruction semantics (`Semantics.ipp`, `Semantics_*.ipp`, `x86_64_opcodes.x`)
+- `lifter/disasm/` — Iced/Zydis abstraction layer
+- `lifter/memory/` — file-backed memory, page map, pseudo-memory/stack promotion
+- `lifter/analysis/` — custom LLVM passes and path solving
+- `lifter/test/` — in-process instruction/oracle test harness and golden metadata
+- `testcases/rewrite_smoke/` — rewrite smoke corpus sources
+- `scripts/rewrite/` — baseline gate, sample build, manifest validation, oracle generation, semantic checks
+- `scripts/dev/` — preferred configure/build entrypoints
+- `docs/` — current workflow, scope, and reviewer policy docs
+
+## Important Files
+- `cmake.toml` — source of truth for build configuration; `CMakeLists.txt` is generated, do not edit it directly.
+- `test.py` — primary QA entrypoint.
+- `scripts/rewrite/instruction_microtests.json` — source of truth for rewrite smoke samples, expected IR patterns, semantic cases, and CI skips.
+- `lifter/test/test_vectors/oracle_vectors.json` — default instruction oracle vectors.
+- `lifter/test/test_vectors/golden_ir_hashes.json` — determinism gate for tracked IR outputs.
+- `.editorconfig` and `.clang-format` — formatting contract (2 spaces, LF, UTF-8, 100-column LLVM-based style).
+
+## Development Commands
+Preferred Windows build flow:
+```bat
+cmd /c scripts\dev\configure_iced.cmd
+cmd /c scripts\dev\build_iced.cmd
+```
+
+Alternate Zydis-only lane:
+```bat
+cmd /c scripts\dev\configure_zydis.cmd
+cmd /c scripts\dev\build_zydis.cmd
+```
+
+Primary test commands:
+```bat
+python test.py quick
+python test.py all
+python test.py baseline
+python test.py micro --check-flags
+python test.py negative
+python test.py coverage --full
+python test.py report --json
+```
+
+Useful targeted flows:
+```bat
+python test.py micro add
+python test.py semantic branch
+scripts\rewrite\run.cmd
+scripts\rewrite\run_microtests.cmd --check-flags xor
+```
+
+## Runtime / Tooling Preferences
+- Platform focus is Windows. CI uses `scripts/dev/*.cmd` and `windows-latest`.
+- Prefer the iced lane by default; use Zydis only when you need the fallback/backend-specific lane.
+- Configure/build scripts assume Ninja + `clang-cl`; they do not invoke `VsDevCmd.bat`.
+- `LLVM_DIR` must resolve to LLVM 18; CI currently downloads LLVM 18.1.8.
+- Cargo is expected on PATH for the iced lane.
+- Build outputs live in `build_iced/`, `build_zydis/`, or other `build*/` directories; treat them as generated artifacts.
+- Regression artifacts are written outside the repo by default to `../rewrite-regression-work/`.
+
+## Code Conventions & Common Patterns
+- Extend instruction support through the existing opcode table and semantics files; do not add parallel dispatch paths.
+  - Wire new entries in `lifter/semantics/x86_64_opcodes.x`.
+  - Implement behavior in the appropriate `Semantics_*.ipp` file.
+- Preserve the normalized operand model across disassembly and semantics. Cross-check `lifter/disasm/CommonDisassembler.hpp`, backend adapters, and downstream helpers before changing operand enums or widths.
+- Memory accesses should go through the existing operand/memory helpers (`lifter/semantics/OperandUtils.ipp`); bypassing them usually breaks constant folding, page-map behavior, or pseudo-stack promotion.
+- Call handling is ABI-aware. Check `lifter/core/AbiCallContract.hpp` and existing control-flow helpers before changing call lowering.
+- Prefer explicit failures and diagnostics over silent fallbacks. The repo already has structured lift diagnostics (`lifter/core/LiftDiagnostics.hpp`) and strict negative tests in `test.py`.
+- When touching build definitions, update `cmake.toml`; cmkr regenerates `CMakeLists.txt` from it, so every build change must flow through that file.
+- Keep docs and test manifests in the same change when behavior changes. This repo relies on docs/tests as active contracts, not afterthoughts.
+
+## Testing & QA Expectations
+- `python test.py` is the canonical entrypoint. `quick` and `all` are the main gates used in CI.
+- The rewrite baseline is manifest-backed: every source in `testcases/rewrite_smoke/` must have exactly one manifest entry in `scripts/rewrite/instruction_microtests.json`.
+- Golden IR hashing is part of the contract. C/C++-compiled smoke samples are excluded from golden hashes because their IR addresses are toolchain-dependent; they are checked via semantic tests instead.
+- `python test.py negative` matters: it guards explicit failure behavior for malformed manifests, unsafe paths, and bad vector schemas.
+- Use focused verification that matches your change:
+  - Core/semantics/disasm/test harness changes: `python test.py micro --check-flags`
+  - Rewrite script/manifest changes: `python test.py baseline` and `python test.py negative`
+  - Coverage/vector plumbing: `python test.py coverage --full` and `python test.py report --json`
+  - Build script/CMake changes: rerun the affected `scripts\dev\configure_*.cmd` + `build_*.cmd` lane
+
+## Process Notes For AI Assistants
+- Prefer `docs/REWRITE_BASELINE.md` and CI workflows over older generic build docs when commands disagree.
+- Do not edit generated files or artifact outputs unless the task is explicitly about generation.
+- Before changing exported behavior, inspect direct consumers and the matching rewrite/test manifests.
+- If you add a new sample, update both `testcases/rewrite_smoke/` and `scripts/rewrite/instruction_microtests.json` in the same change.
+- If you change semantics or ABI behavior, expect to update oracle vectors, microtests, semantic expectations, and possibly golden hashes.
@@ -0,0 +1,98 @@
+# Architecture Reference
+
+## Key Constants
+
+| Constant | Value | Defined in |
+|---|---|---|
+| `STACKP_VALUE` | `0x14FEA0` | `lifter/core/Includes.h` |
+| Stack reserve min | `0x1000` | `lifter/memory/MemoryPolicySetup.hpp` (`configureDefaultMemoryPolicy`) |
+| Stack reserve max | `0x100000` | `lifter/memory/MemoryPolicySetup.hpp` (`configureDefaultMemoryPolicy`) |
+
+## Pipeline Order
+
+| # | Function | File | Purpose |
+|---|---|---|---|
+| 1 | `createRuntimeImageContext` | `lifter/core/RuntimeImageContext.hpp` | Validates PE headers, builds `RuntimeImageContext` |
+| 2 | `createConfiguredLifterForRuntime` | `lifter/core/LifterStages.hpp` | Calls `loadFile`, parses PE exports, auto-outlines export addresses |
+| 3 | `configureDefaultMemoryPolicy` | `lifter/memory/MemoryPolicySetup.hpp` | Sets `memoryPolicy` (SYMBOLIC default, CONCRETE for PE sections + stack), clamps `stackReserve` to `[0x1000, 0x100000]` |
+| 4 | `prepareRuntimePagedMemory` | `lifter/core/LifterPipelineStages.hpp` | Marks `pageMap` intervals for mapped memory regions |
+| 5 | `runSignatureStage` | `lifter/core/LifterPipelineStages.hpp` | Signature-based analysis |
+| 6 | Lift loop | `lifter/core/LiftDriver.hpp` | Main lifting of instructions to LLVM IR |
+| 7 | `run_opts` | `lifter/core/MergenPB.hpp` | Fixpoint optimization (see below) |
+
+## run_opts Fixpoint Loop
+
+```
+loop {
+    O1 pipeline
+    GEPLoadPass
+    ReplaceTruncWithLoadPass
+    PromotePseudoStackPass
+    PromotePseudoMemory
+} until instruction count stabilizes (delta == 0)
+
+Final O2 pipeline (runs once after fixpoint)
+```
+
+Termination: the loop compares instruction count before and after each iteration. When the count stops changing, the fixpoint is reached.
+
+## Custom Pass Summary
+
+| Pass | Filters on | Produces | Description |
+|---|---|---|---|
+| `GEPLoadPass` | `memory`-base GEPs where: `getPointerOperand() == mem`, `!isSymbolic`, `address_to_mapped_address != 0`, `isIntegerTy()`, `readMemory` succeeds | Constant integer values folded from PE image | Folds constant loads from the PE image through `memory`-base GEPs |
+| `ReplaceTruncWithLoadPass` | `trunc(load wide, ptr)` patterns | `load narrow, ptr` | Rewrites wide-load-then-truncate into narrow load; valid on little-endian |
+| `PromotePseudoStackPass` | `memory`-base GEPs in `[STACKP_VALUE - reserve, STACKP_VALUE + reserve]` via `isStackAddress()` | `stackmemory` alloca GEPs | Replaces pseudo-stack memory accesses with real stack alloca operations |
+| `PromotePseudoMemory` | Remaining `memory`-base GEPs (not handled by above passes) | `inttoptr` | Converts leftover pseudo-memory GEPs to raw pointer operations |
+
+Pass order matters: GEPLoadPass must run before PromotePseudoMemory, otherwise concrete PE loads get converted to `inttoptr` and are lost.
+
+## InlinePolicy
+
+- **CRTP framework**: default policy inlines everything.
+- **Outline set**: `addAddress(va)` registers a VA for outlining (i.e., not inlined).
+- **Check site**: `Semantics_ControlFlow.ipp:202` — when a call target is constant-resolved, the inline policy is consulted.
+- **CLI**: `--outline <addrs>` accepts comma-separated hex addresses, parsed in `Utils.cpp`, stored in `ParseResult::outlineAddresses`.
+- **PE export auto-outline**: export addresses are automatically added to the outline set in `createConfiguredLifterForRuntime`. Forwarded exports are filtered out by checking if the RVA falls within the export directory range.
+
+## Memory Subsystem
+
+### FileReader
+
+CRTP base with concrete implementations `x86FileReader` and `x86_64FileReader`.
+
+| Method | Returns | Failure value |
+|---|---|---|
+| `RvaToFileOffset` | file offset | `0` |
+| `readMemory` | `bool` | `false` |
+| `address_to_mapped_address` | mapped address | `0` |
+
+### MemoryPolicy
+
+| Region | Policy |
+|---|---|
+| Default | SYMBOLIC |
+| PE sections | CONCRETE |
+| Stack | CONCRETE |
+
+Set by `configureDefaultMemoryPolicy` in `lifter/memory/MemoryPolicySetup.hpp`.
+
+### pageMap
+
+`std::map<uint64_t, uint64_t>` — interval map of paged memory regions.
+
+- `markMemPaged(start, end)` — inserts an interval.
+- `isMemPaged` — uses `upper_bound` then decrements iterator to check containment.
+
+### stackReserve
+
+Set by `configureDefaultMemoryPolicy`, clamped to `[0x1000, 0x100000]`. Consumed by:
+- `PromotePseudoStackPass` — defines the stack address window around `STACKP_VALUE`
+- `prepareRuntimePagedMemory` — marks the stack region in `pageMap`
+
+## PE Parsing
+
+- Header types come from the **linuxpe** library.
+- `RvaToFileOffset` returns `0` on failure — callers must check before using the offset.
+- Export directory is parsed in `createConfiguredLifterForRuntime`; forwarded exports are detected by checking if the export RVA falls within the export directory VA range.
+- `RuntimeImageContext` is validated in `createRuntimeImageContext` (step 1 of the pipeline).
@@ -0,0 +1,44 @@
+# LLVM API Cheat Sheet for Mergen Passes
+
+Quick reference for LLVM APIs used in GEPLoadPass, PromotePseudoStackPass,
+ReplaceTruncWithLoadPass, and PromotePseudoMemory.
+
+1. **`Value::replaceAllUsesWith(Value* newVal)`**
+   Replaces every use of this value with `newVal`. Does NOT touch this instruction's own operands.
+   *Gotcha*: After RAUW the instruction is dead but still sits in its block and holds operand references — erase it, or it lingers as dead IR and keeps its operands in use.
+
+2. **`Instruction::eraseFromParent()`**
+   Removes from the parent BasicBlock ilist and deallocates. Pointer is invalid after the call.
+   *Gotcha*: Asserts `use_empty()`. RAUW first, then erase — never the reverse.
+
+3. **`Value::use_empty()`**
+   Returns true when nothing references this value.
+   *Gotcha*: Check AFTER erasing/replacing all users, not before. A stale user makes this return false.
+
+4. **`GetElementPtrInst::getPointerOperand()`**
+   Returns operand 0 — the base pointer. For `gep i8, ptr %memory, i64 <off>` this is `%memory`.
+   *Gotcha*: Prefer over `getOperand(getNumOperands()-2)` which breaks on multi-index GEPs.
+
+5. **`Type::getIntegerBitWidth()`**
+   Returns the bit width of an `iN` type.
+   *Gotcha*: Asserts if `!isIntegerTy()`. Always guard: `if (Ty->isIntegerTy()) { ... Ty->getIntegerBitWidth() ... }`.
+
+6. **`Type::isIntegerTy()`**
+   Returns true only for `iN` types. False for float, vector, pointer, void, struct, array.
+   *Gotcha*: Pointer types are NOT integer types — test before calling `getIntegerBitWidth()`. `computeKnownBits` has a wider guard that also admits pointers (see item 9).
+
+7. **BasicBlock iteration with erasure**
+   Pattern: `for (auto it = BB.begin(); it != BB.end();) { auto* I = &*it++; /* may erase I */ }`
+   *Gotcha*: `it++` must advance BEFORE any erase. A range-for (`for (auto &I : BB)`) crashes on erasure.
+
+8. **`SmallPtrSet<T*, N>`**
+   Inline-storage set for pointer deduplication. Use when collecting Instructions for deferred erasure.
+   *Gotcha*: Inserting the same pointer twice is safe (the second `insert` is a no-op and reports `false` in the `.second` of its result), but erasing a pointer twice is use-after-free. The set prevents that.
+
+9. **`computeKnownBits(Value*, DataLayout&)`**
+   Computes known-zero/known-one bit masks via ValueTracking.
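+   *Gotcha*: Asserts unless the type is an integer, a pointer, or a vector of either. Guard callers with `isIntegerTy() || isPointerTy()` (or `isIntOrIntVectorTy() || isPtrOrPtrVectorTy()` when vectors must be handled).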
+
+10. **`PreservedAnalyses::none()` vs `::all()`**
+    Return `none()` if ANY IR was modified; `all()` if the pass was a no-op.
+    *Gotcha*: Returning `all()` after mutating IR silently poisons cached analyses for downstream passes.
@@ -0,0 +1,35 @@
+# Mergen Review Checklist
+
+Use this as the quick reviewer companion.
+- Workflow, severity, and verification matrix live in `docs/REVIEWER_RULES.md`.
+- LLVM API gotchas and erase/RAUW reminders live in `LLVM_API_NOTES.md`.
+
+## Custom Pass / IR Safety
+- [ ] Iterator invalidation is avoided during IR erasure (`it++` before erase or collect-then-erase)
+- [ ] `replaceAllUsesWith()` is followed by correct dead-instruction cleanup
+- [ ] Erase collections deduplicate `Instruction*` values before destruction
+- [ ] `getIntegerBitWidth()` / `computeKnownBits()` are guarded by type checks
+- [ ] GEP-based passes filter on the correct base pointer (`memory`, `stackmemory`, etc.)
+- [ ] Passes return the correct preserved-analysis state after mutation
+
+## Runtime Image / Memory Invariants
+- [ ] `RvaToFileOffset`, `readMemory`, and `address_to_mapped_address` failure values are checked before use
+- [ ] Stack arithmetic cannot underflow or diverge from the clamped reserve window
+- [ ] `pageMap`, `memoryPolicy`, and stack-promotion logic derive bounds from the same source of truth
+- [ ] PE export parsing filters forwarded exports before adding outline targets
+
+## Call / ABI Behavior
+- [ ] Unknown or outlined calls preserve the intended ABI contract for the chosen mode
+- [ ] Strict-mode clobbers and memory effects remain consistent with `AbiCallContract.hpp`
+- [ ] Compat-mode behavior remains opt-in and diagnostic-only
+
+## Rewrite Manifests / Vectors / Tests
+- [ ] `scripts/rewrite/instruction_microtests.json` stays in one-to-one sync with `testcases/rewrite_smoke/`
+- [ ] Manifest sample names reject traversal/path separators
+- [ ] Oracle/vector schema fields keep their expected types (`skip` stays boolean, XMM values stay fixed-width)
+- [ ] Golden-hash churn is explained; C/C++-compiled samples are not treated like deterministic asm outputs
+
+## Documentation / Process
+- [ ] `cmake.toml` is updated instead of hand-editing generated `CMakeLists.txt`
+- [ ] Docs changed with behavior when defaults, commands, or invariants moved
+- [ ] Verification commands in the PR match the changed subsystem
@@ -59,22 +59,27 @@ MERGEN_LIFTER_DEFINITION_TEMPLATES(PATH_info)::solvePath(
        visitedAddresses.contains(target) &&
        target <= blockInfo.block_address;
    auto it = addrToBB.find(target);
-    const bool pendingGeneralization =
+    const bool hasPendingGeneralization =
        pendingLoopGeneralizationAddresses.contains(target);
+    const bool canUseStructuredLoopGeneralization =
+        currentPathSolveAllowsStructuredLoopGeneralization();
+    const bool canReusePendingGeneralization =
+        hasPendingGeneralization && canUseStructuredLoopGeneralization;
    const bool wantsGeneralization =
-        pendingGeneralization ||
+        canReusePendingGeneralization ||
        (backwardVisitedTarget && canGeneralizeStructuredLoopHeader(target));
    if (wantsGeneralization) {
      if (currentPathSolveContext == PathSolveContext::DirectJump) {
        stackBypassGeneralizedLoopAddresses.insert(target);
      }
      const bool generalizedBackup =
+          canUseStructuredLoopGeneralization &&
          stackBypassGeneralizedLoopAddresses.contains(target);
-      if (pendingGeneralization && it != addrToBB.end() && it->second &&
+      if (canReusePendingGeneralization && it != addrToBB.end() && it->second &&
          it->second->empty()) {
        return {it->second, false, generalizedBackup};
      }
-      if (!pendingGeneralization) {
+      if (!hasPendingGeneralization) {
        pendingLoopGeneralizationAddresses.insert(target);
      }
      if (it != addrToBB.end() && it->second && !it->second->empty()) {
@@ -217,15 +217,14 @@ public:
    this->counter = snapshot.ct;
  }

-  void branch_backup_impl(BasicBlock* bb, bool generalized) {
+  void branch_backup_impl(BasicBlock* bb, bool /*generalized*/) {
    printvalue2("backing up");
    printvalue2(this->counter);

    auto snapshot = backup_point(vec, vecflag, this->buffer, this->cache,
                                 this->assumptions, this->counter);
-    if (generalized) {
-      snapshot = make_generalized_loop_backup(snapshot);
-    }
+    // Persist the canonical (unfiltered) state; `generalized` is intentionally ignored here.
+    // Generalized loop restore filters stack-local state only at load time, while bypass mode is active.
    BBbackup[bb] = std::move(snapshot);
  }

@@ -457,6 +457,65 @@ private:
    return true;
  }

+  // Shared driver for the pending-generalized-loop "blocked" tests.
+  //
+  // Marks 0x1000 as a pending loop-generalization target whose block
+  // (`pending`) is still empty, then solves a jump to it from a block at
+  // 0x2000 under `context`. Passes only when the path resolves to 0x1000
+  // without branching to or queueing `pending`, and the pending mark is left
+  // in place.
+  //
+  // context     - path-solve context to run the lifter under.
+  // contextName - label prefixed to every failure message.
+  // details     - receives the failure description when a check fails.
+  // Returns true when every expectation holds, false otherwise.
+  bool runPendingGeneralizedLoopBlockedByContext(
+      LifterUnderTest::PathSolveContext context, const char* contextName,
+      std::string& details) {
+    LifterUnderTest lifter;
+    lifter.currentPathSolveContext = context;
+
+    auto* current = llvm::BasicBlock::Create(lifter.context, "current", lifter.fnc);
+    auto* pending =
+        llvm::BasicBlock::Create(lifter.context, "pending_loop_header", lifter.fnc);
+    lifter.builder->SetInsertPoint(current);
+    lifter.blockInfo = BBInfo(0x2000, current);
+    lifter.addrToBB[0x1000] = pending;
+    lifter.pendingLoopGeneralizationAddresses.insert(0x1000);
+
+    uint64_t destination = 0;
+    auto pathResult =
+        lifter.solvePath(lifter.fnc, destination, makeI64(lifter.context, 0x1000));
+    if (pathResult != PATH_solved || destination != 0x1000) {
+      details = std::string("  ") + contextName +
+                " context failed to solve the pending loop-header target\n";
+      return false;
+    }
+
+    // getTerminator() yields null for a block without a terminator and plain
+    // dyn_cast asserts on null; the _or_null form lets the check below report
+    // a missing branch as a test failure instead of aborting the harness.
+    auto* branch =
+        llvm::dyn_cast_or_null<llvm::BranchInst>(current->getTerminator());
+    if (!branch || branch->getNumSuccessors() != 1) {
+      details = std::string("  ") + contextName +
+                " context did not emit the expected direct branch\n";
+      return false;
+    }
+    if (branch->getSuccessor(0) == pending) {
+      details = std::string("  ") + contextName +
+                " context must not reuse a pending generalized loop header\n";
+      return false;
+    }
+    // The newest queued block must be a fresh one, not the pending header.
+    if (lifter.unvisitedBlocks.empty() ||
+        lifter.unvisitedBlocks.back().block == pending) {
+      details = std::string("  ") + contextName +
+                " context queued the pending generalized loop header instead of a fresh block\n";
+      return false;
+    }
+    // A blocked context must leave the pending mark for a later eligible path.
+    if (!lifter.pendingLoopGeneralizationAddresses.contains(0x1000)) {
+      details = std::string("  ") + contextName +
+                " context unexpectedly consumed the pending generalization state\n";
+      return false;
+    }
+    return true;
+  }
+
+  bool runPendingGeneralizedLoopIndirectJumpBlocked(std::string& details) { // IndirectJump variant
+    return runPendingGeneralizedLoopBlockedByContext( // shared check does all the work
+        LifterUnderTest::PathSolveContext::IndirectJump, "indirect-jump", details);
+  }
+
+  bool runPendingGeneralizedLoopRetBlocked(std::string& details) { // Ret variant
+    return runPendingGeneralizedLoopBlockedByContext( // shared check does all the work
+        LifterUnderTest::PathSolveContext::Ret, "return-path", details);
+  }
+
+
  bool runStructuredLoopHeaderAllowsConditionalBackedge(std::string& details) {
    LifterUnderTest lifter;
    lifter.currentPathSolveContext =
@@ -736,6 +795,88 @@ private:
    return true;
  }

+  bool runPromotedGeneralizedLoopRestoresCanonicalBackup( // backup must stay canonical across both restore modes
+      std::string& details) { // details receives the failure message; returns true on pass
+    LifterUnderTest lifter;
+    lifter.currentPathSolveContext = LifterUnderTest::PathSolveContext::DirectJump;
+
+    auto* current = llvm::BasicBlock::Create(lifter.context, "current", lifter.fnc);
+    auto* bb = llvm::BasicBlock::Create(lifter.context, "loop_header", lifter.fnc);
+    lifter.builder->SetInsertPoint(current);
+    lifter.blockInfo = BBInfo(0x2000, current);
+    lifter.addrToBB[0x1000] = bb;
+    lifter.pendingLoopGeneralizationAddresses.insert(0x1000); // header starts out pending...
+    lifter.stackBypassGeneralizedLoopAddresses.insert(0x1000); // ...and tagged for the stack bypass
+
+    const uint64_t localStackAddr = STACKP_VALUE - 0x20; // entry the generalized restore should drop
+    const uint64_t nonLocalAddr = 0x500000; // entry both restores should keep
+    auto* localValue = llvm::ConstantInt::get(llvm::Type::getInt8Ty(lifter.context), 0xAA);
+    auto* nonLocalValue =
+        llvm::ConstantInt::get(llvm::Type::getInt8Ty(lifter.context), 0x55);
+    lifter.buffer[localStackAddr] = ValueByteReference(localValue, 0);
+    lifter.buffer[nonLocalAddr] = ValueByteReference(nonLocalValue, 0);
+
+    uint64_t destination = 0;
+    auto pathResult =
+        lifter.solvePath(lifter.fnc, destination, makeI64(lifter.context, 0x1000));
+    if (pathResult != PATH_solved || destination != 0x1000) {
+      details =
+          "  failed to queue the pending generalized loop header for backup-restore testing\n";
+      return false;
+    }
+
+    BBInfo out;
+    if (!lifter.getUnvisitedAddr(out) || out.block != bb) { // first dequeue: pending visit
+      details = "  failed to dequeue the pending generalized loop header\n";
+      return false;
+    }
+    if (lifter.currentBlockRestoreMode !=
+        LifterUnderTest::BlockRestoreMode::GeneralizedLoop) {
+      details =
+          "  pending direct-jump generalized loop should restore through generalized mode first\n";
+      return false;
+    }
+
+    lifter.buffer.clear(); // start empty so only restored entries are observed
+    lifter.load_generalized_backup(bb);
+    if (lifter.buffer.contains(localStackAddr)) {
+      details =
+          "  generalized restore should drop stack-local backup entries while the pending bypass is active\n";
+      return false;
+    }
+    if (!lifter.buffer.contains(nonLocalAddr)) {
+      details =
+          "  generalized restore should keep non-local backup entries while the pending bypass is active\n";
+      return false;
+    }
+
+    lifter.addUnvisitedAddr(BBInfo(0x1000, bb)); // queue the header again for the "promoted" visit
+    if (!lifter.getUnvisitedAddr(out) || out.block != bb) { // second dequeue: promoted visit
+      details = "  failed to dequeue the promoted generalized loop header\n";
+      return false;
+    }
+    if (lifter.currentBlockRestoreMode != LifterUnderTest::BlockRestoreMode::Normal) {
+      details =
+          "  promoted generalized loop should revert to normal restore mode before reloading the backup\n";
+      return false;
+    }
+
+    lifter.buffer.clear(); // reset again before the normal-mode restore
+    lifter.load_backup(bb);
+    if (!lifter.buffer.contains(localStackAddr)) { // fails if the stored snapshot had been filtered
+      details =
+          "  promoted generalized loop did not restore the canonical stack-local backup contents\n";
+      return false;
+    }
+    if (!lifter.buffer.contains(nonLocalAddr)) {
+      details =
+          "  promoted generalized loop lost non-local backup contents after restoring the canonical snapshot\n";
+      return false;
+    }
+    return true;
+  }
+
+

  int runCustomKnownBitsTests(const std::string& suiteFilter) {
    int failures = 0;
@@ -799,6 +940,10 @@ private:
             &InstructionTester::runLoopGeneralizationIndirectJumpBlocked);
    runCustom("loop_generalization_ret_blocked",
             &InstructionTester::runLoopGeneralizationRetBlocked);
+    runCustom("pending_generalized_loop_indirect_jump_blocked",
+             &InstructionTester::runPendingGeneralizedLoopIndirectJumpBlocked);
+    runCustom("pending_generalized_loop_ret_blocked",
+             &InstructionTester::runPendingGeneralizedLoopRetBlocked);
    runCustom("structured_loop_header_allows_conditional_backedge",
             &InstructionTester::runStructuredLoopHeaderAllowsConditionalBackedge);
    runCustom("structured_loop_header_allows_jump_chain",
@@ -816,6 +961,8 @@ private:
             &InstructionTester::runGeneralizedLoopWithBypassTagUsesGeneralizedRestore);
    runCustom("generalized_loop_bypass_tag_clears_after_promotion",
             &InstructionTester::runGeneralizedLoopBypassTagClearsAfterPromotion);
+    runCustom("promoted_generalized_loop_restores_canonical_backup",
+             &InstructionTester::runPromotedGeneralizedLoopRestoresCanonicalBackup);

    return failures;
  }