diff --git a/.github/workflows/rewrite-strict-gate.yml b/.github/workflows/rewrite-strict-gate.yml index 2d868ea..120c084 100644 --- a/.github/workflows/rewrite-strict-gate.yml +++ b/.github/workflows/rewrite-strict-gate.yml @@ -14,7 +14,7 @@ jobs: gate: - job_name: rewrite-strict-gate timeout_minutes: 120 - test_command: python test.py all --check-flags + test_command: python test.py all install_python_deps: true - job_name: rewrite-quick-gate timeout_minutes: 90 diff --git a/docs/SCOPE.md b/docs/SCOPE.md new file mode 100644 index 0000000..6c7a134 --- /dev/null +++ b/docs/SCOPE.md @@ -0,0 +1,40 @@ +# Scope + +## Purpose + +Mergen is a function-level LLVM IR lifting engine for deobfuscation and devirtualization of x64 protected functions. It translates obfuscated native code into LLVM IR, enabling standard compiler optimizations to recover readable control flow and semantics from virtualized or mutated instruction streams. + +## Supported + +| Area | Details | +|------|---------| +| Architecture | x86-64 (PE binaries) | +| Instruction set | 111 handlers covering general-purpose integer, BMI1/BMI2, bit manipulation, string ops, conditional moves, flag manipulation | +| Control flow | Linear, conditional branches (2-way), direct jumps, call/ret | +| Output | LLVM IR (text), optimizable via LLVM pass pipeline | +| Calling convention awareness | x64 Microsoft (manual signature fixup may be needed) | +| Optimization profiles | safe, aggressive, debug (planned — Phase 2) | + +## Unsupported / Known Limitations + +| Limitation | Status | +|------------|--------| +| Indirect jumps with >2 targets (jump tables) | Active work area | +| Floating-point / SSE / AVX instructions | Not lifted | +| Self-modifying code | Not supported | +| Multi-function / whole-binary lifting | Single function scope only | +| ELF / Mach-O / non-PE formats | Not supported | +| 32-bit x86 | Not supported | +| ARM / RISC-V / other architectures | Not supported | +| Automatic ABI/prototype 
normalization | Planned — Phase 2 | +| Full deterministic output | Planned — Phase 3 | + +## Tested Protectors + +- **VMProtect** — examples exist; reliability varies by protection level. +- **Themida** — examples exist; reliability varies by protection level. + +## Quality Contract + +- Handler test coverage: 97% (108/111 with oracle verification against Unicorn). +- CI gates enforce register and flag correctness. diff --git a/lifter/test_vectors/golden_ir_hashes.json b/lifter/test_vectors/golden_ir_hashes.json new file mode 100644 index 0000000..c268fd1 --- /dev/null +++ b/lifter/test_vectors/golden_ir_hashes.json @@ -0,0 +1,36 @@ +{ + "bitchain.ll": "20baed2e0586c87c622272ff4ee28fe3b5b0e30607c7b75478bab47b1de6a672", + "bitchain_no_opts.ll": "c3f175be6c64d5f74d4f34b0fabd58767de0a055951817dc8ce9d07259a62ddf", + "branch.ll": "74700aa9cddf9372d42ed60ef4dbcd58d5753aad9e5403502e17b5937df429a0", + "branch_no_opts.ll": "9a0d378aa7c02bcfaccd5488d0a2c6c3248d7b4acef1f2ac41a002c3c134ab2b", + "calc_fib.ll": "b7b00b6c8156045cfbac1b92054703dfe831ebfb7bcb9b770a81dacb2c023d16", + "calc_fib_no_opts.ll": "4d2283f7bdae81f69f8af7489c0d6d94e855345e7310a5e1c59ec4b8db7350ab", + "calc_grade.ll": "02381bc09f7c8ba1cd97c9bf96bc5df1b1b8661008ce265799b6942dd82b4bf8", + "calc_grade_no_opts.ll": "f7affa62e92c47e3131556f4522bc7020b8e22f3a958bf86638aafa88683a3fe", + "calc_mixed.ll": "d7955db2508662128ac57408dacda3ad1148a96e0e68c8d8eada9fcf64b117a8", + "calc_mixed_no_opts.ll": "c4dd8f527814da16a719e3b60503e1fa763c313cbe137ae8709b62f73eca279b", + "calc_sum_array.ll": "d93f19c3b0594a6a84a9cba2bc35ea6772eb9f5da29dfe1ee28a492b92cf8ab5", + "calc_sum_array_no_opts.ll": "8b2e552c2cad89bbcb381de515c23191eaab37194a7af42b556dc5f9cba4c2f7", + "cmov_chain.ll": "802ad2c3900fa4f14a189664e9f4e5decdf9a870a5c5e82e0738d5e858fe2d1e", + "cmov_chain_no_opts.ll": "68bb86dba77a80858742e2123bbe9b375bdf25840905be8ea9a8f1a45f3e2d3e", + "diamond.ll": "e8145999e8c9fb8606b8845b1101680f96ba0f7d4b6053deed2b14b1be1ee612", + 
"diamond_no_opts.ll": "1fb3eeb318cd1cbc8e45456ccd22b52da2cddf4373a22e20298f3e56c0817230", + "indirect.ll": "d1dee53ea2e21978de440e485d7020a8fb984ff0a9233edd8e0fc09fc9811928", + "indirect_no_opts.ll": "0b3ae1fc3cdf08e22c471f9b1f1fb121b9a02c5a98ce4306de5433bd000543a5", + "instr_add.ll": "1fa3a792492990336e36153187a70a76c33c0f9423c6e3d76c95df9db7782475", + "instr_add_no_opts.ll": "b26c5fe3eeedd2ff4554d5094e9970adb878a1de0c5ec9a824204d07680b2713", + "instr_rol.ll": "c9212f9f7662cdebbdc8c0c4ea043326272ee8dcccbbb7612caa4355aed4f556", + "instr_rol_no_opts.ll": "392ea9b3050a74abaa17bf7f8051848fc51977ba40ebfc5772f216e6cbb38542", + "instr_sub.ll": "92f30ad7ae798545b1b878a2f5ce5153e3b4e4a9a26542d1c3fefb7572a95928", + "instr_sub_no_opts.ll": "050519d5206bac0161d45c282bffca46184e1f38475d01517fe9cd5edc9ba0bf", + "instr_xor.ll": "09ca5c7b27fbddf3dd0ca6ce8cb37b0f57b3d04aa3fb13380fa78b607904d847", + "instr_xor_no_opts.ll": "7f7a6ddee2a2a1846ae56589b03cec8fc4c8ea0d0e578d5e506f7b02833adc03", + "loop_simple.ll": "43030e4d27cf896f158302f258cb71eff2f00782439407a425c8ba0be17fd5b0", + "loop_simple_no_opts.ll": "79dcc02da80afa3b661925bef7b7d83b75bab7fb89520fb3f0c94ab6a7072e18", + "multi_arg.ll": "048d1a5d6ce0f277322fce3f72837bc76b885a0db5b3a2c5268faf28c70b74b7", + "multi_arg_no_opts.ll": "c9eba3a199250b138a425d3dc5aae905b577e29e891c082dee8c87fe89d05495", + "nested_branch.ll": "56cb412346186716c2361db7edad986afbfe2465d216660bf8d2b59e29c34afb", + "nested_branch_no_opts.ll": "64189e90e9383596bd8089473fdda64a795ab755f07fec635da401d7c0b4685e", + "stack.ll": "41199a809916ab3045d1de076b3d4128fb40a45f950764b38b851f67b310c4fe", + "stack_no_opts.ll": "94059a01b8a78951c9448ba94b5dadf445610df76315b8cab8eecd153843472b" +} diff --git a/lifter/test_vectors/oracle_vectors_full_handlers.json b/lifter/test_vectors/oracle_vectors_full_handlers.json index 3c64b4c..eb23298 100644 --- a/lifter/test_vectors/oracle_vectors_full_handlers.json +++ b/lifter/test_vectors/oracle_vectors_full_handlers.json @@ -1,6 +1,6 
@@ { "schema": "mergen-oracle-v1", - "generated_at_utc": "2026-03-04T10:12:02.045616+00:00", + "generated_at_utc": "2026-03-05T17:29:45.800918+00:00", "source_seed_schema": "mergen-oracle-seed-v1", "providers": [ "unicorn" diff --git a/scripts/rewrite/build_samples.cmd b/scripts/rewrite/build_samples.cmd index 9fa0ace..336d57f 100644 --- a/scripts/rewrite/build_samples.cmd +++ b/scripts/rewrite/build_samples.cmd @@ -53,5 +53,14 @@ for %%F in ("%~dp0..\..\testcases\rewrite_smoke\*.asm") do ( if errorlevel 1 exit /b 1 ) +rem --- Compile C test programs (real binaries with CRT) --- +for %%F in ("%~dp0..\..\testcases\rewrite_smoke\*.c") do ( + cl.exe /nologo /Od /GS- /c /Fo"%WORKDIR%\%%~nF.obj" "%%~fF" + if errorlevel 1 exit /b 1 + + link.exe /nologo /subsystem:console /out:"%WORKDIR%\%%~nF.exe" /map:"%WORKDIR%\%%~nF.map" "%WORKDIR%\%%~nF.obj" + if errorlevel 1 exit /b 1 +) + echo Built rewrite regression samples in "%WORKDIR%" -exit /b 0 +exit /b 0 \ No newline at end of file diff --git a/scripts/rewrite/instruction_microtests.json b/scripts/rewrite/instruction_microtests.json index 0745177..235528f 100644 --- a/scripts/rewrite/instruction_microtests.json +++ b/scripts/rewrite/instruction_microtests.json @@ -34,6 +34,56 @@ "name": "instr_rol", "symbol": "instr_rol_target", "patterns": ["ret i64 34"] + }, + { + "name": "nested_branch", + "symbol": "nested_branch_target", + "patterns": ["icmp slt i32 %0, 11", "icmp slt i32 %0, 21", "select i1", "i64 200, i64 300", "i64 100"] + }, + { + "name": "loop_simple", + "symbol": "loop_simple_target", + "patterns": ["ret i64 6"] + }, + { + "name": "bitchain", + "symbol": "bitchain_target", + "patterns": ["ret i64 4090"] + }, + { + "name": "multi_arg", + "symbol": "multi_arg_target", + "patterns": ["trunc i64 %RCX to i32", "trunc i64 %RDX to i32", "add i32", "mul i32", "zext i32"] + }, + { + "name": "diamond", + "symbol": "diamond_target", + "patterns": ["and i32 %0, 1", "icmp eq i32", "select i1", "mul i32 %0, 3"] + }, + { + 
"name": "cmov_chain", + "symbol": "cmov_chain_target", + "patterns": ["icmp sgt i32 %0, 10", "select i1", "i64 250, i64 150"] + }, + { + "name": "calc_grade", + "symbol": "calc_grade", + "patterns": ["icmp slt i32 %0, 90", "icmp slt i32 %0, 80", "icmp slt i32 %0, 70", "phi i64", "ret i64 %common.ret.op"] + }, + { + "name": "calc_mixed", + "symbol": "calc_mixed", + "patterns": ["icmp slt i32 %0, 101", "select i1", "mul i32", "ret i64"] + }, + { + "name": "calc_fib", + "symbol": "calc_fib", + "patterns": ["ret i64 13"] + }, + { + "name": "calc_sum_array", + "symbol": "calc_sum_array", + "patterns": ["ret i64 150"] } ] } diff --git a/scripts/rewrite/run.ps1 b/scripts/rewrite/run.ps1 index fc7aa60..5113733 100644 --- a/scripts/rewrite/run.ps1 +++ b/scripts/rewrite/run.ps1 @@ -29,16 +29,19 @@ if ($samples.Count -eq 0) { throw "No samples found in $ManifestPath" } -$asmDir = Join-Path $repoRoot 'testcases/rewrite_smoke' -$asmNames = @(Get-ChildItem -Path $asmDir -Filter '*.asm' | ForEach-Object { $_.BaseName }) +$srcDir = Join-Path $repoRoot 'testcases/rewrite_smoke' +$srcNames = @( # each listing is forced to an array with @() so a single match is appended as an element instead of being string-concatenated + @(Get-ChildItem -Path $srcDir -Filter '*.asm' | ForEach-Object { $_.BaseName }) + + @(Get-ChildItem -Path $srcDir -Filter '*.c' | ForEach-Object { $_.BaseName }) +) $sampleNames = @($samples | ForEach-Object { $_.name }) -$missing = @($asmNames | Where-Object { $_ -notin $sampleNames }) +$missing = @($srcNames | Where-Object { $_ -notin $sampleNames }) if ($missing.Count -gt 0) { throw "Manifest is missing rewrite_smoke samples: $($missing -join ', ')" } -$extra = @($sampleNames | Where-Object { $_ -notin $asmNames }) +$extra = @($sampleNames | Where-Object { $_ -notin $srcNames }) if ($extra.Count -gt 0) { throw "Manifest contains non-existent rewrite_smoke samples: $($extra -join ', ')" } diff --git a/test.py b/test.py index cc587d9..4bccbe3 100644 --- a/test.py +++ b/test.py 
@@ -2,6 +2,8 @@ from __future__ import annotations import argparse +import hashlib +import json import os import
subprocess import sys @@ -12,6 +14,8 @@ ROOT = Path(__file__).resolve().parent REWRITE_DIR = ROOT / "scripts" / "rewrite" FULL_VECTORS = ROOT / "lifter" / "test_vectors" / "oracle_vectors_full_handlers.json" DEFAULT_VECTORS = ROOT / "lifter" / "test_vectors" / "oracle_vectors.json" +IR_OUTPUT_DIR = ROOT.parent / "rewrite-regression-work" / "ir_outputs" # sibling of the repo checkout; presumably where run.cmd drops the lifted IR (TODO confirm) +GOLDEN_HASHES_FILE = ROOT / "lifter" / "test_vectors" / "golden_ir_hashes.json" def _run(argv: List[str], extra_env: Dict[str, str] | None = None) -> None: @@ -29,8 +33,61 @@ def _run_cmd(script: Path, args: List[str] | None = None, extra_env: Dict[str, s _run(["cmd", "/c", str(script), *(args or [])], extra_env=extra_env) +def compute_ir_hashes(ir_dir: Path) -> Dict[str, str]: # maps each *.ll file name found under ir_dir to the SHA-256 of its normalized text + hashes: Dict[str, str] = {} + if not ir_dir.is_dir(): + return hashes # missing directory yields an empty mapping, not an error + for ll_file in sorted(ir_dir.rglob("*.ll")): + content = ll_file.read_text(encoding="utf-8", errors="replace") + normalized = "\n".join(line.rstrip() for line in content.splitlines()) + "\n" # drop trailing whitespace and unify line endings so CRLF and LF copies hash identically + digest = hashlib.sha256(normalized.encode("utf-8")).hexdigest() + hashes[ll_file.name] = digest # NOTE(review): keyed by bare file name although rglob recurses; same-named files in different subdirectories would overwrite each other + return dict(sorted(hashes.items())) + + +def check_determinism(ir_dir: Path, golden_file: Path) -> None: # compares current IR hashes with the golden file; raises SystemExit(1) on any difference + hashes = compute_ir_hashes(ir_dir) + if not hashes: + print("WARNING: no .ll files found in", ir_dir, "— skipping determinism check") + return # NOTE(review): an empty or missing IR directory therefore passes this gate silently + + if not golden_file.exists(): + golden_file.parent.mkdir(parents=True, exist_ok=True) + golden_file.write_text(json.dumps(hashes, indent=2) + "\n", encoding="utf-8") + print(f"Golden hashes written to {golden_file} (first run)") + return # NOTE(review): a missing golden file is created and accepted rather than treated as a failure + + golden = json.loads(golden_file.read_text(encoding="utf-8")) + mismatches: List[str] = [] + all_keys = sorted(set(golden) | set(hashes)) # union, so files present on only one side are reported too + for key in all_keys: + expected = golden.get(key) + actual = hashes.get(key) + if expected != actual: + mismatches.append( + f" {key}: expected={expected or '(missing)'} actual={actual or '(missing)'}" + ) + if mismatches: + print("Determinism check FAILED — 
mismatched files:") + for m in mismatches: + print(m) + raise SystemExit(1) + print(f"Determinism check passed: {len(hashes)} files match golden hashes") + + +def update_golden(ir_dir: Path, golden_file: Path) -> None: # overwrites golden_file with the hashes of the IR currently in ir_dir + hashes = compute_ir_hashes(ir_dir) + if not hashes: + print("WARNING: no .ll files found in", ir_dir, "— nothing to write") + return # keep the existing golden file untouched when there is no IR to hash + golden_file.parent.mkdir(parents=True, exist_ok=True) + golden_file.write_text(json.dumps(hashes, indent=2) + "\n", encoding="utf-8") + print(f"Golden hashes updated: {golden_file} ({len(hashes)} files)") + + def run_baseline() -> None: _run_cmd(REWRITE_DIR / "run.cmd") + check_determinism(IR_OUTPUT_DIR, GOLDEN_HASHES_FILE) # baseline now also gates on the golden IR hashes def run_micro(filter_tokens: List[str], check_flags: bool, regenerate_oracle: bool) -> None: @@ -75,6 +132,7 @@ def parse_args() -> argparse.Namespace: sub.add_parser("quick", help="baseline + microtests (skip oracle regen)") sub.add_parser("baseline", help="run scripts/rewrite/run.cmd") + sub.add_parser("update-golden", help="run baseline then regenerate golden IR hashes") full = sub.add_parser("full", help="run scripts/rewrite/run_all_handlers.cmd") full.add_argument( "--check-flags", @@ -110,11 +168,6 @@ def parse_args() -> argparse.Namespace: flags.add_argument("filter", nargs="*", help="optional test name filter tokens") all_cmd = sub.add_parser("all", help="baseline + full-handler + full coverage") all_cmd.add_argument("--no-coverage", action="store_true", help="skip final coverage report") - all_cmd.add_argument( - "--check-flags", - action="store_true", - help="enforce strict oracle flag comparisons during full-handler stage", - ) report_cmd = sub.add_parser("report", help="print handler test coverage report") report_cmd.add_argument("--json", action="store_true", help="output as JSON") report_cmd.add_argument("--vectors", type=Path, default=None, help="explicit vectors file") @@ -129,6 +182,11 @@ def main() -> None: run_baseline() return + if command == "update-golden": + _run_cmd(REWRITE_DIR / "run.cmd") # regenerate the IR only: run_baseline() would call check_determinism() and exit 1 on the very mismatch this command is meant to re-bless
+ update_golden(IR_OUTPUT_DIR, GOLDEN_HASHES_FILE) + return + if command == "micro": run_micro(args.filter, args.check_flags, args.regen_oracle) return @@ -167,14 +225,14 @@ def main() -> None: if command == "all": run_baseline() - run_full(args.check_flags) + run_full(check_flags=True) # "all" no longer takes --check-flags; flag checking is always on here if not args.no_coverage: run_coverage(FULL_VECTORS) return if command == "quick": run_baseline() - run_micro([], check_flags=False, regenerate_oracle=False) + run_micro([], check_flags=True, regenerate_oracle=False) # "quick" now checks flags as well return raise SystemExit(f"Unknown command: {command}") diff --git a/testcases/rewrite_smoke/bitchain.asm b/testcases/rewrite_smoke/bitchain.asm new file mode 100644 index 0000000..b2b90c2 --- /dev/null +++ b/testcases/rewrite_smoke/bitchain.asm @@ -0,0 +1,31 @@ +default rel +bits 64 + +global start +global bitchain_target +extern ExitProcess + +section .text +; Pure-constant bit manipulation chain. No symbolic inputs. +; eax = 0xFF +; shl eax, 8 → 0x0000FF00 +; xor eax, 0xAA → 0x0000FFAA +; ror eax, 4 → 0xA0000FFA +; and eax, 0xFFFF → 0x0FFA = 4090 +; LLVM must fold entire chain to ret i64 4090. +bitchain_target: + push rbp + mov rbp, rsp + mov eax, 0xFF + shl eax, 8 + xor eax, 0xAA + ror eax, 4 ; rotate, not shift: the low nibble 0xA wraps into bits 31..28 + and eax, 0xFFFF ; keep the low 16 bits, discarding the wrapped nibble + pop rbp + ret + +start: + sub rsp, 40 ; presumably Win64 shadow space (32 bytes) plus 8 to keep the stack 16-byte aligned (confirm) + call bitchain_target + mov ecx, eax ; the target's result is passed on as the ExitProcess argument + call ExitProcess diff --git a/testcases/rewrite_smoke/calc_fib.c b/testcases/rewrite_smoke/calc_fib.c new file mode 100644 index 0000000..ee73784 --- /dev/null +++ b/testcases/rewrite_smoke/calc_fib.c @@ -0,0 +1,21 @@ +/* Iterative Fibonacci with constant bound. + * Lift target: calc_fib — concrete loop (7 iterations), stack variables. + * fib(7) = 13. Concolic engine should unroll; LLVM folds to constant. + * This is the first test of real compiler-generated /Od loop code.
*/ +#include + +__declspec(noinline) +int calc_fib(void) { + int a = 0, b = 1; + for (int i = 0; i < 7; i++) { + int t = a + b; + a = b; + b = t; + } + return a; +} + +int main(void) { + printf("fib(7)=%d\n", calc_fib()); + return 0; +} diff --git a/testcases/rewrite_smoke/calc_grade.c b/testcases/rewrite_smoke/calc_grade.c new file mode 100644 index 0000000..b7be86f --- /dev/null +++ b/testcases/rewrite_smoke/calc_grade.c @@ -0,0 +1,19 @@ +/* Grade calculator: cascading if/else on symbolic input (ECX). + * Lift target: calc_grade — no loops, pure branching. + * Expected IR: chain of icmp + select on the symbolic argument. */ +#include + +__declspec(noinline) +int calc_grade(int score) { + if (score >= 90) return 4; /* A */ + if (score >= 80) return 3; /* B */ + if (score >= 70) return 2; /* C */ + if (score >= 60) return 1; /* D */ + return 0; /* F */ +} + +int main(void) { + printf("grade(95)=%d grade(82)=%d grade(55)=%d\n", + calc_grade(95), calc_grade(82), calc_grade(55)); + return 0; +} diff --git a/testcases/rewrite_smoke/calc_mixed.c b/testcases/rewrite_smoke/calc_mixed.c new file mode 100644 index 0000000..7062454 --- /dev/null +++ b/testcases/rewrite_smoke/calc_mixed.c @@ -0,0 +1,20 @@ +/* Mixed symbolic + concrete: branch on input then multiply. + * Lift target: calc_mixed — symbolic arg, one branch, post-merge math. + * Expected IR: select on (x > 100), then mul by 3. */ +#include + +__declspec(noinline) +int calc_mixed(int x) { + int base = 42; + if (x > 100) + base += x; + else + base -= x; + return base * 3; +} + +int main(void) { + printf("mixed(150)=%d mixed(50)=%d\n", + calc_mixed(150), calc_mixed(50)); + return 0; +} diff --git a/testcases/rewrite_smoke/calc_sum_array.c b/testcases/rewrite_smoke/calc_sum_array.c new file mode 100644 index 0000000..2c63d59 --- /dev/null +++ b/testcases/rewrite_smoke/calc_sum_array.c @@ -0,0 +1,19 @@ +/* Sum a small constant stack-allocated array. + * Lift target: calc_sum_array — concrete loop + stack array access. 
+ * 10 + 20 + 30 + 40 + 50 = 150. + * Tests compiler-generated array init + indexed load in a loop. */ +#include <stdio.h> + +__declspec(noinline) +int calc_sum_array(void) { + int arr[] = {10, 20, 30, 40, 50}; + int sum = 0; + for (int i = 0; i < 5; i++) + sum += arr[i]; + return sum; +} + +int main(void) { + printf("sum([10,20,30,40,50])=%d\n", calc_sum_array()); + return 0; +} diff --git a/testcases/rewrite_smoke/cmov_chain.asm b/testcases/rewrite_smoke/cmov_chain.asm new file mode 100644 index 0000000..a6d8d50 --- /dev/null +++ b/testcases/rewrite_smoke/cmov_chain.asm @@ -0,0 +1,32 @@ +default rel +bits 64 + +global start +global cmov_chain_target +extern ExitProcess + +section .text +; Conditional moves (branchless select) on symbolic RCX: +; eax = 100, edx = 200 +; if ecx > 10: eax = edx (200) +; eax += 50 +; Result is 150 or 250 depending on input. +; No branches in the CFG — cmov emits a select directly. +; Expect: select i1, add. +cmov_chain_target: + push rbp + mov rbp, rsp + mov eax, 100 + mov edx, 200 + cmp ecx, 10 + cmovg eax, edx + add eax, 50 + pop rbp + ret + +start: + sub rsp, 40 + mov ecx, 15 + call cmov_chain_target + mov ecx, eax + call ExitProcess diff --git a/testcases/rewrite_smoke/diamond.asm b/testcases/rewrite_smoke/diamond.asm new file mode 100644 index 0000000..239e1c0 --- /dev/null +++ b/testcases/rewrite_smoke/diamond.asm @@ -0,0 +1,34 @@ +default rel +bits 64 + +global start +global diamond_target +extern ExitProcess + +section .text +; Diamond-shaped CFG: two paths merge then continue. +; if ecx is odd: eax = ecx + 10 +; else: eax = ecx - 5 +; then: eax *= 3 +; Symbolic input → expect select/phi at merge, then mul by 3.
+diamond_target: + push rbp + mov rbp, rsp + mov eax, ecx + test eax, 1 ; ZF is set when bit 0 is clear, i.e. the input is even + jz .even ; even input takes the subtract path; odd input falls through + add eax, 10 ; odd path + jmp .merge +.even: + sub eax, 5 ; even path +.merge: + imul eax, eax, 3 ; both paths rejoin here: eax *= 3 + pop rbp + ret + +start: + sub rsp, 40 ; presumably Win64 shadow space (32 bytes) plus 8 to keep the stack 16-byte aligned (confirm) + mov ecx, 7 ; concrete odd argument for the native run: (7 plus 10) * 3 = 51 + call diamond_target + mov ecx, eax ; the target's result is passed on as the ExitProcess argument + call ExitProcess diff --git a/testcases/rewrite_smoke/loop_simple.asm b/testcases/rewrite_smoke/loop_simple.asm new file mode 100644 index 0000000..645c89d --- /dev/null +++ b/testcases/rewrite_smoke/loop_simple.asm @@ -0,0 +1,29 @@ +default rel +bits 64 + +global start +global loop_simple_target +extern ExitProcess + +section .text +; Tiny constant-bound countdown loop: sum = 3 + 2 + 1 = 6. +; ecx is overwritten with constant 3 immediately, so the +; concolic engine should unroll all 3 iterations and LLVM +; should constant-fold the result to 6. +loop_simple_target: + push rbp + mov rbp, rsp + xor eax, eax ; sum = 0 + mov ecx, 3 ; counter = 3, a constant rather than a caller-supplied value +.loop: + add eax, ecx ; sum += counter + dec ecx ; sets ZF once the counter reaches zero + jnz .loop ; keep looping while the counter is non-zero + pop rbp + ret + +start: + sub rsp, 40 ; presumably Win64 shadow space (32 bytes) plus 8 to keep the stack 16-byte aligned (confirm) + call loop_simple_target + mov ecx, eax ; the target's result is passed on as the ExitProcess argument + call ExitProcess diff --git a/testcases/rewrite_smoke/multi_arg.asm b/testcases/rewrite_smoke/multi_arg.asm new file mode 100644 index 0000000..4257d40 --- /dev/null +++ b/testcases/rewrite_smoke/multi_arg.asm @@ -0,0 +1,28 @@ +default rel +bits 64 + +global start +global multi_arg_target +extern ExitProcess + +section .text +; Two symbolic arguments (RCX, RDX) combined: +; result = (ecx + edx) * 7 +; Since both inputs are symbolic, the IR cannot constant-fold. +; Expect to see add and mul operations in lifted IR.
+multi_arg_target: + push rbp + mov rbp, rsp + mov eax, ecx ; eax = first argument (low 32 bits of RCX) + add eax, edx ; add the second argument (low 32 bits of RDX) + imul eax, eax, 7 ; scale the sum by 7 + pop rbp + ret + +start: + sub rsp, 40 ; presumably Win64 shadow space (32 bytes) plus 8 to keep the stack 16-byte aligned (confirm) + mov ecx, 5 ; concrete arguments for the native run: (5 plus 3) * 7 = 56 + mov edx, 3 + call multi_arg_target + mov ecx, eax ; the target's result is passed on as the ExitProcess argument + call ExitProcess diff --git a/testcases/rewrite_smoke/nested_branch.asm b/testcases/rewrite_smoke/nested_branch.asm new file mode 100644 index 0000000..d9754d0 --- /dev/null +++ b/testcases/rewrite_smoke/nested_branch.asm @@ -0,0 +1,38 @@ +default rel +bits 64 + +global start +global nested_branch_target +extern ExitProcess + +section .text +; 3-way nested if/else on symbolic RCX input. +; if ecx <= 10 → 100 +; else if ecx <= 20 → 200 +; else → 300 +; All comparisons survive as symbolic selects/phis in IR. +nested_branch_target: + push rbp + mov rbp, rsp + mov eax, ecx + cmp eax, 10 + jg .above10 ; jg is a signed comparison: taken when eax > 10 + mov eax, 100 + jmp .done +.above10: + cmp eax, 20 + jg .above20 ; signed: taken when eax > 20 + mov eax, 200 + jmp .done +.above20: + mov eax, 300 +.done: + pop rbp + ret + +start: + sub rsp, 40 ; presumably Win64 shadow space (32 bytes) plus 8 to keep the stack 16-byte aligned (confirm) + mov ecx, 15 ; concrete argument for the native run: takes the middle branch, result 200 + call nested_branch_target + mov ecx, eax ; the target's result is passed on as the ExitProcess argument + call ExitProcess