From 41b6e8e8d19a46865c91da1b0fbd8ccf4f9b9bae Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 07:23:45 +0300 Subject: [PATCH] tests: add Themida devirtualization import-equivalence check Adds python test.py themida that lifts every sample in scripts/rewrite/themida_samples.json and asserts the resulting IR calls every import declared in required_imports. Names are pinned against a lift of the non-virtualized reference binary via --update. This is a correctness gate that complements the existing coverage gate ('2544 instructions, 0 errors'). Currently red on example2-virt.bin: the lifter unrolls the VM without surfacing GetStdHandle / WriteConsoleA / ReadConsoleA / CharUpperA from the guest program. That gap is the active devirtualization frontier; this test makes it visible instead of silently green. Samples whose binaries are absent (`../testthemida/*.bin` lives outside the repo) are skipped rather than failed, so the check runs cleanly in CI without the binaries present. --- .gitignore | 1 + docs/LOOP_HANDLING.md | 16 +- scripts/rewrite/check_themida_equivalence.py | 245 +++++++++++++++++++ scripts/rewrite/themida_samples.json | 19 ++ test.py | 24 ++ 5 files changed, 303 insertions(+), 2 deletions(-) create mode 100644 scripts/rewrite/check_themida_equivalence.py create mode 100644 scripts/rewrite/themida_samples.json diff --git a/.gitignore b/.gitignore index 961abfb..2d38955 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ debug*.txt emu_*.py check_*.py !scripts/rewrite/check_semantic.py +!scripts/rewrite/check_themida_equivalence.py .tmp* =* scriptsdev/ diff --git a/docs/LOOP_HANDLING.md b/docs/LOOP_HANDLING.md index 1fb3128..11b123e 100644 --- a/docs/LOOP_HANDLING.md +++ b/docs/LOOP_HANDLING.md @@ -174,13 +174,25 @@ python test.py micro python test.py baseline ``` -For changes that touch register/flag phi shape, also re-run the Themida sample to confirm the 2544-instruction benchmark holds: +For changes that touch register/flag phi shape, also run the two Themida gates — a coverage gate and a correctness gate. Both must stay green (or at least not regress). + +**Coverage gate** — confirms the VM unrolls without crashing: ``` build_iced\lifter.exe ..\testthemida\example2-virt.bin 0x140001000 ``` -and inspect `output_diagnostics.json` for `lift_stats.instructions_lifted == 2544` and `summary.warning == 0`, `summary.error == 0`. +Inspect `output_diagnostics.json` for `lift_stats.instructions_lifted == 2544` and `summary.warning == 0`, `summary.error == 0`. This only certifies that the lifter walked the VM's 2544 handler instructions without reporting errors — it does **not** certify that the recovered IR is semantically equivalent to the original function. + +**Correctness gate** — confirms the devirtualized IR calls the same external imports as the non-virtualized reference: + +``` +python test.py themida +``` + +Fails hard if any required import from `scripts/rewrite/themida_samples.json` is missing from the lifted IR. Required imports are pinned against a lift of the non-virt reference binary; regenerate with `python test.py themida --update` when the reference changes (not when the virt output changes). Passing this gate means the VM devirtualization recovered the guest program's external-call semantics, not just the VM's own state-machine activity. + +This gate is currently **red** on `example2-virt.bin`: the lifter unrolls the VM but does not surface the guest's `GetStdHandle` / `WriteConsoleA` / `ReadConsoleA` / `CharUpperA` calls. That gap is the active Themida-frontier work item — the coverage gate passing while the correctness gate fails is exactly the failure mode the two-gate split is designed to make visible. ## Known limitations diff --git a/scripts/rewrite/check_themida_equivalence.py b/scripts/rewrite/check_themida_equivalence.py new file mode 100644 index 0000000..e135917 --- /dev/null +++ b/scripts/rewrite/check_themida_equivalence.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python3 +"""Themida devirtualization semantic-equivalence check. + +Premise: a virtualized function, after devirtualization, must call the same +external imports as its non-virtualized counterpart. Structural divergence in +the lifted IR (block count, SSA names, pass-ordering artifacts) is allowed; +semantic divergence at the import boundary is not. + +For each entry in ``themida_samples.json``: +- lift the virtualized binary +- extract external call names from the resulting IR +- compare against the manifest's ``required_imports`` list +- fail hard on any required import that is absent + +Use ``--update`` to regenerate ``required_imports`` from the reference binary. +Samples whose binaries are not present on disk are skipped, not failed, because +the binaries live outside the repository (``../testthemida/``) and are not +available in CI. +""" +from __future__ import annotations + +import argparse +import json +import os +import re +import subprocess +import sys +from pathlib import Path +from typing import Dict, List, Set, Tuple + +ROOT = Path(__file__).resolve().parents[2] +LIFTER = ROOT / "build_iced" / "lifter.exe" +DEFAULT_MANIFEST = Path(__file__).resolve().parent / "themida_samples.json" +WORKROOT = ROOT.parent / "rewrite-regression-work" / "themida_ir" + +# Matches a `call` that targets a named function: +# %N = call i64 @GetStdHandle(...) +# call void @"VirtualizerSDK64.dll#103"(...) +# tail call ptr @foo(...) +# Captures the callee identifier (unquoted or the contents of the quotes). +_CALL_RE = re.compile( + r'''^\s*(?:%\S+\s*=\s*)?(?:tail\s+|musttail\s+|notail\s+)?''' + r'''call\s+\S+\s+@(?:"([^"]+)"|([A-Za-z_][\w.]*))\s*\(''', + re.MULTILINE, +) + + +def _lift(binary: Path, entry: str, workdir: Path) -> Path: + if not LIFTER.exists(): + raise SystemExit( + f"Lifter not found: {LIFTER}. Build it with " + "'cmd /c scripts\\dev\\build_iced.cmd' first." + ) + if not binary.exists(): + raise SystemExit(f"Binary not found: {binary}") + + workdir.mkdir(parents=True, exist_ok=True) + + # Clear previous artifacts so a stale file can't mask a new failure. + for stale in ("output.ll", "output_no_opts.ll", "output_diagnostics.json"): + (workdir / stale).unlink(missing_ok=True) + + result = subprocess.run( + [str(LIFTER), str(binary), entry], + cwd=workdir, + env=os.environ.copy(), + capture_output=True, + text=True, + timeout=300, + ) + ir_path = workdir / "output_no_opts.ll" + if not ir_path.exists(): + tail_stdout = (result.stdout or "")[-600:] + tail_stderr = (result.stderr or "")[-600:] + raise SystemExit( + f"Lifter did not emit output_no_opts.ll for {binary} @ {entry}\n" + f"exit_code={result.returncode}\n" + f"stdout-tail:\n{tail_stdout}\n" + f"stderr-tail:\n{tail_stderr}" + ) + return ir_path + + +def _extract_call_names(ir_text: str) -> Dict[str, int]: + """Return a multiset of call-target identifiers found in IR text. + + Intramodule calls to ``@main`` and outlined ``@sub_*`` thunks are excluded + — we only care about named external imports that the lifter resolved. + """ + counts: Dict[str, int] = {} + for match in _CALL_RE.finditer(ir_text): + name = match.group(1) or match.group(2) + if not name: + continue + if name == "main" or name.startswith("sub_") or name.startswith("llvm."): + continue + counts[name] = counts.get(name, 0) + 1 + return counts + + +def _filter_imports(imports: Dict[str, int], ignore_patterns: List[str]) -> Dict[str, int]: + if not ignore_patterns: + return dict(imports) + compiled = [re.compile(p) for p in ignore_patterns] + return {k: v for k, v in imports.items() if not any(p.search(k) for p in compiled)} + + +def _diff( + required: Set[str], + actual: Dict[str, int], +) -> Tuple[List[str], bool]: + lines: List[str] = [] + ok = True + + missing = sorted(required - set(actual)) + if missing: + ok = False + for name in missing: + lines.append(f" MISSING required import: @{name}") + + extra = sorted(set(actual) - required) + # Extras are informational — a smarter devirtualizer may legitimately + # surface additional imports. Never fatal. + for name in extra: + lines.append(f" extra import (not required): @{name} x{actual[name]}") + + return lines, ok + + +def _check_sample(sample: dict) -> bool: + name = sample["name"] + virt_rel = sample["virt_binary"] + virt = (ROOT / virt_rel).resolve() + entry = sample["entry"] + ignore = sample.get("ignore_imports", []) + required = set(sample.get("required_imports", [])) + + if not virt.exists(): + print(f"SKIP: {name} — virt binary not present at {virt}") + return True + + if not required: + print( + f"FAIL: {name} — manifest has no required_imports; " + f"run `python test.py themida --update` first." + ) + return False + + ir_path = _lift(virt, entry, WORKROOT / name) + ir_text = ir_path.read_text(encoding="utf-8", errors="replace") + actual = _filter_imports(_extract_call_names(ir_text), ignore) + + lines, ok = _diff(required, actual) + summary = ( + f"{len(actual)} distinct imports, {sum(actual.values())} calls " + f"(required {len(required)})" + ) + if ok: + print(f"PASS: {name} — {summary}") + for line in lines: + print(line) + else: + print(f"FAIL: {name} — {summary}") + for line in lines: + print(line) + req_str = ", ".join(sorted(required)) or "(none)" + act_str = ", ".join(f"{k}x{v}" for k, v in sorted(actual.items())) or "(none)" + print(f" required: {req_str}") + print(f" actual: {act_str}") + return ok + + +def _update_sample(sample: dict) -> dict: + name = sample["name"] + ref_rel = sample.get("reference_binary") + entry = sample["entry"] + ignore = sample.get("ignore_imports", []) + + if not ref_rel: + print(f"SKIP update: {name} — no reference_binary in manifest") + return sample + + ref = (ROOT / ref_rel).resolve() + if not ref.exists(): + print(f"SKIP update: {name} — reference binary not present at {ref}") + return sample + + ir_path = _lift(ref, entry, WORKROOT / f"{name}_reference") + ir_text = ir_path.read_text(encoding="utf-8", errors="replace") + imports = _filter_imports(_extract_call_names(ir_text), ignore) + + updated = dict(sample) + updated["required_imports"] = sorted(imports.keys()) + print( + f"UPDATED {name}: {len(imports)} required imports from {ref.name} " + f"({sum(imports.values())} total calls in reference)" + ) + return updated + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--manifest", type=Path, default=DEFAULT_MANIFEST) + parser.add_argument( + "--update", + action="store_true", + help="regenerate required_imports from reference binaries", + ) + parser.add_argument( + "filter", + nargs="*", + help="only process samples whose name contains any of these tokens", + ) + args = parser.parse_args() + + manifest = json.loads(args.manifest.read_text(encoding="utf-8")) + all_samples = manifest["samples"] + + def matches(sample: dict) -> bool: + return not args.filter or any(tok in sample["name"] for tok in args.filter) + + if args.update: + manifest["samples"] = [ + _update_sample(s) if matches(s) else s for s in all_samples + ] + args.manifest.write_text( + json.dumps(manifest, indent=2) + "\n", encoding="utf-8" + ) + print(f"Wrote {args.manifest}") + return + + selected = [s for s in all_samples if matches(s)] + if not selected: + raise SystemExit("No samples matched filter") + + all_ok = True + for s in selected: + all_ok &= _check_sample(s) + if not all_ok: + raise SystemExit("Themida equivalence check FAILED") + print("All Themida equivalence checks passed.") + + +if __name__ == "__main__": + main() diff --git a/scripts/rewrite/themida_samples.json b/scripts/rewrite/themida_samples.json new file mode 100644 index 0000000..0e1afff --- /dev/null +++ b/scripts/rewrite/themida_samples.json @@ -0,0 +1,19 @@ +{ + "samples": [ + { + "name": "example2", + "reference_binary": "../testthemida/example2.bin", + "virt_binary": "../testthemida/example2-virt.bin", + "entry": "0x140001000", + "ignore_imports": [ + "^VirtualizerSDK64\\.dll#" + ], + "required_imports": [ + "CharUpperA", + "GetStdHandle", + "ReadConsoleA", + "WriteConsoleA" + ] + } + ] +} diff --git a/test.py b/test.py index 497c0cb..8f7160e 100644 --- a/test.py +++ b/test.py @@ -18,6 +18,7 @@ DEFAULT_VECTORS = ROOT / "lifter" / "test" / "test_vectors" / "oracle_vectors.js IR_OUTPUT_DIR = ROOT.parent / "rewrite-regression-work" / "ir_outputs" GOLDEN_HASHES_FILE = ROOT / "lifter" / "test" / "test_vectors" / "golden_ir_hashes.json" SEMANTIC_SCRIPT = REWRITE_DIR / "check_semantic.py" +THEMIDA_SCRIPT = REWRITE_DIR / "check_themida_equivalence.py" # C-compiled samples produce toolchain-dependent IR (different addresses across # compiler versions/machines). Exclude them from golden hash determinism checks; @@ -254,6 +255,15 @@ def run_vmp(filter_tokens: List[str]) -> None: _run(args) +def run_themida(filter_tokens: List[str], update: bool) -> None: + args = [sys.executable, str(THEMIDA_SCRIPT)] + if update: + args.append("--update") + if filter_tokens: + args.extend(filter_tokens) + _run(args) + + def run_negative_checks() -> None: lifter_path = ROOT / "build_iced" / "lifter.exe" if not lifter_path.exists(): @@ -423,6 +433,16 @@ def parse_args() -> argparse.Namespace: help="attempt local VMP target lifts (recommended for big control-flow/semantics changes)", ) vmp.add_argument("filter", nargs="*", help="optional VMP target name filter tokens") + themida = sub.add_parser( + "themida", + help="run Themida devirtualization import-equivalence checks", + ) + themida.add_argument( + "--update", + action="store_true", + help="regenerate required_imports from reference binaries", + ) + themida.add_argument("filter", nargs="*", help="optional sample name filter tokens") return parser.parse_args() @@ -483,6 +503,10 @@ def main() -> None: run_vmp(args.filter) return + if command == "themida": + run_themida(args.filter, args.update) + return + if command == "flags": run_flagstress(args.filter) return