Fix InstructionCache DenseMap corruption: empty/tombstone keys were identical

The InstructionKey::InstructionKeyInfo had getEmptyKey() and getTombstoneKey()
both returning InstructionKey(nullptr, nullptr). LLVM DenseMap requires these
to be distinct sentinel values. This violated the DenseMap contract, causing
bucket corruption during copy/iteration (the old FIXME about 'last item
corrupted').

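Fix: return distinct sentinel keys built with reinterpret_cast — both pointers are uintptr_t(-1) for the empty key and uintptr_t(-2) for the tombstone key — matching LLVM convention.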
Also cleaned up the non-const copy constructor (removed dead local copy and
stale FIXME comment).

Also adds:
- switch_sparse.asm test (non-consecutive case values: 10, 50, 200, 1000)
- calc_cout.cpp test (skipped - documents inline policy limitation with STL)
- C++ compilation support in build_samples.cmd
- Skip mechanism for manifest entries (skip: true + skip_reason)
- Fix test.py update-golden to not run determinism check before updating

68 pattern checks, 40 golden hashes, 108 handler microtests — all green.
This commit is contained in:
yusufcanislek
2026-03-06 00:47:45 +03:00
parent 1b1cb573d7
commit 33f24ed0fc
10 changed files with 109 additions and 17 deletions
+8 -12
View File
@@ -79,13 +79,17 @@ struct InstructionKey {
return lhs == rhs;
}
// Define empty and tombstone keys
// Define empty and tombstone keys — MUST be distinct for DenseMap.
static inline InstructionKey getEmptyKey() {
return InstructionKey(nullptr, static_cast<Value*>(nullptr));
return InstructionKey(
reinterpret_cast<Value*>(static_cast<uintptr_t>(-1)),
reinterpret_cast<Value*>(static_cast<uintptr_t>(-1)));
}
static inline InstructionKey getTombstoneKey() {
return InstructionKey(nullptr, static_cast<Value*>(nullptr));
return InstructionKey(
reinterpret_cast<Value*>(static_cast<uintptr_t>(-2)),
reinterpret_cast<Value*>(static_cast<uintptr_t>(-2)));
}
};
};
@@ -110,16 +114,8 @@ public:
}
InstructionCache() = default;
InstructionCache(InstructionCache& other) {
// we want to copy each SmallDenseMap individually
// crash on last item, why?
// FIXME: last item on array is corrupted.
for (size_t i = 0; i < opcodeCaches.size(); ++i) {
// reserve because its faster
auto src = other.opcodeCaches[i];
opcodeCaches[i].reserve(src.size());
opcodeCaches[i].reserve(other.opcodeCaches[i].size());
for (auto& kv : other.opcodeCaches[i]) {
opcodeCaches[i].try_emplace(kv.first, kv.second);
}
+3 -1
View File
@@ -36,5 +36,7 @@
"stack.ll": "41199a809916ab3045d1de076b3d4128fb40a45f950764b38b851f67b310c4fe",
"stack_no_opts.ll": "94059a01b8a78951c9448ba94b5dadf445610df76315b8cab8eecd153843472b",
"switch_3way.ll": "e706ce0da37dbe02fd52fae223c39f74a8b84c4946b971d8425fe868a4e73256",
"switch_3way_no_opts.ll": "5527b1a564babe40dd9ea1ff7d1ea3796e814c22823be7841ee1be0dbd1c7524"
"switch_3way_no_opts.ll": "5527b1a564babe40dd9ea1ff7d1ea3796e814c22823be7841ee1be0dbd1c7524",
"switch_sparse.ll": "06b9ec694dcf18ffb7041a437fa3e4f2e50c061569cc98bcb239b5f77c3a15f4",
"switch_sparse_no_opts.ll": "b5fea064fea49272e541476f1e087d6f34f10e9f2d90d0eb0b6ebdda9ca7ea6c"
}
@@ -1,6 +1,6 @@
{
"schema": "mergen-oracle-v1",
"generated_at_utc": "2026-03-05T17:43:33.532361+00:00",
"generated_at_utc": "2026-03-05T19:49:19.307536+00:00",
"source_seed_schema": "mergen-oracle-seed-v1",
"providers": [
"unicorn"
+9
View File
@@ -62,5 +62,14 @@ for %%F in ("%~dp0..\..\testcases\rewrite_smoke\*.c") do (
if errorlevel 1 exit /b 1
)
rem --- Compile C++ test programs (real binaries with CRT + STL) ---
rem For every .cpp file under testcases\rewrite_smoke (located relative to this
rem script's own directory), compile it to NAME.obj inside WORKDIR, then link that
rem object into NAME.exe and write NAME.map next to it.
rem cl.exe flags: /Od disables optimization, /GS- turns off buffer security
rem checks, /EHsc selects the standard C++ exception-handling model, /c compiles
rem without linking, /Fo names the object file.
rem link.exe flags: /subsystem:console builds a console program, /out names the
rem executable, /map names the linker map file.
rem A failing compile or link step aborts the whole script with exit code 1.
for %%F in ("%~dp0..\..\testcases\rewrite_smoke\*.cpp") do (
cl.exe /nologo /Od /GS- /EHsc /c /Fo"%WORKDIR%\%%~nF.obj" "%%~fF"
if errorlevel 1 exit /b 1
link.exe /nologo /subsystem:console /out:"%WORKDIR%\%%~nF.exe" /map:"%WORKDIR%\%%~nF.map" "%WORKDIR%\%%~nF.obj"
if errorlevel 1 exit /b 1
)
echo Built rewrite regression samples in "%WORKDIR%"
exit /b 0
+13 -1
View File
@@ -94,6 +94,18 @@
"name": "calc_switch",
"symbol": "calc_switch",
"patterns": ["switch i32 %0", "i32 1, label", "i32 2, label", "i32 3, label", "i32 4, label", "i32 5, label", "phi i64"]
},
{
"name": "switch_sparse",
"symbol": "switch_sparse_target",
"patterns": ["switch i32 %0", "i32 10, label", "i32 50, label", "i32 200, label", "i32 1000, label", "phi i64", "[ 11,", "[ 55,", "[ 222,", "[ 1337,", "[ 4294967295,"]
},
{
"name": "calc_cout",
"symbol": "calc_cout",
"skip": true,
"skip_reason": "Statically-linked STL (cout) inlined by lifter; GEPTracker UNREACHABLE on complex library code. Blocked on inline policy improvements (Phase 2).",
"patterns": []
}
]
}
}
+7 -1
View File
@@ -32,7 +32,8 @@ if ($samples.Count -eq 0) {
$srcDir = Join-Path $repoRoot 'testcases/rewrite_smoke'
$srcNames = @(
(Get-ChildItem -Path $srcDir -Filter '*.asm' | ForEach-Object { $_.BaseName }) +
(Get-ChildItem -Path $srcDir -Filter '*.c' | ForEach-Object { $_.BaseName })
(Get-ChildItem -Path $srcDir -Filter '*.c' | ForEach-Object { $_.BaseName }) +
(Get-ChildItem -Path $srcDir -Filter '*.cpp' | ForEach-Object { $_.BaseName })
)
$sampleNames = @($samples | ForEach-Object { $_.name })
@@ -52,6 +53,11 @@ New-Item -ItemType Directory -Path $irDir -Force | Out-Null
Push-Location $repoRoot
try {
foreach ($sample in $samples) {
if ($sample.PSObject.Properties['skip'] -and $sample.skip) {
Write-Host "SKIP: $($sample.name) (known limitation)"
continue
}
$mapPath = Join-Path $WorkDir "$($sample.name).map"
if (-not (Test-Path $mapPath)) {
throw "Map file not found: $mapPath"
+4
View File
@@ -19,6 +19,10 @@ if ($checks.Count -eq 0) {
$failed = $false
foreach ($check in $checks) {
if ($check.PSObject.Properties['skip'] -and $check.skip) {
Write-Host "SKIP: $($check.name) (known limitation)"
continue
}
$file = Join-Path $irDir "$($check.name).ll"
if (-not (Test-Path $file)) {
Write-Host "FAIL: missing $file"
+1 -1
View File
@@ -183,7 +183,7 @@ def main() -> None:
return
if command == "update-golden":
run_baseline()
_run_cmd(REWRITE_DIR / "run.cmd")
update_golden(IR_OUTPUT_DIR, GOLDEN_HASHES_FILE)
return
+16
View File
@@ -0,0 +1,16 @@
/* Test: function with cout call.
* Lift target: calc_cout — external call handling.
* The computation is pure, but it calls cout before returning. */
#include <iostream>
// Lift target: computes x * 3 + 7, streams the value to std::cout, and returns it.
// __declspec(noinline) asks the compiler not to inline this function into main.
// NOTE(review): presumably this keeps calc_cout a standalone function the lifter
// can locate by symbol name — confirm against the test manifest.
__declspec(noinline)
int calc_cout(int x) {
// Pure arithmetic part of the test.
int result = x * 3 + 7;
// The call into the iostream library is the part this test is about.
std::cout << result;
return result;
}
// Entry point: calls calc_cout with the fixed input 10 and returns its result
// (10 * 3 + 7 = 37) as the process exit code.
int main() {
int r = calc_cout(10);
return r;
}
+47
View File
@@ -0,0 +1,47 @@
default rel
bits 64
global start
global switch_sparse_target
extern ExitProcess
section .text
; Sparse switch on symbolic ECX input.
; Case values are NOT consecutive: 10, 50, 200, 1000.
; Tests multi-target branch resolution with large gaps between cases.
; switch_sparse_target: maps the 32-bit value in ECX to a result in EAX.
;   ECX == 10   -> EAX = 11
;   ECX == 50   -> EAX = 55
;   ECX == 200  -> EAX = 222
;   ECX == 1000 -> EAX = 1337
;   otherwise   -> EAX = -1 (0xFFFFFFFF)
; Written as a chain of cmp/je pairs rather than a jump table.
switch_sparse_target:
; Standard frame setup; no locals are allocated.
push rbp
mov rbp, rsp
; Work on a copy of the input in EAX.
mov eax, ecx
; Compare against each case value in turn and branch to its handler.
cmp eax, 10
je .case10
cmp eax, 50
je .case50
cmp eax, 200
je .case200
cmp eax, 1000
je .case1000
; default
mov eax, -1
jmp .done
.case10:
mov eax, 11
jmp .done
.case50:
mov eax, 55
jmp .done
.case200:
mov eax, 222
jmp .done
.case1000:
; Last case: no jmp needed, execution falls through into .done.
mov eax, 1337
.done:
; Common exit: restore the caller's frame pointer and return with the result in EAX.
pop rbp
ret
; start: program entry point. Calls switch_sparse_target with the concrete
; input 200 and passes the returned value to ExitProcess.
start:
; NOTE(review): 40 bytes is presumably 32 bytes of Win64 shadow space plus 8 to
; keep RSP 16-byte aligned at the calls below — confirm against the target ABI.
sub rsp, 40
mov ecx, 200
call switch_sparse_target
; Forward the result (222 for input 200) as the argument to ExitProcess.
mov ecx, eax
call ExitProcess