mirror of
https://github.com/NaC-L/Mergen.git
synced 2026-05-12 09:40:34 +00:00
Fix InstructionCache DenseMap corruption: empty/tombstone keys were identical
InstructionKey::InstructionKeyInfo had getEmptyKey() and getTombstoneKey() both returning InstructionKey(nullptr, nullptr). LLVM's DenseMap requires these to be two distinct sentinel values; returning the same key for both violated the DenseMap contract and caused bucket corruption during copy/iteration (the old FIXME about 'last item corrupted').

Fix: use reinterpret_cast sentinel pointers -1 and -2, matching LLVM convention. Also cleaned up the non-const copy constructor (removed the dead local copy and the stale FIXME comment).

Also adds:
- switch_sparse.asm test (non-consecutive case values: 10, 50, 200, 1000)
- calc_cout.cpp test (skipped - documents inline policy limitation with STL)
- C++ compilation support in build_samples.cmd
- Skip mechanism for manifest entries (skip: true + skip_reason)
- Fix test.py update-golden to not run determinism check before updating

68 pattern checks, 40 golden hashes, 108 handler microtests — all green.
This commit is contained in:
+8
-12
@@ -79,13 +79,17 @@ struct InstructionKey {
|
||||
return lhs == rhs;
|
||||
}
|
||||
|
||||
// Define empty and tombstone keys
|
||||
// Define empty and tombstone keys — MUST be distinct for DenseMap.
|
||||
static inline InstructionKey getEmptyKey() {
|
||||
return InstructionKey(nullptr, static_cast<Value*>(nullptr));
|
||||
return InstructionKey(
|
||||
reinterpret_cast<Value*>(static_cast<uintptr_t>(-1)),
|
||||
reinterpret_cast<Value*>(static_cast<uintptr_t>(-1)));
|
||||
}
|
||||
|
||||
static inline InstructionKey getTombstoneKey() {
|
||||
return InstructionKey(nullptr, static_cast<Value*>(nullptr));
|
||||
return InstructionKey(
|
||||
reinterpret_cast<Value*>(static_cast<uintptr_t>(-2)),
|
||||
reinterpret_cast<Value*>(static_cast<uintptr_t>(-2)));
|
||||
}
|
||||
};
|
||||
};
|
||||
@@ -110,16 +114,8 @@ public:
|
||||
}
|
||||
InstructionCache() = default;
|
||||
InstructionCache(InstructionCache& other) {
|
||||
// we want to copy each SmallDenseMap individually
|
||||
// crash on last item, why?
|
||||
// FIXME: last item on array is corrupted.
|
||||
for (size_t i = 0; i < opcodeCaches.size(); ++i) {
|
||||
|
||||
// reserve because its faster
|
||||
|
||||
auto src = other.opcodeCaches[i];
|
||||
opcodeCaches[i].reserve(src.size());
|
||||
|
||||
opcodeCaches[i].reserve(other.opcodeCaches[i].size());
|
||||
for (auto& kv : other.opcodeCaches[i]) {
|
||||
opcodeCaches[i].try_emplace(kv.first, kv.second);
|
||||
}
|
||||
|
||||
@@ -36,5 +36,7 @@
|
||||
"stack.ll": "41199a809916ab3045d1de076b3d4128fb40a45f950764b38b851f67b310c4fe",
|
||||
"stack_no_opts.ll": "94059a01b8a78951c9448ba94b5dadf445610df76315b8cab8eecd153843472b",
|
||||
"switch_3way.ll": "e706ce0da37dbe02fd52fae223c39f74a8b84c4946b971d8425fe868a4e73256",
|
||||
"switch_3way_no_opts.ll": "5527b1a564babe40dd9ea1ff7d1ea3796e814c22823be7841ee1be0dbd1c7524"
|
||||
"switch_3way_no_opts.ll": "5527b1a564babe40dd9ea1ff7d1ea3796e814c22823be7841ee1be0dbd1c7524",
|
||||
"switch_sparse.ll": "06b9ec694dcf18ffb7041a437fa3e4f2e50c061569cc98bcb239b5f77c3a15f4",
|
||||
"switch_sparse_no_opts.ll": "b5fea064fea49272e541476f1e087d6f34f10e9f2d90d0eb0b6ebdda9ca7ea6c"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"schema": "mergen-oracle-v1",
|
||||
"generated_at_utc": "2026-03-05T17:43:33.532361+00:00",
|
||||
"generated_at_utc": "2026-03-05T19:49:19.307536+00:00",
|
||||
"source_seed_schema": "mergen-oracle-seed-v1",
|
||||
"providers": [
|
||||
"unicorn"
|
||||
|
||||
@@ -62,5 +62,14 @@ for %%F in ("%~dp0..\..\testcases\rewrite_smoke\*.c") do (
|
||||
if errorlevel 1 exit /b 1
|
||||
)
|
||||
|
||||
rem --- Compile C++ test programs (real binaries with CRT + STL) ---
|
||||
for %%F in ("%~dp0..\..\testcases\rewrite_smoke\*.cpp") do (
|
||||
cl.exe /nologo /Od /GS- /EHsc /c /Fo"%WORKDIR%\%%~nF.obj" "%%~fF"
|
||||
if errorlevel 1 exit /b 1
|
||||
|
||||
link.exe /nologo /subsystem:console /out:"%WORKDIR%\%%~nF.exe" /map:"%WORKDIR%\%%~nF.map" "%WORKDIR%\%%~nF.obj"
|
||||
if errorlevel 1 exit /b 1
|
||||
)
|
||||
|
||||
echo Built rewrite regression samples in "%WORKDIR%"
|
||||
exit /b 0
|
||||
@@ -94,6 +94,18 @@
|
||||
"name": "calc_switch",
|
||||
"symbol": "calc_switch",
|
||||
"patterns": ["switch i32 %0", "i32 1, label", "i32 2, label", "i32 3, label", "i32 4, label", "i32 5, label", "phi i64"]
|
||||
},
|
||||
{
|
||||
"name": "switch_sparse",
|
||||
"symbol": "switch_sparse_target",
|
||||
"patterns": ["switch i32 %0", "i32 10, label", "i32 50, label", "i32 200, label", "i32 1000, label", "phi i64", "[ 11,", "[ 55,", "[ 222,", "[ 1337,", "[ 4294967295,"]
|
||||
},
|
||||
{
|
||||
"name": "calc_cout",
|
||||
"symbol": "calc_cout",
|
||||
"skip": true,
|
||||
"skip_reason": "Statically-linked STL (cout) inlined by lifter; GEPTracker UNREACHABLE on complex library code. Blocked on inline policy improvements (Phase 2).",
|
||||
"patterns": []
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -32,7 +32,8 @@ if ($samples.Count -eq 0) {
|
||||
$srcDir = Join-Path $repoRoot 'testcases/rewrite_smoke'
|
||||
$srcNames = @(
|
||||
(Get-ChildItem -Path $srcDir -Filter '*.asm' | ForEach-Object { $_.BaseName }) +
|
||||
(Get-ChildItem -Path $srcDir -Filter '*.c' | ForEach-Object { $_.BaseName })
|
||||
(Get-ChildItem -Path $srcDir -Filter '*.c' | ForEach-Object { $_.BaseName }) +
|
||||
(Get-ChildItem -Path $srcDir -Filter '*.cpp' | ForEach-Object { $_.BaseName })
|
||||
)
|
||||
$sampleNames = @($samples | ForEach-Object { $_.name })
|
||||
|
||||
@@ -52,6 +53,11 @@ New-Item -ItemType Directory -Path $irDir -Force | Out-Null
|
||||
Push-Location $repoRoot
|
||||
try {
|
||||
foreach ($sample in $samples) {
|
||||
if ($sample.PSObject.Properties['skip'] -and $sample.skip) {
|
||||
Write-Host "SKIP: $($sample.name) (known limitation)"
|
||||
continue
|
||||
}
|
||||
|
||||
$mapPath = Join-Path $WorkDir "$($sample.name).map"
|
||||
if (-not (Test-Path $mapPath)) {
|
||||
throw "Map file not found: $mapPath"
|
||||
|
||||
@@ -19,6 +19,10 @@ if ($checks.Count -eq 0) {
|
||||
$failed = $false
|
||||
|
||||
foreach ($check in $checks) {
|
||||
if ($check.PSObject.Properties['skip'] -and $check.skip) {
|
||||
Write-Host "SKIP: $($check.name) (known limitation)"
|
||||
continue
|
||||
}
|
||||
$file = Join-Path $irDir "$($check.name).ll"
|
||||
if (-not (Test-Path $file)) {
|
||||
Write-Host "FAIL: missing $file"
|
||||
|
||||
@@ -183,7 +183,7 @@ def main() -> None:
|
||||
return
|
||||
|
||||
if command == "update-golden":
|
||||
run_baseline()
|
||||
_run_cmd(REWRITE_DIR / "run.cmd")
|
||||
update_golden(IR_OUTPUT_DIR, GOLDEN_HASHES_FILE)
|
||||
return
|
||||
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/* Test: function with cout call.
|
||||
* Lift target: calc_cout — external call handling.
|
||||
* The computation is pure, but it calls cout before returning. */
|
||||
#include <iostream>
|
||||
|
||||
__declspec(noinline)
|
||||
int calc_cout(int x) {
|
||||
int result = x * 3 + 7;
|
||||
std::cout << result;
|
||||
return result;
|
||||
}
|
||||
|
||||
int main() {
|
||||
int r = calc_cout(10);
|
||||
return r;
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
default rel
|
||||
bits 64
|
||||
|
||||
global start
|
||||
global switch_sparse_target
|
||||
extern ExitProcess
|
||||
|
||||
section .text
|
||||
; Sparse switch on symbolic ECX input.
|
||||
; Case values are NOT consecutive: 10, 50, 200, 1000.
|
||||
; Tests multi-target branch resolution with large gaps between cases.
|
||||
switch_sparse_target:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov eax, ecx
|
||||
cmp eax, 10
|
||||
je .case10
|
||||
cmp eax, 50
|
||||
je .case50
|
||||
cmp eax, 200
|
||||
je .case200
|
||||
cmp eax, 1000
|
||||
je .case1000
|
||||
; default
|
||||
mov eax, -1
|
||||
jmp .done
|
||||
.case10:
|
||||
mov eax, 11
|
||||
jmp .done
|
||||
.case50:
|
||||
mov eax, 55
|
||||
jmp .done
|
||||
.case200:
|
||||
mov eax, 222
|
||||
jmp .done
|
||||
.case1000:
|
||||
mov eax, 1337
|
||||
.done:
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
start:
|
||||
sub rsp, 40
|
||||
mov ecx, 200
|
||||
call switch_sparse_target
|
||||
mov ecx, eax
|
||||
call ExitProcess
|
||||
Reference in New Issue
Block a user