Files
FluidAudio/Tests/FluidAudioTests/Shared/MLArrayCacheTests.swift
T
Alex d9eef864d2 ASR tech debt cleanup: remove dead code, fix bugs, add benchmark script 28/03/2026 (#460)
## Summary

Systematic cleanup of the ASR module addressing tech debt items from
#457. Net reduction of ~430 lines while fixing real bugs and improving
maintainability.

### Bug fixes
- **`enableFP16` silently ignored** —
`optimizedConfiguration(enableFP16:)` delegated to a shared factory that
hardcoded `allowLowPrecisionAccumulationOnGPU = true`, ignoring the
caller's parameter
- **`MLArrayCache.returnArray` only reset float32 data** — cached arrays
of other types (float16, int32) retained stale data from previous use
- **CTC model auto-detection broken** —
`Repo.parakeetCtc110m.folderName` returned `"parakeet-ctc-110m"` instead
of `"parakeet-ctc-110m-coreml"` because the `folderName` switch fell
through to a `default` case that stripped the `-coreml` suffix. Same for
`parakeetCtc06b`.
- **Duplicate tokens at chunk merge boundary** — `mergeByMidpoint` used
`<=`/`>=` so tokens exactly at the cutoff appeared in both left and
right chunks

### Dead code removal
- Deleted `ANEOptimizer` indirection layer (166 lines) — was a
pass-through wrapping `MLModel` with no optimization
- Deleted `PerformanceMonitor` actor and `AggregatedMetrics` — never
instantiated, component times hardcoded to 0
- Deleted `getFloat16Array` from MLArrayCache — never called
- Deleted `sliceEncoderOutput` from AsrTranscription — never called (30
lines)
- Deleted `loadWithANEOptimization` from AsrModels — never called
- Removed unused `tokenTimings` parameter chain through
`processTranscriptionResult`
- Removed unused `import OSLog` / `import CoreML` across 5 files
- Removed `nonisolated(unsafe)` from SlidingWindowAsrManager (types
already Sendable)

### Duplication elimination
- Extracted `clearCachedCtcData()` helper (replaced 3× triple-nil
assignments)
- Extracted `decoderState(for:)` / `setDecoderState(_:for:)` (replaced
4× switch blocks)
- Extracted `frameAlignedAudio()` (replaced 2× duplicated
frame-alignment blocks)
- Added `ASRConstants.secondsPerEncoderFrame` (replaced 5× magic `0.08`)
- Replaced hardcoded `16_000` with `config.sampleRate` /
`ASRConstants.sampleRate`
- Extracted `MLModelConfigurationUtils.defaultConfiguration()` (replaced
5× copy-pasted config methods)
- Extracted `MLModelConfigurationUtils.defaultModelsDirectory()`
(replaced 3× copy-pasted directory methods)
- Consolidated duplicate `vocabularyFile` / `vocabularyFileArray`
constants

### File organization
- Moved `PerformanceMetrics.swift`, `ProgressEmitter.swift`,
`MLArrayCache.swift` from `ASR/Parakeet/` to `Shared/` (used by multiple
modules)
- Renamed `StreamingAudioSourceFactory` → `AudioSourceFactory`,
`StreamingAudioSampleSource` → `AudioSampleSource` (types used by both
ASR and Diarizer)
- Renamed files to match type names: `SortformerDiarizerPipeline.swift`
→ `SortformerDiarizer.swift`, `LSEENDDiarizerAPI.swift` →
`LSEENDDiarizer.swift`, `NemotronPipeline.swift` →
`NemotronStreamingAsrManager+Pipeline.swift`
- Replaced force unwraps in `RnntDecoder.swift` with `guard let` +
descriptive errors
- Removed stale TODO about decoder state in AsrManager

### Benchmark script
- Added `Scripts/run_parakeet_benchmarks.sh` — runs all 6 benchmarks
(v3, v2, TDT-CTC-110M, CTC earnings, EOU 320ms, Nemotron 1120ms) with
WER comparison against `benchmarks100.md` baselines and regression
detection
- Referenced from `Documentation/ASR/benchmarks100.md`

## Verified — no regressions

```
Model                       Baseline    Current      Delta
Parakeet TDT v3 (0.6B)          2.6%      2.64%     +0.04%
Parakeet TDT v2 (0.6B)          3.8%      3.79%     -0.01%
CTC-TDT 110M                    3.6%      3.56%     -0.04%
CTC Earnings                  16.54%     16.51%     -0.03%
EOU 320ms (120M)               7.11%      7.11%     +0.00%
Nemotron 1120ms (0.6B)         1.99%      1.99%     +0.00%
```

## Test plan
- [x] `swift build` passes
- [x] `swift test` passes (all existing tests, updated for removed dead
code)
- [x] All 6 ASR benchmarks match baselines (100 files each)
- [ ] `swift format lint` passes
2026-03-28 23:44:10 -04:00

221 lines
7.0 KiB
Swift

@preconcurrency import CoreML
import Foundation
import XCTest
@testable import FluidAudio
/// Unit tests for `MLArrayCache`: borrowing and returning `MLMultiArray` buffers,
/// size limiting, pre-warming, clearing, mixed data types, and concurrent use.
final class MLArrayCacheTests: XCTestCase {

    /// Cache under test; recreated in `setUp()` before every test method.
    var cache: MLArrayCache!

    override func setUp() async throws {
        cache = MLArrayCache(maxCacheSize: 10)
    }

    override func tearDown() async throws {
        // Drop the cache (and whatever buffers it retains) as soon as the test ends
        // instead of keeping it alive on the test-case instance.
        cache = nil
    }

    // MARK: - Basic Cache Operations

    /// A freshly requested array has the requested shape and data type and, outside CI,
    /// a data pointer aligned to `ANEMemoryUtils.aneAlignment`.
    func testGetArrayCreatesANEAligned() async throws {
        let shape: [NSNumber] = [1, 100]

        let array = try await cache.getArray(shape: shape, dataType: .float32)

        XCTAssertEqual(array.shape, shape)
        XCTAssertEqual(array.dataType, .float32)

        // In CI, we don't test alignment since we use standard arrays
        let isCI = ProcessInfo.processInfo.environment["CI"] != nil
        if !isCI {
            // Verify ANE alignment only in non-CI environment
            let pointerValue = Int(bitPattern: array.dataPointer)
            XCTAssertEqual(pointerValue % ANEMemoryUtils.aneAlignment, 0)
        }
    }

    /// Requesting the same shape/type after returning an array yields a usable array.
    /// Only shape and data type are checked; instance identity is not asserted.
    func testCacheHitOnSecondRequest() async throws {
        let shape: [NSNumber] = [2, 50]

        // First request - cache miss
        let array1 = try await cache.getArray(shape: shape, dataType: .float32)

        // Return array to cache
        await cache.returnArray(array1)

        // Second request - should be cache hit
        let array2 = try await cache.getArray(shape: shape, dataType: .float32)

        // Arrays should have same shape and type
        XCTAssertEqual(array2.shape, shape)
        XCTAssertEqual(array2.dataType, .float32)
    }

    /// Values written into a borrowed array must not be visible on the next request
    /// for the same shape/type once the array has been returned.
    func testReturnArrayResetsData() async throws {
        let shape: [NSNumber] = [10]
        let array = try await cache.getArray(shape: shape, dataType: .float32)

        // Set some values
        for i in 0..<array.count {
            array[i] = NSNumber(value: Float(i) * 2.0)
        }

        // Return to cache
        await cache.returnArray(array)

        // Get from cache again
        let cachedArray = try await cache.getArray(shape: shape, dataType: .float32)

        // Data should be reset to zero
        for i in 0..<cachedArray.count {
            XCTAssertEqual(cachedArray[i].floatValue, 0.0, "Element \(i) was not reset")
        }
    }

    // MARK: - Cache Size Management

    /// Returning more arrays than `maxCacheSize` must leave the cache functional.
    /// How the cache disposes of the surplus array is not observable from this test.
    func testCacheSizeLimit() async throws {
        // Create cache with small size
        let maxCacheSize = 4
        let smallCache = MLArrayCache(maxCacheSize: maxCacheSize)

        // Create arrays with same shape
        let shape: [NSNumber] = [100]
        var arrays: [MLMultiArray] = []

        // Borrow one more array than the cache can hold, all at the same time,
        // so that returning them really does push past the limit.
        for _ in 0..<(maxCacheSize + 1) {
            arrays.append(try await smallCache.getArray(shape: shape, dataType: .float32))
        }

        // Return all to cache - the last return exceeds the limit
        for array in arrays {
            await smallCache.returnArray(array)
        }

        // Cache should still work
        let finalArray = try await smallCache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(finalArray.shape, shape)
        XCTAssertEqual(finalArray.dataType, .float32)
    }

    // MARK: - Pre-warming Tests

    /// After `prewarm(shapes:)`, every pre-warmed shape/type pair can be requested
    /// and comes back with the expected shape and data type.
    func testPrewarmCache() async throws {
        let shapes: [(shape: [NSNumber], dataType: MLMultiArrayDataType)] = [
            ([1, 100], .float32),
            ([2, 50], .float32),
            ([1, 1024], .float16),
        ]

        await cache.prewarm(shapes: shapes)

        // Arrays should be available from cache; a thrown error fails the test directly.
        for (shape, dataType) in shapes {
            let array = try await cache.getArray(shape: shape, dataType: dataType)
            XCTAssertEqual(array.shape, shape)
            XCTAssertEqual(array.dataType, dataType)
        }
    }

    // MARK: - Clear Cache Tests

    /// The cache keeps serving requests after `clear()`.
    func testClearCache() async throws {
        let shape: [NSNumber] = [50]

        // Add array to cache
        let array = try await cache.getArray(shape: shape, dataType: .float32)
        await cache.returnArray(array)

        // Clear cache
        await cache.clear()

        // Next request should be cache miss (new array)
        let newArray = try await cache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(newArray.shape, shape)
        XCTAssertEqual(newArray.dataType, .float32)
    }

    // MARK: - Different Data Types

    /// Arrays of different data types with the same shape can coexist in the cache,
    /// and each request gets back the data type it asked for.
    func testDifferentDataTypes() async throws {
        let shape: [NSNumber] = [10, 10]

        // Test different data types
        let float32 = try await cache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(float32.dataType, .float32)

        let float16 = try await cache.getArray(shape: shape, dataType: .float16)
        XCTAssertEqual(float16.dataType, .float16)

        let int32 = try await cache.getArray(shape: shape, dataType: .int32)
        XCTAssertEqual(int32.dataType, .int32)

        // Return all to cache
        await cache.returnArray(float32)
        await cache.returnArray(float16)
        await cache.returnArray(int32)

        // Should get correct types back for every type that was returned
        let cachedFloat32 = try await cache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(cachedFloat32.dataType, .float32)

        let cachedFloat16 = try await cache.getArray(shape: shape, dataType: .float16)
        XCTAssertEqual(cachedFloat16.dataType, .float16)

        let cachedInt32 = try await cache.getArray(shape: shape, dataType: .int32)
        XCTAssertEqual(cachedInt32.dataType, .int32)
    }

    // MARK: - Thread Safety Tests

    /// Several tasks borrow, mutate, and return arrays concurrently; any error thrown
    /// inside a task fails the test, and the cache must remain usable afterwards.
    func testConcurrentAccess() async throws {
        let shape: [NSNumber] = [10]
        let testCache = try XCTUnwrap(cache)

        // Perform limited concurrent operations. A throwing group is used so that a
        // failure inside a child task propagates instead of being printed and ignored.
        try await withThrowingTaskGroup(of: Void.self) { group in
            // Reduced number of concurrent tasks
            for i in 0..<3 {
                group.addTask {
                    let array = try await testCache.getArray(shape: shape, dataType: .float32)
                    array[0] = NSNumber(value: Float(i))
                    await testCache.returnArray(array)
                }
            }
            try await group.waitForAll()
        }

        // Cache should still be functional
        let finalArray = try await testCache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(finalArray.shape, shape)
        XCTAssertEqual(finalArray.dataType, .float32)
    }

    /// Two outstanding requests for the same shape/type must yield distinct instances.
    func testGetArrayDoesNotReuseActiveBuffer() async throws {
        let shape: [NSNumber] = [1, 64]

        let array1 = try await cache.getArray(shape: shape, dataType: .float32)
        let array2 = try await cache.getArray(shape: shape, dataType: .float32)

        XCTAssertFalse(array1 === array2, "Cache should not hand out the same array while it is still borrowed")

        await cache.returnArray(array1)
        await cache.returnArray(array2)
    }

    // Removed performance test - can cause timing issues

    // MARK: - Global Cache Tests

    /// The global `sharedMLArrayCache` instance serves and accepts arrays.
    func testSharedCacheInstance() async throws {
        // Test the global shared instance
        let shape: [NSNumber] = [256]

        let array = try await sharedMLArrayCache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(array.shape, shape)

        // Return to shared cache
        await sharedMLArrayCache.returnArray(array)
    }
}