mirror of
https://github.com/FluidInference/FluidAudio.git
synced 2026-05-12 20:20:36 +00:00
d9eef864d2
## Summary

Systematic cleanup of the ASR module addressing tech debt items from #457. Net reduction of ~430 lines while fixing real bugs and improving maintainability.

### Bug fixes

- **`enableFP16` silently ignored** — `optimizedConfiguration(enableFP16:)` delegated to a shared factory that hardcoded `allowLowPrecisionAccumulationOnGPU = true`, ignoring the caller's parameter
- **`MLArrayCache.returnArray` only reset float32 data** — cached arrays of other types (float16, int32) retained stale data from previous use
- **CTC model auto-detection broken** — `Repo.parakeetCtc110m.folderName` returned `"parakeet-ctc-110m"` instead of `"parakeet-ctc-110m-coreml"` because the `folderName` switch fell through to a `default` case that stripped the `-coreml` suffix. Same for `parakeetCtc06b`.
- **Duplicate tokens at chunk merge boundary** — `mergeByMidpoint` used `<=`/`>=` so tokens exactly at the cutoff appeared in both left and right chunks

### Dead code removal

- Deleted `ANEOptimizer` indirection layer (166 lines) — was a pass-through wrapping `MLModel` with no optimization
- Deleted `PerformanceMonitor` actor and `AggregatedMetrics` — never instantiated, component times hardcoded to 0
- Deleted `getFloat16Array` from MLArrayCache — never called
- Deleted `sliceEncoderOutput` from AsrTranscription — never called (30 lines)
- Deleted `loadWithANEOptimization` from AsrModels — never called
- Removed unused `tokenTimings` parameter chain through `processTranscriptionResult`
- Removed unused `import OSLog` / `import CoreML` across 5 files
- Removed `nonisolated(unsafe)` from SlidingWindowAsrManager (types already Sendable)

### Duplication elimination

- Extracted `clearCachedCtcData()` helper (replaced 3× triple-nil assignments)
- Extracted `decoderState(for:)` / `setDecoderState(_:for:)` (replaced 4× switch blocks)
- Extracted `frameAlignedAudio()` (replaced 2× duplicated frame-alignment blocks)
- Added `ASRConstants.secondsPerEncoderFrame` (replaced 5× magic `0.08`)
- Replaced hardcoded `16_000` with `config.sampleRate` / `ASRConstants.sampleRate`
- Extracted `MLModelConfigurationUtils.defaultConfiguration()` (replaced 5× copy-pasted config methods)
- Extracted `MLModelConfigurationUtils.defaultModelsDirectory()` (replaced 3× copy-pasted directory methods)
- Consolidated duplicate `vocabularyFile` / `vocabularyFileArray` constants

### File organization

- Moved `PerformanceMetrics.swift`, `ProgressEmitter.swift`, `MLArrayCache.swift` from `ASR/Parakeet/` to `Shared/` (used by multiple modules)
- Renamed `StreamingAudioSourceFactory` → `AudioSourceFactory`, `StreamingAudioSampleSource` → `AudioSampleSource` (types used by both ASR and Diarizer)
- Renamed files to match type names: `SortformerDiarizerPipeline.swift` → `SortformerDiarizer.swift`, `LSEENDDiarizerAPI.swift` → `LSEENDDiarizer.swift`, `NemotronPipeline.swift` → `NemotronStreamingAsrManager+Pipeline.swift`
- Replaced force unwraps in `RnntDecoder.swift` with `guard let` + descriptive errors
- Removed stale TODO about decoder state in AsrManager

### Benchmark script

- Added `Scripts/run_parakeet_benchmarks.sh` — runs all 6 benchmarks (v3, v2, TDT-CTC-110M, CTC earnings, EOU 320ms, Nemotron 1120ms) with WER comparison against `benchmarks100.md` baselines and regression detection
- Referenced from `Documentation/ASR/benchmarks100.md`

## Verified — no regressions

```
Model                     Baseline   Current   Delta
Parakeet TDT v3 (0.6B)    2.6%       2.64%     +0.04%
Parakeet TDT v2 (0.6B)    3.8%       3.79%     -0.01%
CTC-TDT 110M              3.6%       3.56%     -0.04%
CTC Earnings              16.54%     16.51%    -0.03%
EOU 320ms (120M)          7.11%      7.11%     +0.00%
Nemotron 1120ms (0.6B)    1.99%      1.99%     +0.00%
```

## Test plan

- [x] `swift build` passes
- [x] `swift test` passes (all existing tests, updated for removed dead code)
- [x] All 6 ASR benchmarks match baselines (100 files each)
- [ ] `swift format lint` passes
221 lines
7.0 KiB
Swift
221 lines
7.0 KiB
Swift
@preconcurrency import CoreML
|
|
import Foundation
|
|
import XCTest
|
|
|
|
@testable import FluidAudio
|
|
|
|
/// Tests for `MLArrayCache`: array creation, reuse after return, data reset on return,
/// cache size limiting, pre-warming, clearing, mixed data types, and concurrent use.
final class MLArrayCacheTests: XCTestCase {

    /// Cache under test. A fresh instance is created in `setUp()` before every test.
    var cache: MLArrayCache!

    override func setUp() async throws {
        cache = MLArrayCache(maxCacheSize: 10)
    }

    override func tearDown() async throws {
        // XCTest keeps test-case instances alive for the whole run; drop the cache
        // so its buffers are not retained after the test finishes.
        cache = nil
    }

    // MARK: - Basic Cache Operations

    /// A freshly requested array has the requested shape and data type and, outside CI,
    /// a data pointer aligned to `ANEMemoryUtils.aneAlignment`.
    func testGetArrayCreatesANEAligned() async throws {
        let shape: [NSNumber] = [1, 100]
        let array = try await cache.getArray(shape: shape, dataType: .float32)

        XCTAssertEqual(array.shape, shape)
        XCTAssertEqual(array.dataType, .float32)

        // In CI, we don't test alignment since we use standard arrays.
        let isCI = ProcessInfo.processInfo.environment["CI"] != nil
        guard !isCI else { return }

        // Verify ANE alignment only in non-CI environments.
        let pointerValue = Int(bitPattern: array.dataPointer)
        XCTAssertEqual(pointerValue % ANEMemoryUtils.aneAlignment, 0)
    }

    /// Requesting the same shape/type after returning an array yields a usable array
    /// with the requested shape and data type.
    func testCacheHitOnSecondRequest() async throws {
        let shape: [NSNumber] = [2, 50]

        // First request - cache miss.
        let array1 = try await cache.getArray(shape: shape, dataType: .float32)

        // Return array to cache.
        await cache.returnArray(array1)

        // Second request - should be a cache hit.
        let array2 = try await cache.getArray(shape: shape, dataType: .float32)

        // Arrays should have the same shape and type.
        XCTAssertEqual(array2.shape, shape)
        XCTAssertEqual(array2.dataType, .float32)
    }

    /// An array obtained after a written-to array was returned contains only zeros.
    func testReturnArrayResetsData() async throws {
        let shape: [NSNumber] = [10]
        let array = try await cache.getArray(shape: shape, dataType: .float32)

        // Fill with non-zero values (index 0 stays 0, the rest are 2, 4, ...).
        for i in 0..<array.count {
            array[i] = NSNumber(value: Float(i) * 2.0)
        }

        // Return to cache.
        await cache.returnArray(array)

        // Get from cache again.
        let cachedArray = try await cache.getArray(shape: shape, dataType: .float32)

        // Data should be reset to zero.
        for i in 0..<cachedArray.count {
            XCTAssertEqual(cachedArray[i].floatValue, 0.0)
        }
    }

    // MARK: - Cache Size Management

    /// Returning more arrays than `maxCacheSize` does not break the cache.
    func testCacheSizeLimit() async throws {
        // Create cache with a small size.
        let smallCache = MLArrayCache(maxCacheSize: 4)

        // Create arrays with the same shape.
        let shape: [NSNumber] = [100]
        var arrays: [MLMultiArray] = []

        // Get 4 arrays.
        for _ in 0..<4 {
            arrays.append(try await smallCache.getArray(shape: shape, dataType: .float32))
        }

        // Return all to cache.
        for array in arrays {
            await smallCache.returnArray(array)
        }

        // Try to return one more - should not exceed the limit.
        let extraArray = try await smallCache.getArray(shape: shape, dataType: .float32)
        await smallCache.returnArray(extraArray)

        // Cache should still work. The result is non-optional, so assert on its
        // contents rather than on non-nil-ness (which could never fail).
        let finalArray = try await smallCache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(finalArray.shape, shape)
        XCTAssertEqual(finalArray.dataType, .float32)
    }

    // MARK: - Pre-warming Tests

    /// After `prewarm(shapes:)`, every pre-warmed shape/type can be requested successfully.
    func testPrewarmCache() async throws {
        let shapes: [(shape: [NSNumber], dataType: MLMultiArrayDataType)] = [
            ([1, 100], .float32),
            ([2, 50], .float32),
            ([1, 1024], .float16),
        ]

        await cache.prewarm(shapes: shapes)

        // Arrays should be available from cache; a thrown error fails the test directly.
        for (shape, dataType) in shapes {
            let array = try await cache.getArray(shape: shape, dataType: dataType)
            XCTAssertEqual(array.shape, shape)
            XCTAssertEqual(array.dataType, dataType)
        }
    }

    // MARK: - Clear Cache Tests

    /// The cache keeps serving arrays after `clear()`.
    func testClearCache() async throws {
        let shape: [NSNumber] = [50]

        // Add array to cache.
        let array = try await cache.getArray(shape: shape, dataType: .float32)
        await cache.returnArray(array)

        // Clear cache.
        await cache.clear()

        // Next request should be a cache miss (new array).
        let newArray = try await cache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(newArray.shape, shape)
        XCTAssertEqual(newArray.dataType, .float32)
    }

    // MARK: - Different Data Types

    /// Arrays of the same shape but different data types are handed out with the requested type,
    /// both on first request and after being returned to the cache.
    func testDifferentDataTypes() async throws {
        let shape: [NSNumber] = [10, 10]

        // Test different data types.
        let float32 = try await cache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(float32.dataType, .float32)

        let float16 = try await cache.getArray(shape: shape, dataType: .float16)
        XCTAssertEqual(float16.dataType, .float16)

        let int32 = try await cache.getArray(shape: shape, dataType: .int32)
        XCTAssertEqual(int32.dataType, .int32)

        // Return all to cache.
        await cache.returnArray(float32)
        await cache.returnArray(float16)
        await cache.returnArray(int32)

        // Should get correct types back for every type that was returned.
        let requestedTypes: [MLMultiArrayDataType] = [.float32, .float16, .int32]
        for dataType in requestedTypes {
            let cached = try await cache.getArray(shape: shape, dataType: dataType)
            XCTAssertEqual(cached.dataType, dataType)
            XCTAssertEqual(cached.shape, shape)
        }
    }

    // MARK: - Thread Safety Tests

    /// Several tasks borrowing and returning arrays concurrently leave the cache functional.
    /// Any error thrown inside a task fails the test instead of being silently logged.
    func testConcurrentAccess() async throws {
        let shape: [NSNumber] = [10]
        let testCache = try XCTUnwrap(cache)

        // Perform a limited number of concurrent operations.
        try await withThrowingTaskGroup(of: Void.self) { group in
            for i in 0..<3 {
                group.addTask {
                    let array = try await testCache.getArray(shape: shape, dataType: .float32)
                    array[0] = NSNumber(value: Float(i))
                    await testCache.returnArray(array)
                }
            }
            // Child errors are only surfaced when the results are awaited explicitly.
            try await group.waitForAll()
        }

        // Cache should still be functional.
        let finalArray = try await cache.getArray(shape: shape, dataType: .float32)
        XCTAssertEqual(finalArray.shape, shape)
        XCTAssertEqual(finalArray.dataType, .float32)
    }

    /// Two outstanding requests for the same shape/type must yield distinct array instances.
    func testGetArrayDoesNotReuseActiveBuffer() async throws {
        let shape: [NSNumber] = [1, 64]

        let array1 = try await cache.getArray(shape: shape, dataType: .float32)
        let array2 = try await cache.getArray(shape: shape, dataType: .float32)

        XCTAssertFalse(array1 === array2, "Cache should not hand out the same array while it is still borrowed")

        await cache.returnArray(array1)
        await cache.returnArray(array2)
    }

    // Removed performance test - can cause timing issues

    // MARK: - Global Cache Tests

    /// The global `sharedMLArrayCache` instance serves and accepts arrays.
    func testSharedCacheInstance() async throws {
        // Test the global shared instance.
        let shape: [NSNumber] = [256]
        let array = try await sharedMLArrayCache.getArray(shape: shape, dataType: .float32)

        XCTAssertEqual(array.shape, shape)

        // Return to shared cache.
        await sharedMLArrayCache.returnArray(array)
    }

}
|