mirror of
https://github.com/FluidInference/FluidAudio.git
synced 2026-05-12 20:20:36 +00:00
Make parakeetTdtCtc110m folderName consistent with other Parakeet models (#453)
## Summary

- Simplifies `folderName` property by removing 4 redundant special cases
- Keeps `kokoro` and `sortformer` special cases to avoid breaking changes for cached models
- Uses default rule for other models: strip `-coreml` suffix from name
- Eliminates inconsistency by applying consistent pattern
- **Fixes offline diarizer PLDA parameters download issue**

## Context

This addresses the inconsistency raised in #442. The original code had 11 special cases (6 for shortened names + 5 for nested directories). Many just removed the `-coreml` suffix, which can be handled by a default rule.

**Before (11 special cases):**

```swift
case .kokoro: return "kokoro"
case .parakeetEou160: return "parakeet-eou-streaming/160ms"
case .parakeetEou320: return "parakeet-eou-streaming/320ms"
case .parakeetEou1280: return "parakeet-eou-streaming/1280ms"
case .nemotronStreaming1120: return "nemotron-streaming/1120ms"
case .nemotronStreaming560: return "nemotron-streaming/560ms"
case .sortformer: return "sortformer"
case .lseend: return "ls-eend"
case .pocketTts: return "pocket-tts"
case .multilingualG2p: return "charsiu-g2p-byt5"
case .parakeetTdtCtc110m: return "parakeet-tdt-ctc-110m"
default: return name
```

**After (7 special cases):**

```swift
case .kokoro: return "kokoro" // Keep for backwards compat
case .parakeetEou160: return "parakeet-eou-streaming/160ms"
case .parakeetEou320: return "parakeet-eou-streaming/320ms"
case .parakeetEou1280: return "parakeet-eou-streaming/1280ms"
case .nemotronStreaming1120: return "nemotron-streaming/1120ms"
case .nemotronStreaming560: return "nemotron-streaming/560ms"
case .sortformer: return "sortformer" // Keep for backwards compat
default: return name.replacingOccurrences(of: "-coreml", with: "")
```

## Changes

- **Removed special cases** for: `lseend`, `pocketTts`, `multilingualG2p`, `parakeetTdtCtc110m` (now use default)
- **Kept special cases** for: `kokoro`, `sortformer` (avoid breaking cached model paths)
- **All Parakeet models now consistent**: `.parakeet`, `.parakeetV2`, `.parakeetTdtCtc110m` all use default
- **Added `plda-parameters.json`** to `OfflineDiarizer.requiredModels` to fix CI benchmark failure

## Offline Diarizer Fix

The diarization benchmark was failing in CI with:

```
PLDA parameters file not found in /Users/runner/Library/Application Support/FluidAudio/Models
```

This was because `plda-parameters.json` wasn't in the `requiredModels` set, so it never got downloaded when using `--auto-download`.

## Breaking Changes

None - kept `kokoro` and `sortformer` special cases to preserve existing folder names.

Fixes #442

## Test plan

- [x] Build completes successfully
- [x] All tests pass
- [x] parakeetTdtCtc110m now consistent with other Parakeet models
- [x] No breaking changes for kokoro or sortformer users
- [ ] CI diarization benchmark should now pass
This commit is contained in:
@@ -17,6 +17,7 @@ public struct OfflineDiarizerModels: Sendable {
|
||||
private static func loadPLDAPsi(from directory: URL) throws -> [Double] {
|
||||
let candidatePaths = [
|
||||
directory.appendingPathComponent("plda-parameters.json", isDirectory: false),
|
||||
directory.appendingPathComponent("speaker-diarization/plda-parameters.json", isDirectory: false),
|
||||
directory.appendingPathComponent("speaker-diarization-coreml/plda-parameters.json", isDirectory: false),
|
||||
directory.appendingPathComponent("speaker-diarization-offline/plda-parameters.json", isDirectory: false),
|
||||
]
|
||||
|
||||
@@ -129,16 +129,8 @@ public enum Repo: String, CaseIterable {
|
||||
return "nemotron-streaming/560ms"
|
||||
case .sortformer:
|
||||
return "sortformer"
|
||||
case .lseend:
|
||||
return "ls-eend"
|
||||
case .pocketTts:
|
||||
return "pocket-tts"
|
||||
case .multilingualG2p:
|
||||
return "charsiu-g2p-byt5"
|
||||
case .parakeetTdtCtc110m:
|
||||
return "parakeet-tdt-ctc-110m"
|
||||
default:
|
||||
return name
|
||||
return name.replacingOccurrences(of: "-coreml", with: "")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -166,6 +158,7 @@ public enum ModelNames {
|
||||
public static let fbank = "FBank"
|
||||
public static let embedding = "Embedding"
|
||||
public static let pldaRho = "PldaRho"
|
||||
public static let pldaParameters = "plda-parameters.json"
|
||||
|
||||
public static let segmentationFile = segmentation + ".mlmodelc"
|
||||
public static let fbankFile = fbank + ".mlmodelc"
|
||||
@@ -182,6 +175,7 @@ public enum ModelNames {
|
||||
fbankPath,
|
||||
embeddingPath,
|
||||
pldaRhoPath,
|
||||
pldaParameters,
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@@ -125,7 +125,7 @@ final class ModelNamesTests: XCTestCase {
|
||||
// Verify name (repo slug with -coreml suffix)
|
||||
XCTAssertEqual(repo.name, "parakeet-tdt-ctc-110m-coreml")
|
||||
|
||||
// Verify folder name (simplified local folder name)
|
||||
// Verify folder name (simplified - strips -coreml suffix by default)
|
||||
XCTAssertEqual(repo.folderName, "parakeet-tdt-ctc-110m")
|
||||
|
||||
// Should have no subpath (not a variant repo)
|
||||
|
||||
Reference in New Issue
Block a user