From 770af29a655bacc44d7d1794a8d4e964de72010c Mon Sep 17 00:00:00 2001 From: Alex-Wengg Date: Mon, 4 May 2026 00:31:06 -0400 Subject: [PATCH] feat(tts/kokoro-ane/zh): include g2pw.mlmodelc in requiredModelsZh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the g2pW CoreML bundle into the bulk `ensureModels(.mandarin)` grab so the polyphone-disambiguation path is on by default for any fresh Mandarin checkout. Without this entry the model was only fetchable via the lazy fallback that this PR also documents as nil-on-failure — meaning users hit the dict-only path even when their network worked fine. The two auxiliary text files (`vocab.txt`, `POLYPHONIC_CHARS.txt`) stay on the lazy `ensureMandarinG2pw` helper: `DownloadUtils.downloadRepo`'s subPath matcher does not whitelist `.txt` (only `.json`/`.model`/`.bin`), so adding them to `requiredModelsZh` would trigger an infinite re-download loop on each startup. The manual fetch already handles them correctly. --- Sources/FluidAudio/ModelNames.swift | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Sources/FluidAudio/ModelNames.swift b/Sources/FluidAudio/ModelNames.swift index d60a1c9f..4b37329b 100644 --- a/Sources/FluidAudio/ModelNames.swift +++ b/Sources/FluidAudio/ModelNames.swift @@ -1006,6 +1006,17 @@ public enum ModelNames { /// `/voices/zf_001.bin`. public static let defaultVoiceFileZh = "voices/zf_001.bin" + /// Mandarin g2pW polyphone-disambiguator CoreML bundle. Lives under + /// `/g2pw/` — included in `requiredModelsZh` so the bulk + /// `ensureModels(.mandarin)` grab pulls it without an extra round + /// trip. The two auxiliary text files (`vocab.txt`, + /// `POLYPHONIC_CHARS.txt`) ship via the lazy + /// `KokoroAneResourceDownloader.ensureMandarinG2pw` helper because + /// `DownloadUtils.downloadRepo` does not whitelist `.txt` for + /// subPath repos and a manual fetch keeps the bulk-grab matcher + /// idempotent. 
+ public static let g2pwModelZh = "g2pw/g2pw.mlmodelc" + /// All seven .mlmodelc bundles. public static let requiredCoreMLModels: Set<String> = [ albert, postAlbert, alignment, prosody, noise, vocoder, tail, @@ -1017,9 +1028,11 @@ public enum ModelNames { } /// CoreML bundles + the vocab JSON + the Mandarin default voice .bin - /// (which lives under `voices/`). + /// (under `voices/`) + the g2pW CoreML bundle (under `g2pw/`). public static var requiredModelsZh: Set<String> { - requiredCoreMLModels.union([vocab, defaultVoiceFileZh]) + requiredCoreMLModels.union([ + vocab, defaultVoiceFileZh, g2pwModelZh, + ]) } }