diff --git a/.github/workflows/offline-pipeline.yml b/.github/workflows/offline-pipeline.yml index 11f6c765..1413eeb2 100644 --- a/.github/workflows/offline-pipeline.yml +++ b/.github/workflows/offline-pipeline.yml @@ -68,7 +68,6 @@ jobs: run: | # The output is now an array, so we need to access the first element DER=$(jq '.[0].der' offline_results.json) - JER=$(jq '.[0].jer' offline_results.json) RTF=$(jq '.[0].rtfx' offline_results.json) DURATION="1049" # ES2004a duration in seconds SPEAKER_COUNT=$(jq '.[0].detectedSpeakers' offline_results.json) @@ -84,7 +83,6 @@ jobs: INFERENCE_TIME=$(jq '.[0].timings.totalInferenceSeconds' offline_results.json) echo "DER=${DER}" >> $GITHUB_OUTPUT - echo "JER=${JER}" >> $GITHUB_OUTPUT echo "RTF=${RTF}" >> $GITHUB_OUTPUT echo "DURATION=${DURATION}" >> $GITHUB_OUTPUT echo "SPEAKER_COUNT=${SPEAKER_COUNT}" >> $GITHUB_OUTPUT @@ -103,7 +101,6 @@ jobs: with: script: | const der = parseFloat('${{ steps.extract.outputs.DER }}'); - const jer = parseFloat('${{ steps.extract.outputs.JER }}'); const rtf = parseFloat('${{ steps.extract.outputs.RTF }}'); const duration = parseFloat('${{ steps.extract.outputs.DURATION }}').toFixed(1); const speakerCount = '${{ steps.extract.outputs.SPEAKER_COUNT }}'; @@ -123,7 +120,6 @@ jobs: comment += '| Metric | Value | Target | Status | Description |\n'; comment += '|--------|-------|--------|---------|-------------|\n'; comment += `| **DER** | **${der.toFixed(1)}%** | <20% | ${der < 20 ? '✅' : '⚠️'} | Diarization Error Rate (lower is better) |\n`; - comment += `| **JER** | **${jer.toFixed(1)}%** | <18% | ${jer < 18 ? '✅' : '⚠️'} | Jaccard Error Rate |\n`; comment += `| **RTFx** | **${rtf.toFixed(2)}x** | >1.0x | ${rtf > 1.0 ? '✅' : '⚠️'} | Real-Time Factor (higher is faster) |\n\n`; comment += '### Offline VBx Pipeline Timing Breakdown\n'; diff --git a/Documentation/SpeakerDiarization.md b/Documentation/SpeakerDiarization.md index 45e12d2c..a842cd13 100644 --- a/Documentation/SpeakerDiarization.md +++ b/Documentation/SpeakerDiarization.md @@ -32,6 +32,50 @@ for segment in result.segments { } ``` +## Source Layout + +The diarizer module mirrors the three-stage pipeline and its offline counterpart. Files live under `Sources/FluidAudio/Diarizer/`: + +``` +Core/ +├── DiarizerManager.swift # Real-time orchestrator and chunk scheduler +├── DiarizerTypes.swift # Public models/configs shared across stages +└── DiarizerModels.swift # Core ML bundle management + +Segmentation/ +├── SegmentationProcessor.swift # VAD + powerset segmentation inference +├── SlidingWindow.swift # Frame windowing helpers +└── AudioValidation.swift # Streaming quality/embedding validation + +Extraction/ +└── EmbeddingExtractor.swift # WeSpeaker embedding inference + +Clustering/ +├── SpeakerManager.swift # Active speaker tracking and assignment +├── SpeakerTypes.swift # Speaker/raw embedding representations +└── SpeakerOperations.swift # Distance/scoring utilities + +Offline/ +├── Core/ +│ ├── OfflineDiarizerManager.swift +│ ├── OfflineDiarizerTypes.swift +│ └── OfflineDiarizerModels.swift +├── Segmentation/ +│ └── OfflineSegmentationProcessor.swift +├── Extraction/ +│ ├── OfflineEmbeddingExtractor.swift +│ ├── PLDATransform.swift +│ └── WeightInterpolation.swift +├── Clustering/ +│ ├── AHCClustering.swift +│ └── VBxClustering.swift +└── Utils/ + ├── OfflineReconstruction.swift + └── VDSPOperations.swift +``` + +Use this layout as the reference when adding new diarization capabilities so orchestration, segmentation, embedding extraction, and clustering stay isolated. + ## Manual Model Loading If you deploy in an offline environment, stage the Core ML bundles manually and skip the automatic HuggingFace downloader. diff --git a/Sources/FastClusterWrapper/README.md b/Sources/FastClusterWrapper/README.md index d026cfd0..16464db3 100644 --- a/Sources/FastClusterWrapper/README.md +++ b/Sources/FastClusterWrapper/README.md @@ -31,7 +31,7 @@ Computes agglomerative hierarchical clustering using centroid linkage on the inp ## Integration -Used by `Sources/FluidAudio/Diarizer/Offline/AHCClustering.swift` to perform speaker embedding clustering, which is a core component of the diarization pipeline. +Used by `Sources/FluidAudio/Diarizer/Offline/Clustering/AHCClustering.swift` to perform speaker embedding clustering, which is a core component of the diarization pipeline. ## Source diff --git a/Sources/FluidAudio/Diarizer/SpeakerManager.swift b/Sources/FluidAudio/Diarizer/Clustering/SpeakerManager.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/SpeakerManager.swift rename to Sources/FluidAudio/Diarizer/Clustering/SpeakerManager.swift diff --git a/Sources/FluidAudio/Diarizer/SpeakerOperations.swift b/Sources/FluidAudio/Diarizer/Clustering/SpeakerOperations.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/SpeakerOperations.swift rename to Sources/FluidAudio/Diarizer/Clustering/SpeakerOperations.swift diff --git a/Sources/FluidAudio/Diarizer/SpeakerTypes.swift b/Sources/FluidAudio/Diarizer/Clustering/SpeakerTypes.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/SpeakerTypes.swift rename to Sources/FluidAudio/Diarizer/Clustering/SpeakerTypes.swift diff --git a/Sources/FluidAudio/Diarizer/DiarizerManager.swift b/Sources/FluidAudio/Diarizer/Core/DiarizerManager.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/DiarizerManager.swift rename to Sources/FluidAudio/Diarizer/Core/DiarizerManager.swift diff --git a/Sources/FluidAudio/Diarizer/DiarizerModels.swift b/Sources/FluidAudio/Diarizer/Core/DiarizerModels.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/DiarizerModels.swift rename to Sources/FluidAudio/Diarizer/Core/DiarizerModels.swift diff --git a/Sources/FluidAudio/Diarizer/DiarizerTypes.swift b/Sources/FluidAudio/Diarizer/Core/DiarizerTypes.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/DiarizerTypes.swift rename to Sources/FluidAudio/Diarizer/Core/DiarizerTypes.swift diff --git a/Sources/FluidAudio/Diarizer/EmbeddingExtractor.swift b/Sources/FluidAudio/Diarizer/Extraction/EmbeddingExtractor.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/EmbeddingExtractor.swift rename to Sources/FluidAudio/Diarizer/Extraction/EmbeddingExtractor.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/AHCClustering.swift b/Sources/FluidAudio/Diarizer/Offline/Clustering/AHCClustering.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/AHCClustering.swift rename to Sources/FluidAudio/Diarizer/Offline/Clustering/AHCClustering.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/VBxClustering.swift b/Sources/FluidAudio/Diarizer/Offline/Clustering/VBxClustering.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/VBxClustering.swift rename to Sources/FluidAudio/Diarizer/Offline/Clustering/VBxClustering.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/OfflineDiarizerManager.swift b/Sources/FluidAudio/Diarizer/Offline/Core/OfflineDiarizerManager.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/OfflineDiarizerManager.swift rename to Sources/FluidAudio/Diarizer/Offline/Core/OfflineDiarizerManager.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/OfflineDiarizerModels.swift b/Sources/FluidAudio/Diarizer/Offline/Core/OfflineDiarizerModels.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/OfflineDiarizerModels.swift rename to Sources/FluidAudio/Diarizer/Offline/Core/OfflineDiarizerModels.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/OfflineDiarizerTypes.swift b/Sources/FluidAudio/Diarizer/Offline/Core/OfflineDiarizerTypes.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/OfflineDiarizerTypes.swift rename to Sources/FluidAudio/Diarizer/Offline/Core/OfflineDiarizerTypes.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/OfflineEmbeddingExtractor.swift b/Sources/FluidAudio/Diarizer/Offline/Extraction/OfflineEmbeddingExtractor.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/OfflineEmbeddingExtractor.swift rename to Sources/FluidAudio/Diarizer/Offline/Extraction/OfflineEmbeddingExtractor.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/PLDATransform.swift b/Sources/FluidAudio/Diarizer/Offline/Extraction/PLDATransform.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/PLDATransform.swift rename to Sources/FluidAudio/Diarizer/Offline/Extraction/PLDATransform.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/WeightInterpolation.swift b/Sources/FluidAudio/Diarizer/Offline/Extraction/WeightInterpolation.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/WeightInterpolation.swift rename to Sources/FluidAudio/Diarizer/Offline/Extraction/WeightInterpolation.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/OfflineSegmentationProcessor.swift b/Sources/FluidAudio/Diarizer/Offline/Segmentation/OfflineSegmentationProcessor.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/OfflineSegmentationProcessor.swift rename to Sources/FluidAudio/Diarizer/Offline/Segmentation/OfflineSegmentationProcessor.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/OfflineReconstruction.swift b/Sources/FluidAudio/Diarizer/Offline/Utils/OfflineReconstruction.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/OfflineReconstruction.swift rename to Sources/FluidAudio/Diarizer/Offline/Utils/OfflineReconstruction.swift diff --git a/Sources/FluidAudio/Diarizer/Offline/VDSPOperations.swift b/Sources/FluidAudio/Diarizer/Offline/Utils/VDSPOperations.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/Offline/VDSPOperations.swift rename to Sources/FluidAudio/Diarizer/Offline/Utils/VDSPOperations.swift diff --git a/Sources/FluidAudio/Diarizer/AudioValidation.swift b/Sources/FluidAudio/Diarizer/Segmentation/AudioValidation.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/AudioValidation.swift rename to Sources/FluidAudio/Diarizer/Segmentation/AudioValidation.swift diff --git a/Sources/FluidAudio/Diarizer/SegmentationProcessor.swift b/Sources/FluidAudio/Diarizer/Segmentation/SegmentationProcessor.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/SegmentationProcessor.swift rename to Sources/FluidAudio/Diarizer/Segmentation/SegmentationProcessor.swift diff --git a/Sources/FluidAudio/Diarizer/SlidingWindow.swift b/Sources/FluidAudio/Diarizer/Segmentation/SlidingWindow.swift similarity index 100% rename from Sources/FluidAudio/Diarizer/SlidingWindow.swift rename to Sources/FluidAudio/Diarizer/Segmentation/SlidingWindow.swift