Reorg diarizer (#157)

### Why is this change needed?  See `Documentation/SpeakerDiarization.md` - as discussed, reorganizing the diarizer folder
2026-05-12 20:20:36 +00:00 · 2025-10-22 20:18:54 -04:00
parent 7fd5ac5446
commit e524cc4de1
24 changed files with 45 additions and 5 deletions
@@ -68,7 +68,6 @@ jobs:
        run: |
          # The output is now an array, so we need to access the first element
          DER=$(jq '.[0].der' offline_results.json)
-          JER=$(jq '.[0].jer' offline_results.json)
          RTF=$(jq '.[0].rtfx' offline_results.json)
          DURATION="1049"  # ES2004a duration in seconds
          SPEAKER_COUNT=$(jq '.[0].detectedSpeakers' offline_results.json)
@@ -84,7 +83,6 @@ jobs:
          INFERENCE_TIME=$(jq '.[0].timings.totalInferenceSeconds' offline_results.json)

          echo "DER=${DER}" >> $GITHUB_OUTPUT
-          echo "JER=${JER}" >> $GITHUB_OUTPUT
          echo "RTF=${RTF}" >> $GITHUB_OUTPUT
          echo "DURATION=${DURATION}" >> $GITHUB_OUTPUT
          echo "SPEAKER_COUNT=${SPEAKER_COUNT}" >> $GITHUB_OUTPUT
@@ -103,7 +101,6 @@ jobs:
        with:
          script: |
            const der = parseFloat('${{ steps.extract.outputs.DER }}');
-            const jer = parseFloat('${{ steps.extract.outputs.JER }}');
            const rtf = parseFloat('${{ steps.extract.outputs.RTF }}');
            const duration = parseFloat('${{ steps.extract.outputs.DURATION }}').toFixed(1);
            const speakerCount = '${{ steps.extract.outputs.SPEAKER_COUNT }}';
@@ -123,7 +120,6 @@ jobs:
            comment += '| Metric | Value | Target | Status | Description |\n';
            comment += '|--------|-------|--------|---------|-------------|\n';
            comment += `| **DER** | **${der.toFixed(1)}%** | <20% | ${der < 20 ? '✅' : '⚠️'} | Diarization Error Rate (lower is better) |\n`;
-            comment += `| **JER** | **${jer.toFixed(1)}%** | <18% | ${jer < 18 ? '✅' : '⚠️'} | Jaccard Error Rate |\n`;
            comment += `| **RTFx** | **${rtf.toFixed(2)}x** | >1.0x | ${rtf > 1.0 ? '✅' : '⚠️'} | Real-Time Factor (higher is faster) |\n\n`;

            comment += '### Offline VBx Pipeline Timing Breakdown\n';
@@ -32,6 +32,50 @@ for segment in result.segments {
 }
 ```

+## Source Layout
+
+The diarizer module mirrors the three-stage pipeline and its offline counterpart. Files live under `Sources/FluidAudio/Diarizer/`:
+
+```
+Core/
+├── DiarizerManager.swift        # Real-time orchestrator and chunk scheduler
+├── DiarizerTypes.swift          # Public models/configs shared across stages
+└── DiarizerModels.swift         # Core ML bundle management
+
+Segmentation/
+├── SegmentationProcessor.swift  # VAD + powerset segmentation inference
+├── SlidingWindow.swift          # Frame windowing helpers
+└── AudioValidation.swift        # Streaming quality/embedding validation
+
+Extraction/
+└── EmbeddingExtractor.swift     # WeSpeaker embedding inference
+
+Clustering/
+├── SpeakerManager.swift         # Active speaker tracking and assignment
+├── SpeakerTypes.swift           # Speaker/raw embedding representations
+└── SpeakerOperations.swift      # Distance/scoring utilities
+
+Offline/
+├── Core/
+│   ├── OfflineDiarizerManager.swift
+│   ├── OfflineDiarizerTypes.swift
+│   └── OfflineDiarizerModels.swift
+├── Segmentation/
+│   └── OfflineSegmentationProcessor.swift
+├── Extraction/
+│   ├── OfflineEmbeddingExtractor.swift
+│   ├── PLDATransform.swift
+│   └── WeightInterpolation.swift
+├── Clustering/
+│   ├── AHCClustering.swift
+│   └── VBxClustering.swift
+└── Utils/
+    ├── OfflineReconstruction.swift
+    └── VDSPOperations.swift
+```
+
+Use this layout as the reference when adding new diarization capabilities so orchestration, segmentation, embedding extraction, and clustering stay isolated.
+
 ## Manual Model Loading

 If you deploy in an offline environment, stage the Core ML bundles manually and skip the automatic HuggingFace downloader.
@@ -31,7 +31,7 @@ Computes agglomerative hierarchical clustering using centroid linkage on the inp

 ## Integration

-Used by `Sources/FluidAudio/Diarizer/Offline/AHCClustering.swift` to perform speaker embedding clustering, which is a core component of the diarization pipeline.
+Used by `Sources/FluidAudio/Diarizer/Offline/Clustering/AHCClustering.swift` to perform speaker embedding clustering, which is a core component of the diarization pipeline.

 ## Source