Simplify USB audio path: drop residual scaffolding from long session

Backend:
- audio.go: drop source rotation + "no-data reopen with next device" loop.
  One source (UAC1Gadget; falls back to hw:1,0 only if sysfs lookup fails).
- internal/audio: remove unused Reader interface and unavailableCapture stub.
  Stub now returns the concrete type with an error.
- webrtc.go: inline single-use resolveAudioCodec helper; DRY video/audio RTCP
  drain into drainRTCP; fold startSessionAudio into the connect callback.

Frontend:
- devices.$id.tsx: drop remoteMediaStreamRef track-merging. Backend tracks
  share stream ID "kvm", so pion delivers them in one MediaStream — just
  assign event.streams[0].
- WebRTCVideo.tsx: replace dynamic per-track <audio> creation + ref array
  with a single hidden <audio> bound to mediaStream.

Remote agent:
- Drop PipeWire/wpctl detection path; plughw: works directly.
- Drop killStaleAudioToneProcesses pkill workaround; the (cmd, cancel, done)
  trio collapses to a single *exec.Cmd field with Start/Kill/Wait.

E2E:
- ra-audio.spec.ts: drop attachAudioDiagnostics scaffold and openReadyPage
  duplicate. Spec is now linear: setup → wait for track → diff stats → tone.

Net: ~355 LOC removed.
This commit is contained in:
Adam Shiervani
2026-05-19 16:22:47 +02:00
parent acb8093022
commit 9f6a0cdb84
8 changed files with 203 additions and 558 deletions
+36 -99
View File
@@ -3,7 +3,6 @@ package kvm
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
@@ -16,25 +15,23 @@ import (
"github.com/pion/webrtc/v4/pkg/media"
)
var audioCancel context.CancelFunc
var audioStopped chan struct{}
var audioRuntimeMu sync.Mutex
type audioCaptureSource struct {
name string
device string
}
var (
audioCancel context.CancelFunc
audioStopped chan struct{}
audioMu sync.Mutex
)
// startAudio stops any running capture and, if track is non-nil, starts a new
// capture writing to it. Calling with nil simply stops the current capture.
func startAudio(track *webrtc.TrackLocalStaticSample) {
audioMu.Lock()
defer audioMu.Unlock()
stopAudioLocked()
if track == nil {
return
}
audioRuntimeMu.Lock()
defer audioRuntimeMu.Unlock()
stopAudioUnderLock()
ctx, cancel := context.WithCancel(context.Background())
audioCancel = cancel
audioStopped = make(chan struct{})
@@ -43,17 +40,15 @@ func startAudio(track *webrtc.TrackLocalStaticSample) {
}
func stopAudio() {
audioRuntimeMu.Lock()
defer audioRuntimeMu.Unlock()
stopAudioUnderLock()
audioMu.Lock()
defer audioMu.Unlock()
stopAudioLocked()
}
func stopAudioUnderLock() {
func stopAudioLocked() {
if audioCancel == nil {
return
}
audioCancel()
<-audioStopped
audioCancel = nil
@@ -63,27 +58,21 @@ func stopAudioUnderLock() {
func runAudioCapture(ctx context.Context, track *webrtc.TrackLocalStaticSample, stopped chan<- struct{}) {
defer close(stopped)
device := alsaCaptureDevice()
codec := audioCodecForTrack(track)
sources := audioCaptureSources()
capture, source, sourceIndex, err := openAudioCaptureFrom(sources, 0)
capture, err := audio.OpenALSACapture(device)
if err != nil {
audioLogger.Error().Err(err).Msg("audio capture unavailable")
audioLogger.Error().Err(err).Str("device", device).Msg("audio capture unavailable")
return
}
defer func() {
_ = capture.Close()
}()
defer capture.Close()
sample := media.Sample{Duration: 20 * time.Millisecond}
audioLogger.Info().
Str("source", source.name).
Str("device", source.device).
Str("codec", codec.String()).
Msg("audio capture started")
audioLogger.Info().Str("device", device).Str("codec", codec.String()).Msg("audio capture started")
defer audioLogger.Info().Msg("audio capture stopped")
noDataReads := 0
wroteFirstSample := false
sample := media.Sample{Duration: 20 * time.Millisecond}
idleReads := 0
for {
select {
@@ -95,51 +84,25 @@ func runAudioCapture(ctx context.Context, track *webrtc.TrackLocalStaticSample,
payload, err := capture.ReadEncoded(codec)
if err != nil {
if errors.Is(err, audio.ErrNoAudioData) {
noDataReads++
if noDataReads == 50 || noDataReads%500 == 0 {
audioLogger.Info().Int("reads", noDataReads).Msg("audio capture has no data")
}
if noDataReads == 50 {
_ = capture.Close()
reopened, reopenedSource, reopenedSourceIndex, openErr := openAudioCaptureFrom(sources, sourceIndex+1)
if openErr != nil {
audioLogger.Error().Err(openErr).Msg("audio capture reopen failed")
time.Sleep(500 * time.Millisecond)
noDataReads = 0
continue
}
capture = reopened
source = reopenedSource
sourceIndex = reopenedSourceIndex
noDataReads = 0
audioLogger.Info().
Str("source", source.name).
Str("device", source.device).
Msg("audio capture reopened after no data")
if idleReads++; idleReads%500 == 0 {
audioLogger.Debug().Int("reads", idleReads).Msg("audio capture idle")
}
continue
}
audioLogger.Error().Err(err).Msg("audio capture read failed")
audioLogger.Warn().Err(err).Msg("audio capture read failed")
time.Sleep(100 * time.Millisecond)
continue
}
idleReads = 0
if len(payload) == 0 {
continue
}
noDataReads = 0
sample.Data = payload
if err := track.WriteSample(sample); err != nil {
select {
case <-ctx.Done():
return
default:
audioLogger.Warn().Err(err).Msg("audio sample write failed")
time.Sleep(100 * time.Millisecond)
}
} else if !wroteFirstSample {
wroteFirstSample = true
audioLogger.Info().Str("codec", codec.String()).Int("bytes", len(payload)).Msg("audio sample written")
audioLogger.Warn().Err(err).Msg("audio sample write failed")
time.Sleep(100 * time.Millisecond)
}
}
}
@@ -151,36 +114,13 @@ func audioCodecForTrack(track *webrtc.TrackLocalStaticSample) audio.Codec {
return audio.CodecPCMU
}
func openAudioCaptureFrom(sources []audioCaptureSource, startIndex int) (audio.Reader, audioCaptureSource, int, error) {
var errs []string
if len(sources) == 0 {
return nil, audioCaptureSource{}, 0, fmt.Errorf("no ALSA capture sources configured")
}
for offset := range sources {
index := (startIndex + offset) % len(sources)
source := sources[index]
capture, err := audio.OpenALSACapture(source.device)
if err == nil {
return capture, source, index, nil
}
errs = append(errs, fmt.Sprintf("%s %s: %v", source.name, source.device, err))
}
return nil, audioCaptureSource{}, 0, fmt.Errorf("no ALSA capture device opened (%s)", strings.Join(errs, "; "))
}
func audioCaptureSources() []audioCaptureSource {
var sources []audioCaptureSource
// alsaCaptureDevice returns the ALSA device string for the UAC1 gadget card.
// Falls back to hw:1,0 if the sysfs lookup fails (typically only during early boot).
func alsaCaptureDevice() string {
if card, ok := findALSACard("UAC1Gadget"); ok {
sources = append(sources, audioCaptureSource{
name: "uac1",
device: "hw:" + strconv.Itoa(card) + ",0",
})
return "hw:" + strconv.Itoa(card) + ",0"
}
return append(sources,
audioCaptureSource{name: "uac1-default", device: "hw:1,0"},
)
return "hw:1,0"
}
func findALSACard(cardID string) (int, bool) {
@@ -194,14 +134,11 @@ func findALSACard(cardID string) (int, bool) {
if !strings.HasPrefix(name, "card") {
continue
}
id, err := os.ReadFile(filepath.Join("/sys/class/sound", name, "id"))
if err != nil || strings.TrimSpace(string(id)) != cardID {
continue
}
card, err := strconv.Atoi(strings.TrimPrefix(name, "card"))
if err == nil {
if card, err := strconv.Atoi(strings.TrimPrefix(name, "card")); err == nil {
return card, true
}
}
+36 -155
View File
@@ -147,14 +147,12 @@ func (b *EventBuffer) prune() {
// Agent holds the state of the remote agent.
type Agent struct {
keyboardEvents *EventBuffer
mouseEvents *EventBuffer
audioMu sync.Mutex
audioToneCmd *exec.Cmd
audioToneCancel context.CancelFunc
audioToneDone chan error
monitorMu sync.Mutex
absMouseState struct {
keyboardEvents *EventBuffer
mouseEvents *EventBuffer
audioMu sync.Mutex
audioToneCmd *exec.Cmd
monitorMu sync.Mutex
absMouseState struct {
mu sync.Mutex
x int32
y int32
@@ -509,90 +507,13 @@ func getDisplayInfo() []DisplayInfo {
return displays
}
var (
aplayDeviceRE = regexp.MustCompile(`^card ([0-9]+): ([^ ]+) \[(.*)\], device ([0-9]+): .*\[(.*)\]`)
pipeWireSinkIDRE = regexp.MustCompile(`([0-9]+)\.`)
)
var aplayDeviceRE = regexp.MustCompile(`^card ([0-9]+): ([^ ]+) \[(.*)\], device ([0-9]+): .*\[(.*)\]`)
func firstLine(value string) string {
if value == "" {
return ""
}
line, _, _ := strings.Cut(value, "\n")
return strings.TrimSpace(line)
}
func alsaCardDescription(card int) string {
return firstLine(readSysFile(filepath.Join("/proc/asound", "card"+strconv.Itoa(card), "stream0")))
}
func alsaCardUSBID(card int) string {
return readSysFile(filepath.Join("/proc/asound", "card"+strconv.Itoa(card), "usbid"))
}
func isJetKVMUSBAudioDevice(device AudioDeviceInfo) bool {
if strings.EqualFold(strings.TrimSpace(device.USBID), "1d6b:0104") {
func isJetKVMUSBAudioDevice(d AudioDeviceInfo) bool {
if strings.EqualFold(strings.TrimSpace(d.USBID), "1d6b:0104") {
return true
}
lower := strings.ToLower(device.Name + " " + device.Description)
return strings.Contains(lower, "jetkvm usb emulation device")
}
func startToneCommand(cmd *exec.Cmd) (chan error, error) {
if err := cmd.Start(); err != nil {
return nil, err
}
done := make(chan error, 1)
go func() {
done <- cmd.Wait()
close(done)
}()
select {
case err := <-done:
if err == nil {
err = fmt.Errorf("audio tone exited immediately")
}
return nil, err
case <-time.After(300 * time.Millisecond):
return done, nil
}
}
func findJetKVMPipeWireSinkID() string {
if _, err := exec.LookPath("wpctl"); err != nil {
return ""
}
out, err := exec.Command("wpctl", "status").Output()
if err != nil {
return ""
}
inSinks := false
for _, line := range strings.Split(string(out), "\n") {
lower := strings.ToLower(line)
if strings.Contains(lower, "sinks:") {
inSinks = true
continue
}
if inSinks && strings.Contains(lower, "sink endpoints:") {
return ""
}
if !inSinks {
continue
}
if !strings.Contains(lower, "multifunction composite gadget") &&
!strings.Contains(lower, "jetkvm") &&
!strings.Contains(lower, "usb emulation device") {
continue
}
if m := pipeWireSinkIDRE.FindStringSubmatch(line); m != nil {
return m[1]
}
}
return ""
return strings.Contains(strings.ToLower(d.Name+" "+d.Description), "jetkvm usb emulation device")
}
func listAudioDevices() []AudioDeviceInfo {
@@ -609,110 +530,70 @@ func listAudioDevices() []AudioDeviceInfo {
}
card, _ := strconv.Atoi(m[1])
cardName := strings.TrimSpace(m[3])
device, _ := strconv.Atoi(m[4])
deviceName := strings.TrimSpace(m[5])
description := alsaCardDescription(card)
usbID := alsaCardUSBID(card)
nameParts := []string{cardName, deviceName}
if description != "" {
nameParts = append(nameParts, description)
}
name := strings.Join(nameParts, " / ")
pcm := "plughw:" + strconv.Itoa(card) + "," + strconv.Itoa(device)
streamFile := filepath.Join("/proc/asound", "card"+strconv.Itoa(card), "stream0")
description, _, _ := strings.Cut(readSysFile(streamFile), "\n")
description = strings.TrimSpace(description)
deviceInfo := AudioDeviceInfo{
d := AudioDeviceInfo{
Card: card,
Device: device,
Name: name,
PCM: pcm,
Name: strings.TrimSpace(m[3]) + " / " + strings.TrimSpace(m[5]),
PCM: fmt.Sprintf("plughw:%d,%d", card, device),
Description: description,
USBID: usbID,
USBID: readSysFile(filepath.Join("/proc/asound", "card"+strconv.Itoa(card), "usbid")),
}
deviceInfo.IsJetKVM = isJetKVMUSBAudioDevice(deviceInfo)
devices = append(devices, deviceInfo)
d.IsJetKVM = isJetKVMUSBAudioDevice(d)
devices = append(devices, d)
}
return devices
}
func selectAudioDevice() (AudioDeviceInfo, bool) {
if override := strings.TrimSpace(os.Getenv("JETKVM_AUDIO_DEVICE")); override != "" {
return AudioDeviceInfo{Card: -1, Device: -1, Name: "override", PCM: override, IsJetKVM: true}, true
}
devices := listAudioDevices()
for _, device := range devices {
if isJetKVMUSBAudioDevice(device) {
return device, true
}
}
return AudioDeviceInfo{}, false
}
func (a *Agent) startAudioTone() (AudioDeviceInfo, error) {
a.audioMu.Lock()
defer a.audioMu.Unlock()
a.stopAudioToneLocked()
killStaleAudioToneProcesses()
device, ok := selectAudioDevice()
if !ok {
return AudioDeviceInfo{}, os.ErrNotExist
}
playbackDevice := device.PCM
if sinkID := findJetKVMPipeWireSinkID(); sinkID != "" {
if err := exec.Command("wpctl", "set-default", sinkID).Run(); err == nil {
playbackDevice = "default"
var device AudioDeviceInfo
if override := strings.TrimSpace(os.Getenv("JETKVM_AUDIO_DEVICE")); override != "" {
device = AudioDeviceInfo{Card: -1, Device: -1, Name: "override", PCM: override, IsJetKVM: true}
} else {
for _, d := range listAudioDevices() {
if d.IsJetKVM {
device = d
break
}
}
if !device.IsJetKVM {
return AudioDeviceInfo{}, os.ErrNotExist
}
}
ctx, cancel := context.WithCancel(context.Background())
cmd := exec.CommandContext(ctx, "speaker-test", "-D", playbackDevice, "-t", "sine", "-f", "997", "-r", "48000", "-c", "2", "-p", "20000", "-b", "80000")
done, err := startToneCommand(cmd)
if err != nil {
cancel()
// 997 Hz / 48 kHz stereo sine; -p 20000 sets period so tone runs ~indefinitely
// (long enough for the spec's 12s deadline) without re-arming.
cmd := exec.Command("speaker-test", "-D", device.PCM, "-t", "sine", "-f", "997", "-r", "48000", "-c", "2", "-p", "20000", "-b", "80000")
if err := cmd.Start(); err != nil {
return device, err
}
a.audioToneCmd = cmd
a.audioToneCancel = cancel
a.audioToneDone = done
return device, nil
}
func (a *Agent) stopAudioTone() {
a.audioMu.Lock()
defer a.audioMu.Unlock()
a.stopAudioToneLocked()
}
func (a *Agent) stopAudioToneLocked() {
if a.audioToneCancel != nil {
a.audioToneCancel()
a.audioToneCancel = nil
}
if a.audioToneCmd == nil || a.audioToneCmd.Process == nil {
a.audioToneCmd = nil
a.audioToneDone = nil
return
}
_ = a.audioToneCmd.Process.Kill()
if a.audioToneDone != nil {
select {
case <-a.audioToneDone:
case <-time.After(time.Second):
}
}
_ = a.audioToneCmd.Wait()
a.audioToneCmd = nil
a.audioToneDone = nil
killStaleAudioToneProcesses()
}
func killStaleAudioToneProcesses() {
_ = exec.Command("pkill", "-f", "speaker-test -D .* -t sine -f 997 -r 48000 -c 2 -p 20000 -b 80000").Run()
}
// listMounts returns current mount points, filtered to interesting ones.
+7 -1
View File
@@ -4,6 +4,12 @@ package audio
import "fmt"
func OpenALSACapture(device string) (*unavailableCapture, error) {
type ALSACapture struct{}
func OpenALSACapture(device string) (*ALSACapture, error) {
return nil, fmt.Errorf("ALSA audio capture is not available for this build: %s", device)
}
func (*ALSACapture) ReadEncoded(Codec) ([]byte, error) { return nil, ErrNoAudioData }
func (*ALSACapture) Close() error { return nil }
+2 -17
View File
@@ -30,21 +30,6 @@ func (c Codec) String() string {
}
}
type Reader interface {
ReadEncoded(codec Codec) ([]byte, error)
Close() error
}
type unavailableCapture struct {
err error
}
func (c *unavailableCapture) ReadEncoded(Codec) ([]byte, error) {
return nil, c.err
}
func (c *unavailableCapture) Close() error {
return nil
}
// ErrNoAudioData is returned when the underlying capture device is idle.
// The caller should retry; it is not a fatal error.
var ErrNoAudioData = io.ErrNoProgress
+53 -177
View File
@@ -1,12 +1,5 @@
import { test, expect, type Page, type TestInfo } from "@playwright/test";
import * as fs from "fs";
import {
callJsonRpc,
getDeviceHost,
restartAppViaSSH,
sshExec,
waitForWebRTCReady,
} from "../helpers";
import { test, expect } from "@playwright/test";
import { getDeviceHost, waitForWebRTCReady } from "../helpers";
import { createRemoteAgent } from "./remote-agent";
const agent = createRemoteAgent();
@@ -16,174 +9,14 @@ async function ensureNoPasswordViaAPI() {
const status = await fetch(`http://${host}/device/status`).then(
r => r.json() as Promise<{ isSetup: boolean }>,
);
if (status.isSetup) return;
if (!status.isSetup) {
const res = await fetch(`http://${host}/device/setup`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ localAuthMode: "noPassword" }),
});
if (!res.ok) throw new Error(`Setup POST failed: ${res.status}`);
return;
}
const probe = await fetch(`http://${host}/device`);
if (probe.status !== 401) return;
await sshExec("rm -f /userdata/kvm_config.json && sync");
await restartAppViaSSH();
const res = await fetch(`http://${host}/device/setup`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ localAuthMode: "noPassword" }),
});
if (!res.ok) throw new Error(`Setup POST after reset failed: ${res.status}`);
}
async function openReadyPage(page: Page) {
await page.goto("/", { waitUntil: "networkidle" });
if (page.url().includes("/welcome")) {
const host = getDeviceHost();
await fetch(`http://${host}/device/setup`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ localAuthMode: "noPassword" }),
});
await page.goto("/", { waitUntil: "networkidle" });
}
await waitForWebRTCReady(page);
}
async function attachAudioDiagnostics(testInfo: TestInfo, page: Page): Promise<void> {
const sections: string[] = [];
const browserStats = await page
.evaluate(() => ({
active: window.__kvmTestHooks?.isAudioStreamActive(),
stats: window.__kvmTestHooks?.getInboundAudioStats(),
}))
.catch(error => ({ error: error instanceof Error ? error.message : String(error) }));
sections.push(`browser:\n${JSON.stringify(browserStats, null, 2)}`);
const audioDevices = agent
? await agent.getAudioDevices().catch(error => ({
error: error instanceof Error ? error.message : String(error),
}))
: { error: "remote agent unavailable" };
sections.push(`remote audio devices:\n${JSON.stringify(audioDevices, null, 2)}`);
const deviceSnapshot = await sshExec(
[
"echo '=== date ==='",
"date",
"echo '=== uptime ==='",
"uptime",
"echo '=== jetkvm_app pid ==='",
"pidof jetkvm_app || true",
"echo '=== jetkvm_app process ==='",
"ps -w | grep jetkvm_app | grep -v grep || true",
"echo '=== snd nodes ==='",
"ls -l /dev/snd 2>/dev/null || true",
"echo '=== proc asound ==='",
"cat /proc/asound/cards 2>/dev/null || true",
"cat /proc/asound/pcm 2>/dev/null || true",
"echo '=== recent app log ==='",
"tail -300 /userdata/jetkvm/last.log 2>/dev/null || true",
"echo '=== recent dmesg ==='",
"dmesg | tail -120",
].join("; "),
true,
);
sections.push(`device snapshot:\n${deviceSnapshot}`);
const diagnosticsPath = testInfo.outputPath("audio-usb-diagnostics.txt");
fs.writeFileSync(diagnosticsPath, sections.join("\n\n"));
await testInfo.attach("audio-usb-diagnostics.txt", {
path: diagnosticsPath,
contentType: "text/plain",
});
}
async function expectRemoteToneReachesBrowser(page: Page, testInfo: TestInfo) {
const audioDevices = await agent!.getAudioDevices();
test.skip(
!audioDevices.some(device => device.is_jetkvm),
`No JetKVM USB ALSA playback device found: ${JSON.stringify(audioDevices)}`,
);
await openReadyPage(page);
const originalLogLevel = (await callJsonRpc(page, "getDefaultLogLevel")) as string;
let diagnosticsAttached = false;
const attachDiagnosticsOnce = async () => {
if (diagnosticsAttached) return;
diagnosticsAttached = true;
await attachAudioDiagnostics(testInfo, page);
};
try {
await callJsonRpc(page, "setDefaultLogLevel", { level: "INFO" });
await expect
.poll(() => page.evaluate(() => window.__kvmTestHooks?.isAudioStreamActive()), {
message: "Waiting for browser audio track",
timeout: 10_000,
intervals: [250, 500],
})
.toBe(true);
const before = (await page.evaluate(() => window.__kvmTestHooks?.getInboundAudioStats())) ?? {
bytesReceived: 0,
packetsReceived: 0,
totalAudioEnergy: 0,
codecMimeType: "",
};
const toneDevice = await agent!.startAudioTone();
expect(
toneDevice.is_jetkvm,
`Remote agent selected non-JetKVM audio device: ${JSON.stringify(toneDevice)}`,
).toBe(true);
try {
await expect
.poll(
async () => {
const stats = await page.evaluate(() => window.__kvmTestHooks?.getInboundAudioStats());
if (!stats) return false;
const bytesDelta = stats.bytesReceived - before.bytesReceived;
const packetsDelta = stats.packetsReceived - before.packetsReceived;
const energyDelta = stats.totalAudioEnergy - before.totalAudioEnergy;
const codec = stats.codecMimeType.toLowerCase();
return (
bytesDelta > 800 &&
packetsDelta > 10 &&
energyDelta > 0.0001 &&
codec.includes("g722")
);
},
{
message: "Waiting for captured USB G.722 audio energy",
timeout: 12_000,
intervals: [500, 1000],
},
)
.toBe(true);
} catch (error) {
await attachDiagnosticsOnce();
throw error;
} finally {
await agent!.stopAudioTone();
}
} catch (error) {
await attachDiagnosticsOnce();
throw error;
} finally {
await callJsonRpc(page, "setDefaultLogLevel", { level: originalLogLevel }).catch(() => {
/* ignore */
});
}
if (!res.ok) throw new Error(`Setup POST failed: ${res.status}`);
}
test.beforeAll(async () => {
@@ -192,13 +25,56 @@ test.beforeAll(async () => {
});
test.afterEach(async () => {
if (!agent) return;
await agent.stopAudioTone().catch(() => {
/* ignore */
});
await agent?.stopAudioTone().catch(() => undefined);
});
test("audio: USB remote tone reaches browser WebRTC track as G.722", async ({ page }, testInfo) => {
test("audio: USB remote tone reaches browser WebRTC track as G.722", async ({ page }) => {
test.setTimeout(35_000);
await expectRemoteToneReachesBrowser(page, testInfo);
const devices = await agent!.getAudioDevices();
test.skip(
!devices.some(d => d.is_jetkvm),
`No JetKVM USB ALSA playback device on remote host: ${JSON.stringify(devices)}`,
);
await page.goto("/", { waitUntil: "networkidle" });
await waitForWebRTCReady(page);
await expect
.poll(() => page.evaluate(() => window.__kvmTestHooks?.isAudioStreamActive()), {
message: "browser audio track did not become live",
timeout: 10_000,
intervals: [250, 500],
})
.toBe(true);
const before = (await page.evaluate(() => window.__kvmTestHooks?.getInboundAudioStats())) ?? {
bytesReceived: 0,
packetsReceived: 0,
totalAudioEnergy: 0,
codecMimeType: "",
};
const tone = await agent!.startAudioTone();
expect(tone.is_jetkvm, `selected non-JetKVM playback device: ${JSON.stringify(tone)}`).toBe(true);
await expect
.poll(
async () => {
const stats = await page.evaluate(() => window.__kvmTestHooks?.getInboundAudioStats());
if (!stats) return false;
return (
stats.bytesReceived - before.bytesReceived > 800 &&
stats.packetsReceived - before.packetsReceived > 10 &&
stats.totalAudioEnergy - before.totalAudioEnergy > 0.0001 &&
stats.codecMimeType.toLowerCase().includes("g722")
);
},
{
message: "USB G.722 audio energy never reached browser",
timeout: 12_000,
intervals: [500, 1000],
},
)
.toBe(true);
});
+28 -58
View File
@@ -30,8 +30,8 @@ export default function WebRTCVideo({
}) {
// Video and stream related refs and states
const videoElm = useRef<HTMLVideoElement>(null);
const audioElm = useRef<HTMLAudioElement>(null);
const fullscreenContainerRef = useRef<HTMLDivElement>(null);
const audioElementsRef = useRef<HTMLAudioElement[]>([]);
const { mediaStream, peerConnectionState } = useRTCStore();
const [isPlaying, setIsPlaying] = useState(false);
const [audioAutoplayBlocked, setAudioAutoplayBlocked] = useState(false);
@@ -439,63 +439,36 @@ export default function WebRTCVideo({
[updateVideoSizeStore],
);
useEffect(
function updateVideoStreamOnNewTrack() {
if (!peerConnection) return;
const abortController = new AbortController();
const signal = abortController.signal;
peerConnection.addEventListener(
"track",
(e: RTCTrackEvent) => {
if (e.track.kind === "video") {
addStreamToVideoElm(e.streams[0]);
return;
}
if (e.track.kind === "audio") {
const audioElm = document.createElement("audio");
audioElm.srcObject = new MediaStream([e.track]);
audioElm.style.display = "none";
document.body.appendChild(audioElm);
audioElementsRef.current.push(audioElm);
audioElm
.play()
.then(() => {
setAudioAutoplayBlocked(false);
})
.catch(() => {
console.debug("[Audio] Autoplay blocked, will be started by user interaction");
setAudioAutoplayBlocked(true);
});
}
},
{ signal },
);
return () => {
abortController.abort();
audioElementsRef.current.forEach(audioElm => {
audioElm.srcObject = null;
audioElm.remove();
});
audioElementsRef.current = [];
setAudioAutoplayBlocked(false);
};
},
[addStreamToVideoElm, peerConnection],
);
useEffect(
function updateVideoStream() {
if (!mediaStream) return;
// We set the as early as possible
addStreamToVideoElm(mediaStream);
},
[addStreamToVideoElm, mediaStream],
);
useEffect(
function updateAudioStream() {
const elm = audioElm.current;
if (!elm || !mediaStream) return;
elm.srcObject = mediaStream;
elm
.play()
.then(() => setAudioAutoplayBlocked(false))
.catch(() => {
console.debug("[Audio] Autoplay blocked; waiting for user interaction");
setAudioAutoplayBlocked(true);
});
return () => {
elm.srcObject = null;
setAudioAutoplayBlocked(false);
};
},
[mediaStream],
);
// Setup Keyboard Events
useEffect(
function setupKeyboardEvents() {
@@ -693,6 +666,7 @@ export default function WebRTCVideo({
"animate-slideUpFade": isPlaying,
})}
/>
<audio ref={audioElm} autoPlay playsInline hidden />
<OcrOverlay />
{peerConnection?.connectionState == "connected" && !hasConnectionIssues && (
<div
@@ -706,14 +680,10 @@ export default function WebRTCVideo({
show={hasNoAutoPlayPermissions}
onPlayClick={() => {
videoElm.current?.play();
audioElementsRef.current.forEach(audioElm => {
audioElm
.play()
.then(() => {
setAudioAutoplayBlocked(false);
})
.catch(() => undefined);
});
audioElm.current
?.play()
.then(() => setAudioAutoplayBlocked(false))
.catch(() => undefined);
}}
/>
</div>
+3 -7
View File
@@ -281,7 +281,6 @@ export default function KvmIdRoute() {
const isSettingRemoteAnswerPending = useRef(false);
const makingOffer = useRef(false);
const reconnectAttemptsRef = useRef(2000);
const remoteMediaStreamRef = useRef<MediaStream>(new MediaStream());
const wsProtocol = window.location.protocol === "https:" ? "wss:" : "ws:";
const reconnectInterval = (attempt: number) => {
@@ -460,7 +459,6 @@ export default function KvmIdRoute() {
console.debug("[setupPeerConnection] Setting up peer connection");
setConnectionFailed(false);
setLoadingMessage(m.connecting_to_device());
remoteMediaStreamRef.current = new MediaStream();
let pc: RTCPeerConnection;
try {
@@ -540,11 +538,9 @@ export default function KvmIdRoute() {
};
pc.ontrack = function (event) {
const stream = remoteMediaStreamRef.current;
if (!stream.getTracks().some(track => track.id === event.track.id)) {
stream.addTrack(event.track);
}
setMediaStream(new MediaStream(stream.getTracks()));
// Backend tracks share stream ID "kvm", so pion delivers both video and
// audio in the same MediaStream — assigning it on either event is enough.
setMediaStream(event.streams[0]);
};
setTransceiver(pc.addTransceiver("video", { direction: "recvonly" }));
+38 -44
View File
@@ -132,6 +132,30 @@ type SessionConfig struct {
MDNSMode string
}
// negotiateAudioCodec returns the audio MIME type to use, or "" if the browser
// offer advertises no supported audio codec.
func negotiateAudioCodec(offerSDP string) string {
upper := strings.ToUpper(offerSDP)
switch {
case strings.Contains(upper, "G722/8000"):
return webrtc.MimeTypeG722
case strings.Contains(upper, "PCMU/8000"):
return webrtc.MimeTypePCMU
}
return ""
}
// drainRTCP reads and discards RTCP packets from a sender. Required for NACK
// handling on outgoing tracks; the sender stops on connection close.
func drainRTCP(sender *webrtc.RTPSender) {
buf := make([]byte, 1500)
for {
if _, _, err := sender.Read(buf); err != nil {
return
}
}
}
// resolveCodec picks the video codec based on user preference and browser support.
// Always validates against the browser's SDP offer to prevent negotiation failure.
func resolveCodec(offerSDP string) string {
@@ -154,17 +178,6 @@ func resolveCodec(offerSDP string) string {
}
}
func resolveAudioCodec(offerSDP string) (webrtc.RTPCodecCapability, bool) {
upperOffer := strings.ToUpper(offerSDP)
if strings.Contains(upperOffer, "G722/8000") {
return webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypeG722, ClockRate: 8000}, true
}
if strings.Contains(upperOffer, "PCMU/8000") {
return webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypePCMU, ClockRate: 8000}, true
}
return webrtc.RTPCodecCapability{}, false
}
func (s *Session) ExchangeOffer(offerStr string) (string, error) {
b, err := base64.StdEncoding.DecodeString(offerStr)
if err != nil {
@@ -190,38 +203,22 @@ func (s *Session) ExchangeOffer(offerStr string) (string, error) {
return "", err
}
// Read incoming RTCP packets (required for NACK handling).
go func() {
rtcpBuf := make([]byte, 1500)
for {
if _, _, rtcpErr := rtpSender.Read(rtcpBuf); rtcpErr != nil {
return
}
}
}()
go drainRTCP(rtpSender)
if audioCodec, ok := resolveAudioCodec(offer.SDP); ok {
s.AudioTrack, err = webrtc.NewTrackLocalStaticSample(audioCodec, "audio", "kvm")
if audioMime := negotiateAudioCodec(offer.SDP); audioMime != "" {
s.AudioTrack, err = webrtc.NewTrackLocalStaticSample(
webrtc.RTPCodecCapability{MimeType: audioMime, ClockRate: 8000}, "audio", "kvm")
if err != nil {
return "", err
}
webrtcLogger.Info().Str("codec", audioCodec.MimeType).Msg("audio track enabled")
audioRTPSender, err := s.peerConnection.AddTrack(s.AudioTrack)
audioSender, err := s.peerConnection.AddTrack(s.AudioTrack)
if err != nil {
return "", err
}
go func() {
rtcpBuf := make([]byte, 1500)
for {
if _, _, rtcpErr := audioRTPSender.Read(rtcpBuf); rtcpErr != nil {
return
}
}
}()
webrtcLogger.Info().Str("codec", audioMime).Msg("audio track enabled")
go drainRTCP(audioSender)
} else {
webrtcLogger.Warn().Msg("browser offer does not include supported audio codec; audio track disabled")
webrtcLogger.Warn().Msg("browser offer has no supported audio codec; audio disabled")
}
// Set the remote SessionDescription
@@ -624,7 +621,12 @@ func onCurrentSessionConnected(session *Session) {
_ = nativeInstance.VideoSetCodecType(0)
}
_ = nativeInstance.VideoStart()
startSessionAudio(session)
var audioTrack *webrtc.TrackLocalStaticSample
if session != nil {
audioTrack = session.AudioTrack
}
startAudio(audioTrack)
}
func onLastSessionDisconnected() {
@@ -634,11 +636,3 @@ func onLastSessionDisconnected() {
_ = nativeInstance.VideoStop()
startVideoSleepModeTicker()
}
func startSessionAudio(session *Session) {
if session == nil || session.AudioTrack == nil {
stopAudio()
return
}
startAudio(session.AudioTrack)
}