[fix](trx-server): auto-recover audio streams and harden frontend reconnect

Implement ALSA/CPAL stream auto-recovery by recreating input/output
streams after backend callback failures with bounded retry delay.

Also improve HTTP frontend resilience by polling /status on reconnect
and after SSE errors to refresh snapshot state after broken pipes.

Co-authored-by: Codex <codex@openai.com>
Signed-off-by: Stanislaw Grams <stanislawgrams@gmail.com>
This commit is contained in:
2026-02-12 23:59:39 +01:00
parent 273283708e
commit d085f96838
2 changed files with 252 additions and 105 deletions
@@ -74,6 +74,7 @@ let initialized = false;
let lastEventAt = Date.now(); let lastEventAt = Date.now();
let es; let es;
let esHeartbeat; let esHeartbeat;
let reconnectTimer = null;
function formatFreq(hz) { function formatFreq(hz) {
if (!Number.isFinite(hz)) return "--"; if (!Number.isFinite(hz)) return "--";
@@ -446,6 +447,26 @@ function render(update) {
} }
} }
function scheduleReconnect(delayMs = 1000) {
if (reconnectTimer) return;
reconnectTimer = setTimeout(() => {
reconnectTimer = null;
connect();
}, delayMs);
}
async function pollFreshSnapshot() {
try {
const resp = await fetch("/status", { cache: "no-store" });
if (!resp.ok) return;
const data = await resp.json();
render(data);
lastEventAt = Date.now();
} catch (e) {
// Ignore network errors; connect() retry loop handles reconnection.
}
}
function connect() { function connect() {
if (es) { if (es) {
es.close(); es.close();
@@ -453,9 +474,13 @@ function connect() {
if (esHeartbeat) { if (esHeartbeat) {
clearInterval(esHeartbeat); clearInterval(esHeartbeat);
} }
pollFreshSnapshot();
es = new EventSource("/events"); es = new EventSource("/events");
lastEventAt = Date.now(); lastEventAt = Date.now();
es.onmessage = (evt) => { es.onopen = () => {
pollFreshSnapshot();
};
es.onmessage = (evt) => {
try { try {
if (evt.data === lastRendered) return; if (evt.data === lastRendered) return;
const data = JSON.parse(evt.data); const data = JSON.parse(evt.data);
@@ -472,14 +497,16 @@ es.onmessage = (evt) => {
es.onerror = () => { es.onerror = () => {
powerHint.textContent = "Disconnected, retrying…"; powerHint.textContent = "Disconnected, retrying…";
es.close(); es.close();
setTimeout(connect, 1000); pollFreshSnapshot();
scheduleReconnect(1000);
}; };
esHeartbeat = setInterval(() => { esHeartbeat = setInterval(() => {
const now = Date.now(); const now = Date.now();
if (now - lastEventAt > 15000) { if (now - lastEventAt > 15000) {
es.close(); es.close();
connect(); pollFreshSnapshot();
scheduleReconnect(250);
} }
}, 5000); }, 5000);
} }
+222 -102
View File
@@ -5,6 +5,7 @@
//! Audio capture, playback, and TCP streaming for trx-server. //! Audio capture, playback, and TCP streaming for trx-server.
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, OnceLock}; use std::sync::{Arc, OnceLock};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use std::{collections::VecDeque, sync::Mutex}; use std::{collections::VecDeque, sync::Mutex};
@@ -31,7 +32,8 @@ const APRS_HISTORY_RETENTION: Duration = Duration::from_secs(24 * 60 * 60);
const FT8_HISTORY_RETENTION: Duration = Duration::from_secs(24 * 60 * 60); const FT8_HISTORY_RETENTION: Duration = Duration::from_secs(24 * 60 * 60);
const WSPR_HISTORY_RETENTION: Duration = Duration::from_secs(24 * 60 * 60); const WSPR_HISTORY_RETENTION: Duration = Duration::from_secs(24 * 60 * 60);
const FT8_SAMPLE_RATE: u32 = 12_000; const FT8_SAMPLE_RATE: u32 = 12_000;
const AUDIO_STREAM_ERROR_LOG_INTERVAL: Duration = Duration::from_secs(10); const AUDIO_STREAM_ERROR_LOG_INTERVAL: Duration = Duration::from_secs(60);
const AUDIO_STREAM_RECOVERY_DELAY: Duration = Duration::from_secs(1);
struct StreamErrorLogger { struct StreamErrorLogger {
label: &'static str, label: &'static str,
@@ -40,6 +42,7 @@ struct StreamErrorLogger {
#[derive(Default)] #[derive(Default)]
struct StreamErrorState { struct StreamErrorState {
last_kind: Option<&'static str>,
last_error: Option<String>, last_error: Option<String>,
last_logged_at: Option<Instant>, last_logged_at: Option<Instant>,
suppressed: u64, suppressed: u64,
@@ -55,19 +58,14 @@ impl StreamErrorLogger {
fn log(&self, err: &str) { fn log(&self, err: &str) {
let now = Instant::now(); let now = Instant::now();
let kind = classify_stream_error(err);
let mut state = self let mut state = self
.state .state
.lock() .lock()
.expect("stream error logger mutex poisoned"); .expect("stream error logger mutex poisoned");
let should_log_now = match (&state.last_error, state.last_logged_at) {
(None, _) => true,
(Some(prev), Some(ts)) => {
prev != err || now.duration_since(ts) >= AUDIO_STREAM_ERROR_LOG_INTERVAL
}
(Some(_), None) => true,
};
if should_log_now { // First occurrence or changed error class: log as error once.
if state.last_kind != Some(kind) {
if state.suppressed > 0 { if state.suppressed > 0 {
warn!( warn!(
"{} repeated {} times: {}", "{} repeated {} times: {}",
@@ -77,12 +75,44 @@ impl StreamErrorLogger {
); );
} }
error!("{}: {}", self.label, err); error!("{}: {}", self.label, err);
state.last_kind = Some(kind);
state.last_error = Some(err.to_string()); state.last_error = Some(err.to_string());
state.last_logged_at = Some(now); state.last_logged_at = Some(now);
state.suppressed = 0; state.suppressed = 0;
} else { return;
state.suppressed += 1;
} }
// Same class: suppress aggressively and emit only periodic summaries.
state.suppressed += 1;
let due = state
.last_logged_at
.map(|ts| now.duration_since(ts) >= AUDIO_STREAM_ERROR_LOG_INTERVAL)
.unwrap_or(false);
if due {
warn!(
"{} recurring ({} repeats/{}s): {}",
self.label,
state.suppressed,
AUDIO_STREAM_ERROR_LOG_INTERVAL.as_secs(),
state.last_error.as_deref().unwrap_or("<unknown>")
);
state.last_logged_at = Some(now);
state.suppressed = 0;
} else {
state.last_error = Some(err.to_string());
}
}
}
fn classify_stream_error(err: &str) -> &'static str {
if err.contains("snd_pcm_poll_descriptors") || err.contains("alsa::poll() returned POLLERR") {
"alsa_poll_failure"
} else if err.contains("Input stream") {
"input_stream_error"
} else if err.contains("Output stream") {
"output_stream_error"
} else {
"other_stream_error"
} }
} }
@@ -226,6 +256,7 @@ fn run_capture(
pcm_tx: Option<broadcast::Sender<Vec<f32>>>, pcm_tx: Option<broadcast::Sender<Vec<f32>>>,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use std::sync::mpsc::RecvTimeoutError;
let host = cpal::default_host(); let host = cpal::default_host();
let device = if let Some(ref name) = device_name { let device = if let Some(ref name) = device_name {
@@ -260,79 +291,110 @@ fn run_capture(
let mut encoder = opus::Encoder::new(sample_rate, opus_channels, opus::Application::Audio)?; let mut encoder = opus::Encoder::new(sample_rate, opus_channels, opus::Application::Audio)?;
encoder.set_bitrate(opus::Bitrate::Bits(bitrate_bps as i32))?; encoder.set_bitrate(opus::Bitrate::Bits(bitrate_bps as i32))?;
let (sample_tx, sample_rx) = std::sync::mpsc::sync_channel::<Vec<f32>>(64);
let input_err_logger = Arc::new(StreamErrorLogger::new("Audio input stream error"));
let stream = device.build_input_stream(
&config,
move |data: &[f32], _: &cpal::InputCallbackInfo| {
let _ = sample_tx.try_send(data.to_vec());
},
{
let input_err_logger = input_err_logger.clone();
move |err| {
input_err_logger.log(&err.to_string());
}
},
None,
)?;
// Start paused — only capture when clients are connected // Start paused — only capture when clients are connected
info!( info!(
"Audio capture: ready ({}Hz, {} ch, {}ms frames)", "Audio capture: ready ({}Hz, {} ch, {}ms frames)",
sample_rate, channels, frame_duration_ms sample_rate, channels, frame_duration_ms
); );
let input_err_logger = Arc::new(StreamErrorLogger::new("Audio input stream error"));
let mut pcm_buf: Vec<f32> = Vec::with_capacity(frame_samples * 2); let mut pcm_buf: Vec<f32> = Vec::with_capacity(frame_samples * 2);
let mut opus_buf = vec![0u8; 4096]; let mut opus_buf = vec![0u8; 4096];
let mut capturing = false; let mut capturing = false;
loop { loop {
let has_receivers = let (sample_tx, sample_rx) = std::sync::mpsc::sync_channel::<Vec<f32>>(64);
tx.receiver_count() > 0 || pcm_tx.as_ref().is_some_and(|p| p.receiver_count() > 0); let stream_failed = Arc::new(AtomicBool::new(false));
let stream = match device.build_input_stream(
&config,
move |data: &[f32], _: &cpal::InputCallbackInfo| {
let _ = sample_tx.try_send(data.to_vec());
},
{
let input_err_logger = input_err_logger.clone();
let stream_failed = stream_failed.clone();
move |err| {
input_err_logger.log(&err.to_string());
stream_failed.store(true, Ordering::Relaxed);
}
},
None,
) {
Ok(stream) => stream,
Err(err) => {
warn!(
"Audio capture: failed to open input stream, retrying: {}",
err
);
std::thread::sleep(AUDIO_STREAM_RECOVERY_DELAY);
continue;
}
};
if has_receivers && !capturing { if capturing {
let _ = stream.play(); let _ = stream.play();
capturing = true;
info!("Audio capture: started");
} else if !has_receivers && capturing {
let _ = stream.pause();
capturing = false;
pcm_buf.clear();
// Drain any buffered samples
while sample_rx.try_recv().is_ok() {}
info!("Audio capture: paused (no listeners)");
} }
if !capturing { loop {
std::thread::sleep(std::time::Duration::from_millis(100)); if stream_failed.load(Ordering::Relaxed) {
continue; warn!("Audio capture: backend stream error, recreating");
} break;
}
match sample_rx.recv() { let has_receivers =
Ok(samples) => { tx.receiver_count() > 0 || pcm_tx.as_ref().is_some_and(|p| p.receiver_count() > 0);
pcm_buf.extend_from_slice(&samples);
while pcm_buf.len() >= frame_samples { if has_receivers && !capturing {
let frame: Vec<f32> = pcm_buf.drain(..frame_samples).collect(); let _ = stream.play();
if let Some(ref pcm_tx) = pcm_tx { capturing = true;
let _ = pcm_tx.send(frame.clone()); info!("Audio capture: started");
} } else if !has_receivers && capturing {
match encoder.encode_float(&frame, &mut opus_buf) { let _ = stream.pause();
Ok(len) => { capturing = false;
let packet = Bytes::copy_from_slice(&opus_buf[..len]); pcm_buf.clear();
let _ = tx.send(packet); while sample_rx.try_recv().is_ok() {}
info!("Audio capture: paused (no listeners)");
}
if !capturing {
std::thread::sleep(std::time::Duration::from_millis(100));
continue;
}
match sample_rx.recv_timeout(std::time::Duration::from_millis(200)) {
Ok(samples) => {
pcm_buf.extend_from_slice(&samples);
while pcm_buf.len() >= frame_samples {
let frame: Vec<f32> = pcm_buf.drain(..frame_samples).collect();
if let Some(ref pcm_tx) = pcm_tx {
let _ = pcm_tx.send(frame.clone());
} }
Err(e) => { match encoder.encode_float(&frame, &mut opus_buf) {
warn!("Opus encode error: {}", e); Ok(len) => {
let packet = Bytes::copy_from_slice(&opus_buf[..len]);
let _ = tx.send(packet);
}
Err(e) => {
warn!("Opus encode error: {}", e);
}
} }
} }
} }
Err(RecvTimeoutError::Timeout) => {}
Err(RecvTimeoutError::Disconnected) => {
warn!("Audio capture: callback channel disconnected, recreating");
break;
}
} }
Err(_) => break,
} }
}
Ok(()) if capturing {
let _ = stream.pause();
capturing = false;
pcm_buf.clear();
}
std::thread::sleep(AUDIO_STREAM_RECOVERY_DELAY);
}
} }
/// Spawn the audio playback task. /// Spawn the audio playback task.
@@ -362,6 +424,7 @@ fn run_playback(
mut rx: mpsc::Receiver<Bytes>, mut rx: mpsc::Receiver<Bytes>,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use tokio::sync::mpsc::error::TryRecvError;
let host = cpal::default_host(); let host = cpal::default_host();
let device = if let Some(ref name) = device_name { let device = if let Some(ref name) = device_name {
@@ -400,63 +463,120 @@ fn run_playback(
)); ));
let ring_writer = ring.clone(); let ring_writer = ring.clone();
let output_err_logger = Arc::new(StreamErrorLogger::new("Audio output stream error"));
let stream = device.build_output_stream(
&config,
move |data: &mut [f32], _: &cpal::OutputCallbackInfo| {
let mut ring = ring.lock().unwrap();
for sample in data.iter_mut() {
*sample = ring.pop_front().unwrap_or(0.0);
}
},
{
let output_err_logger = output_err_logger.clone();
move |err| {
output_err_logger.log(&err.to_string());
}
},
None,
)?;
// Start paused — only play when TX packets arrive // Start paused — only play when TX packets arrive
info!("Audio playback: ready ({}Hz, {} ch)", sample_rate, channels); info!("Audio playback: ready ({}Hz, {} ch)", sample_rate, channels);
let output_err_logger = Arc::new(StreamErrorLogger::new("Audio output stream error"));
let mut pcm_buf = vec![0f32; frame_samples]; let mut pcm_buf = vec![0f32; frame_samples];
let mut playing = false; let mut playing = false;
let mut channel_closed = false;
loop {
let stream_failed = Arc::new(AtomicBool::new(false));
let stream = match device.build_output_stream(
&config,
{
let ring = ring.clone();
move |data: &mut [f32], _: &cpal::OutputCallbackInfo| {
let mut ring = ring.lock().unwrap();
for sample in data.iter_mut() {
*sample = ring.pop_front().unwrap_or(0.0);
}
}
},
{
let output_err_logger = output_err_logger.clone();
let stream_failed = stream_failed.clone();
move |err| {
output_err_logger.log(&err.to_string());
stream_failed.store(true, Ordering::Relaxed);
}
},
None,
) {
Ok(stream) => stream,
Err(err) => {
warn!(
"Audio playback: failed to open output stream, retrying: {}",
err
);
std::thread::sleep(AUDIO_STREAM_RECOVERY_DELAY);
continue;
}
};
while let Some(packet) = rx.blocking_recv() {
if !playing { if !playing {
// stay paused until packets arrive
} else {
stream.play()?; stream.play()?;
playing = true;
info!("Audio playback: started");
} }
match decoder.decode_float(&packet, &mut pcm_buf, false) { loop {
Ok(decoded) => { if stream_failed.load(Ordering::Relaxed) {
let mut ring = ring_writer.lock().unwrap(); warn!("Audio playback: backend stream error, recreating");
ring.extend(&pcm_buf[..decoded * channels as usize]); break;
} }
Err(e) => {
warn!("Opus decode error: {}", e); match rx.try_recv() {
Ok(packet) => {
if !playing {
stream.play()?;
playing = true;
info!("Audio playback: started");
}
match decoder.decode_float(&packet, &mut pcm_buf, false) {
Ok(decoded) => {
let mut ring = ring_writer.lock().unwrap();
ring.extend(&pcm_buf[..decoded * channels as usize]);
}
Err(e) => {
warn!("Opus decode error: {}", e);
}
}
if rx.is_empty() {
std::thread::sleep(std::time::Duration::from_millis(
frame_duration_ms as u64 * 2,
));
if rx.is_empty() {
let _ = stream.pause();
playing = false;
ring_writer.lock().unwrap().clear();
info!("Audio playback: paused (idle)");
if channel_closed {
return Ok(());
}
}
}
}
Err(TryRecvError::Empty) => {
if channel_closed && !playing {
return Ok(());
}
std::thread::sleep(std::time::Duration::from_millis(20));
}
Err(TryRecvError::Disconnected) => {
channel_closed = true;
if !playing {
return Ok(());
}
std::thread::sleep(std::time::Duration::from_millis(20));
}
} }
} }
// Pause when no more packets are queued to avoid ALSA underruns if playing {
if rx.is_empty() { let _ = stream.pause();
// Drain remaining samples before pausing playing = false;
std::thread::sleep(std::time::Duration::from_millis(
frame_duration_ms as u64 * 2,
));
if rx.is_empty() {
let _ = stream.pause();
playing = false;
ring_writer.lock().unwrap().clear();
info!("Audio playback: paused (idle)");
}
} }
ring_writer.lock().unwrap().clear();
if channel_closed {
return Ok(());
}
std::thread::sleep(AUDIO_STREAM_RECOVERY_DELAY);
} }
Ok(())
} }
/// Run the APRS decoder task. Only processes PCM when rig mode is PKT. /// Run the APRS decoder task. Only processes PCM when rig mode is PKT.