[fix](trx-client): only reset audio reconnect backoff after stable uptime

run_single_rig_audio_client used to reset reconnect_delay to 1 s as soon as TCP connect() succeeded. A peer that fails moments after accepting (broken handshake, oversize history blob, etc.) therefore stayed in a 1 Hz reconnect loop forever — exponential backoff never had a chance to climb.

Capture the moment connect() returns and only reset the backoff once the connection has lived past STABLE_CONNECTION_THRESHOLD (30 s). This is defense in depth alongside the chunked history fix.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
2026-05-03 21:00:04 +02:00
parent 72b3c02770
commit cc001287a2
+16 -2
View File
@@ -7,7 +7,7 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex, RwLock}; use std::sync::{Arc, Mutex, RwLock};
use std::time::Duration; use std::time::{Duration, Instant};
use bytes::Bytes; use bytes::Bytes;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
@@ -36,6 +36,13 @@ use trx_core::audio::{
use trx_core::decode::DecodedMessage; use trx_core::decode::DecodedMessage;
use trx_frontend::VChanAudioCmd; use trx_frontend::VChanAudioCmd;
/// Minimum uptime before a connection counts as "stable" enough to reset
/// the reconnect backoff.
///
/// Connections that die before this threshold leave the exponential backoff
/// climbing. This protects the server from a tight reconnect storm when the
/// peer is broken in some way that only manifests after the TCP handshake.
const STABLE_CONNECTION_THRESHOLD: Duration = Duration::from_secs(30);
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct ActiveVChanSub { struct ActiveVChanSub {
freq_hz: u64, freq_hz: u64,
@@ -289,7 +296,7 @@ async fn run_single_rig_audio_client(
info!("Audio client [{}]: connecting to {}", rig_id, server_addr); info!("Audio client [{}]: connecting to {}", rig_id, server_addr);
match TcpStream::connect(&server_addr).await { match TcpStream::connect(&server_addr).await {
Ok(stream) => { Ok(stream) => {
reconnect_delay = Duration::from_secs(1); let connected_at = Instant::now();
if let Err(e) = handle_single_rig_connection( if let Err(e) = handle_single_rig_connection(
stream, stream,
&rig_id, &rig_id,
@@ -311,6 +318,13 @@ async fn run_single_rig_audio_client(
{ {
warn!("Audio connection [{}] dropped: {}", rig_id, e); warn!("Audio connection [{}] dropped: {}", rig_id, e);
} }
// Only reset the backoff after a connection survived long
// enough to be considered stable. TCP `connect()` succeeding
// is not enough — a peer that fails immediately after
// accepting must not be hammered every second.
if connected_at.elapsed() >= STABLE_CONNECTION_THRESHOLD {
reconnect_delay = Duration::from_secs(1);
}
} }
Err(e) => { Err(e) => {
warn!("Audio connect [{}] failed: {}", rig_id, e); warn!("Audio connect [{}] failed: {}", rig_id, e);