[fix](trx-client): only reset audio reconnect backoff after stable uptime
run_single_rig_audio_client used to reset reconnect_delay to 1 s as soon as TCP connect() succeeded. A peer that fails moments after accepting (broken handshake, oversize history blob, etc.) therefore stayed in a 1 Hz reconnect loop forever — exponential backoff never had a chance to climb.

Capture the moment connect() returns and only reset the backoff once the connection has lived past STABLE_CONNECTION_THRESHOLD (30 s). Defense in depth alongside the chunked history fix.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
@@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::sync::{Arc, Mutex, RwLock};
|
use std::sync::{Arc, Mutex, RwLock};
|
||||||
use std::time::Duration;
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use flate2::read::GzDecoder;
|
use flate2::read::GzDecoder;
|
||||||
@@ -36,6 +36,13 @@ use trx_core::audio::{
|
|||||||
use trx_core::decode::DecodedMessage;
|
use trx_core::decode::DecodedMessage;
|
||||||
use trx_frontend::VChanAudioCmd;
|
use trx_frontend::VChanAudioCmd;
|
||||||
|
|
||||||
|
/// Minimum uptime before a connection is "stable" enough to reset the
|
||||||
|
/// reconnect backoff. Connections that die before this threshold leave the
|
||||||
|
/// exponential backoff climbing — protects the server from a tight reconnect
|
||||||
|
/// storm when the peer is broken in some way that only manifests after the
|
||||||
|
/// TCP handshake.
|
||||||
|
const STABLE_CONNECTION_THRESHOLD: Duration = Duration::from_secs(30);
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct ActiveVChanSub {
|
struct ActiveVChanSub {
|
||||||
freq_hz: u64,
|
freq_hz: u64,
|
||||||
@@ -289,7 +296,7 @@ async fn run_single_rig_audio_client(
|
|||||||
info!("Audio client [{}]: connecting to {}", rig_id, server_addr);
|
info!("Audio client [{}]: connecting to {}", rig_id, server_addr);
|
||||||
match TcpStream::connect(&server_addr).await {
|
match TcpStream::connect(&server_addr).await {
|
||||||
Ok(stream) => {
|
Ok(stream) => {
|
||||||
reconnect_delay = Duration::from_secs(1);
|
let connected_at = Instant::now();
|
||||||
if let Err(e) = handle_single_rig_connection(
|
if let Err(e) = handle_single_rig_connection(
|
||||||
stream,
|
stream,
|
||||||
&rig_id,
|
&rig_id,
|
||||||
@@ -311,6 +318,13 @@ async fn run_single_rig_audio_client(
|
|||||||
{
|
{
|
||||||
warn!("Audio connection [{}] dropped: {}", rig_id, e);
|
warn!("Audio connection [{}] dropped: {}", rig_id, e);
|
||||||
}
|
}
|
||||||
|
// Only reset the backoff after a connection survived long
|
||||||
|
// enough to be considered stable. TCP `connect()` succeeding
|
||||||
|
// is not enough — a peer that fails immediately after
|
||||||
|
// accepting must not be hammered every second.
|
||||||
|
if connected_at.elapsed() >= STABLE_CONNECTION_THRESHOLD {
|
||||||
|
reconnect_delay = Duration::from_secs(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("Audio connect [{}] failed: {}", rig_id, e);
|
warn!("Audio connect [{}] failed: {}", rig_id, e);
|
||||||
|
|||||||
Reference in New Issue
Block a user