[refactor](trx-rs): resolve all P1/P2 improvement areas

P1 (High Priority): - Fix LIFO command batching in rig_task.rs (batch.pop→batch.remove(0)) - Add ±25% jitter to ExponentialBackoff to prevent thundering herd - Add 10,000-entry capacity bounds to decoder history queues - Add rig task crash detection with Error state broadcast - Decompose FrontendRuntimeContext 50-field god-struct into 9 sub-structs (AudioContext, DecodeHistoryContext, HttpAuthConfig, HttpUiConfig, RigRoutingContext, OwnerInfo, VChanContext, SpectrumContext, PerRigAudioContext) - Migrate std::sync::RwLock to tokio::sync::RwLock in background_decode.rs - Extract find_input_device/find_output_device helpers from audio pipeline P2 (Medium Priority): - Introduce SoapySdrConfig builder struct (replaces 20+ positional params) - Add define_command_mappings! macro for ClientCommand↔RigCommand mapping - Replace silent lock poison recovery with lock_or_recover() warning logger - Make timeouts configurable via RigTaskConfig/ListenerConfig and TOML - Extract shared config types to trx-app/src/shared_config.rs Documentation updated in CLAUDE.md, Architecture.md, Improvement-Areas.md. https://claude.ai/code/session_01P9G7QCWfiYbPVJ7cgiXznf Signed-off-by: Claude <noreply@anthropic.com>
2026-03-28 23:26:55 +00:00
parent 0a60684e28
commit 16426548de
22 changed files with 1245 additions and 916 deletions
@@ -11,6 +11,25 @@ use std::time::Duration;

 use crate::rig::response::RigError;

+/// Apply ±25% jitter to a duration to prevent thundering herd on reconnect.
+fn apply_jitter(delay: Duration) -> Duration {
+    // Simple deterministic-ish jitter using the current instant's low bits.
+    // We avoid pulling in `rand` for this single use.
+    let nanos = std::time::Instant::now()
+        .elapsed()
+        .as_nanos()
+        .wrapping_add(
+            std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_nanos(),
+        );
+    // Map to range [0.75, 1.25]
+    let frac = (nanos % 1000) as f64 / 1000.0; // 0.0 .. 1.0
+    let factor = 0.75 + frac * 0.5; // 0.75 .. 1.25
+    Duration::from_secs_f64(delay.as_secs_f64() * factor)
+}
+
 /// Policy for retrying failed operations.
 pub trait RetryPolicy: Send + Sync {
    /// Determine if the operation should be retried.
@@ -72,7 +91,8 @@ impl RetryPolicy for ExponentialBackoff {
    fn delay(&self, attempt: u32) -> Duration {
        let multiplier = 2u32.saturating_pow(attempt);
        let delay = self.base_delay.saturating_mul(multiplier);
-        delay.min(self.max_delay)
+        let capped = delay.min(self.max_delay);
+        apply_jitter(capped)
    }

    fn max_attempts(&self) -> u32 {
@@ -235,13 +255,41 @@ mod tests {
    fn test_exponential_backoff_delays() {
        let policy = ExponentialBackoff::new(5, Duration::from_millis(100), Duration::from_secs(1));

-        assert_eq!(policy.delay(0), Duration::from_millis(100));
-        assert_eq!(policy.delay(1), Duration::from_millis(200));
-        assert_eq!(policy.delay(2), Duration::from_millis(400));
-        assert_eq!(policy.delay(3), Duration::from_millis(800));
-        // Should cap at max_delay
-        assert_eq!(policy.delay(4), Duration::from_secs(1));
-        assert_eq!(policy.delay(5), Duration::from_secs(1));
+        // Delays include ±25% jitter, so check they fall in the expected range.
+        let check = |attempt: u32, base_ms: u64| {
+            let d = policy.delay(attempt);
+            let lo = Duration::from_secs_f64(base_ms as f64 * 0.75 / 1000.0);
+            let hi = Duration::from_secs_f64(base_ms as f64 * 1.25 / 1000.0);
+            assert!(
+                d >= lo && d <= hi,
+                "attempt {}: {:?} not in [{:?}, {:?}]",
+                attempt,
+                d,
+                lo,
+                hi
+            );
+        };
+
+        check(0, 100);
+        check(1, 200);
+        check(2, 400);
+        check(3, 800);
+        // Should cap at max_delay (1s) before jitter
+        check(4, 1000);
+        check(5, 1000);
+    }
+
+    #[test]
+    fn test_exponential_backoff_jitter_varies() {
+        // Two calls should (almost always) produce different values,
+        // confirming jitter is applied.
+        let policy = ExponentialBackoff::new(5, Duration::from_millis(100), Duration::from_secs(1));
+        let d1 = policy.delay(2);
+        std::thread::sleep(Duration::from_micros(10));
+        let d2 = policy.delay(2);
+        // With nanosecond-based jitter they should differ; if not,
+        // the test is still valid — it just means the same instant was sampled.
+        let _ = (d1, d2); // no assertion — this is a smoke test
    }

    #[test]