From 33b218fce3e51fdc9f0ddcf6e39f5a9bc675d9e3 Mon Sep 17 00:00:00 2001
From: Stan Grams <sjg@haxx.space>
Date: Sun, 1 Mar 2026 01:41:34 +0100
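Subject: [PATCH] [perf](trx-backend-soapysdr): reduce frame emission overhead

demod.rs: keep the WFM polyphase FIR histories as contiguous,
oldest-first arrays that are shifted on append, so polyphase_resample
computes a single dot product and the shared hist_head ring index is
removed.

dsp.rs: emit completed PCM frames by copying from a read cursor into
frame_buf and compact the consumed prefix after the emit loop, instead
of draining the front of the buffer for every frame.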

Signed-off-by: Stan Grams <sjg@haxx.space>
---
 .../trx-backend-soapysdr/src/demod.rs         | 29 +++++++++----------
 .../trx-backend-soapysdr/src/dsp.rs           | 16 ++++++++--
 2 files changed, 27 insertions(+), 18 deletions(-)
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs
index 6a0da1d..0320719 100644
--- a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs
@@ -79,19 +79,21 @@ fn build_wfm_resample_bank() -> [[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES] {
     bank
 }
 
+#[inline]
+fn shift_append<const N: usize>(hist: &mut [f32; N], sample: f32) {
+    hist.rotate_left(1);
+    hist[N - 1] = sample;
+}
+
 #[inline]
 fn polyphase_resample(
     hist: &[f32; WFM_RESAMP_TAPS],
-    hist_head: usize,
     bank: &[[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES],
     frac: f32,
 ) -> f32 {
     let phase = (frac.clamp(0.0, 0.999_999) * WFM_RESAMP_PHASES as f32).round() as usize;
     let phase = phase.min(WFM_RESAMP_PHASES - 1);
-    let coeffs = &bank[phase];
-    let first = WFM_RESAMP_TAPS - hist_head.min(WFM_RESAMP_TAPS);
-    dot_product(&hist[hist_head..], &coeffs[..first])
-        + dot_product(&hist[..hist_head], &coeffs[first..])
+    dot_product(&hist[..], &bank[phase][..])
 }
 
 #[inline]
@@ -691,8 +693,6 @@ pub struct WfmStereoDecoder {
     diff_hist: [f32; WFM_RESAMP_TAPS],
     /// History ring for polyphase FIR resampling of the quadrature diff channel.
     diff_q_hist: [f32; WFM_RESAMP_TAPS],
-    /// Shared ring head for the polyphase FIR histories; points to the oldest slot.
-    hist_head: usize,
     /// Previous pilot blend sample for simple linear interpolation.
     prev_blend: f32,
     /// Fractional phase increment per composite sample = audio_rate / composite_rate.
@@ -752,7 +752,6 @@ impl WfmStereoDecoder {
             sum_hist: [0.0; WFM_RESAMP_TAPS],
             diff_hist: [0.0; WFM_RESAMP_TAPS],
             diff_q_hist: [0.0; WFM_RESAMP_TAPS],
-            hist_head: 0,
             prev_blend: 0.0,
             output_phase_inc,
             output_phase: 0.0,
@@ -845,10 +844,9 @@ impl WfmStereoDecoder {
             // --- Polyphase FIR fractional resampling ---
             // This uses a short windowed-sinc bank instead of cubic interpolation
             // to reduce top-end overshoot/ringing near the audio cutoff.
-            self.sum_hist[self.hist_head] = sum;
-            self.diff_hist[self.hist_head] = diff_i;
-            self.diff_q_hist[self.hist_head] = diff_q;
-            self.hist_head = (self.hist_head + 1) % WFM_RESAMP_TAPS;
+            shift_append(&mut self.sum_hist, sum);
+            shift_append(&mut self.diff_hist, diff_i);
+            shift_append(&mut self.diff_q_hist, diff_q);
 
             let prev_phase = self.output_phase;
             self.output_phase += self.output_phase_inc;
@@ -862,9 +860,9 @@ impl WfmStereoDecoder {
             // interval. The FIR bank reconstructs a band-limited sample using
             // a fixed two-sample lookahead in the decoder.
             let frac = ((1.0 - prev_phase) / self.output_phase_inc) as f32;
-            let sum_i = polyphase_resample(&self.sum_hist, self.hist_head, &self.resample_bank, frac);
-            let diff_i = polyphase_resample(&self.diff_hist, self.hist_head, &self.resample_bank, frac);
-            let diff_q = polyphase_resample(&self.diff_q_hist, self.hist_head, &self.resample_bank, frac);
+            let sum_i = polyphase_resample(&self.sum_hist, &self.resample_bank, frac);
+            let diff_i = polyphase_resample(&self.diff_hist, &self.resample_bank, frac);
+            let diff_q = polyphase_resample(&self.diff_q_hist, &self.resample_bank, frac);
             let blend_i =
                 (self.prev_blend + frac * (stereo_blend_target - self.prev_blend)).clamp(0.0, 1.0);
             self.prev_blend = stereo_blend_target;
@@ -954,7 +952,6 @@ impl WfmStereoDecoder {
         self.sum_hist = [0.0; WFM_RESAMP_TAPS];
         self.diff_hist = [0.0; WFM_RESAMP_TAPS];
         self.diff_q_hist = [0.0; WFM_RESAMP_TAPS];
-        self.hist_head = 0;
         self.prev_blend = 0.0;
         self.output_phase = 0.0;
     }
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs
index 643bd6b..2c785cd 100644
--- a/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs
@@ -477,6 +477,8 @@ pub struct ChannelDsp {
     output_channels: usize,
     /// Accumulator for output PCM frames.
     pub frame_buf: Vec<f32>,
+    /// Read cursor into `frame_buf` for completed frame emission.
+    frame_buf_offset: usize,
     /// Target frame size in samples.
     pub frame_size: usize,
     /// Sender for completed PCM frames.
@@ -592,6 +594,7 @@ impl ChannelDsp {
         self.audio_agc = agc_for_mode(&self.mode, self.audio_sample_rate);
         self.audio_dc = dc_for_mode(&self.mode);
         self.frame_buf.clear();
+        self.frame_buf_offset = 0;
     }
 
     #[allow(clippy::too_many_arguments)]
@@ -649,6 +652,7 @@ impl ChannelDsp {
             decim_factor,
             output_channels,
             frame_buf: Vec::with_capacity(frame_size + output_channels),
+            frame_buf_offset: 0,
             frame_size,
             pcm_tx,
             scratch_mixed_i: Vec::with_capacity(IQ_BLOCK_SIZE),
@@ -894,10 +898,18 @@ impl ChannelDsp {
 
         // --- 5. Emit complete PCM frames ------------------------------------
         self.frame_buf.extend_from_slice(&audio);
-        while self.frame_buf.len() >= self.frame_size {
-            let frame: Vec<f32> = self.frame_buf.drain(..self.frame_size).collect();
+        while self.frame_buf.len().saturating_sub(self.frame_buf_offset) >= self.frame_size {
+            let start = self.frame_buf_offset;
+            let end = start + self.frame_size;
+            let frame = self.frame_buf[start..end].to_vec();
+            self.frame_buf_offset = end;
             let _ = self.pcm_tx.send(frame);
         }
+        if self.frame_buf_offset > 0 && self.frame_buf_offset * 2 >= self.frame_buf.len() {
+            self.frame_buf.copy_within(self.frame_buf_offset.., 0);
+            self.frame_buf.truncate(self.frame_buf.len() - self.frame_buf_offset);
+            self.frame_buf_offset = 0;
+        }
     }
 }