[perf](trx-backend-soapysdr): reduce frame emission overhead

Signed-off-by: Stan Grams <sjg@haxx.space>
2026-03-01 01:41:34 +01:00
parent 2ed4aaf46d
commit 33b218fce3
2 changed files with 27 additions and 18 deletions
@@ -79,19 +79,21 @@ fn build_wfm_resample_bank() -> [[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES] {
    bank
 }

+#[inline]
+fn shift_append<const N: usize>(hist: &mut [f32; N], sample: f32) {
+    hist.rotate_left(1);
+    hist[N - 1] = sample;
+}
+
 #[inline]
 fn polyphase_resample(
    hist: &[f32; WFM_RESAMP_TAPS],
-    hist_head: usize,
    bank: &[[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES],
    frac: f32,
 ) -> f32 {
    let phase = (frac.clamp(0.0, 0.999_999) * WFM_RESAMP_PHASES as f32).round() as usize;
    let phase = phase.min(WFM_RESAMP_PHASES - 1);
-    let coeffs = &bank[phase];
-    let first = WFM_RESAMP_TAPS - hist_head.min(WFM_RESAMP_TAPS);
-    dot_product(&hist[hist_head..], &coeffs[..first])
-        + dot_product(&hist[..hist_head], &coeffs[first..])
+    dot_product(&hist[..], &bank[phase][..])
 }

 #[inline]
@@ -691,8 +693,6 @@ pub struct WfmStereoDecoder {
    diff_hist: [f32; WFM_RESAMP_TAPS],
    /// History ring for polyphase FIR resampling of the quadrature diff channel.
    diff_q_hist: [f32; WFM_RESAMP_TAPS],
-    /// Shared ring head for the polyphase FIR histories; points to the oldest slot.
-    hist_head: usize,
    /// Previous pilot blend sample for simple linear interpolation.
    prev_blend: f32,
    /// Fractional phase increment per composite sample = audio_rate / composite_rate.
@@ -752,7 +752,6 @@ impl WfmStereoDecoder {
            sum_hist: [0.0; WFM_RESAMP_TAPS],
            diff_hist: [0.0; WFM_RESAMP_TAPS],
            diff_q_hist: [0.0; WFM_RESAMP_TAPS],
-            hist_head: 0,
            prev_blend: 0.0,
            output_phase_inc,
            output_phase: 0.0,
@@ -845,10 +844,9 @@ impl WfmStereoDecoder {
            // --- Polyphase FIR fractional resampling ---
            // This uses a short windowed-sinc bank instead of cubic interpolation
            // to reduce top-end overshoot/ringing near the audio cutoff.
-            self.sum_hist[self.hist_head] = sum;
-            self.diff_hist[self.hist_head] = diff_i;
-            self.diff_q_hist[self.hist_head] = diff_q;
-            self.hist_head = (self.hist_head + 1) % WFM_RESAMP_TAPS;
+            shift_append(&mut self.sum_hist, sum);
+            shift_append(&mut self.diff_hist, diff_i);
+            shift_append(&mut self.diff_q_hist, diff_q);

            let prev_phase = self.output_phase;
            self.output_phase += self.output_phase_inc;
@@ -862,9 +860,9 @@ impl WfmStereoDecoder {
            // interval. The FIR bank reconstructs a band-limited sample using
            // a fixed two-sample lookahead in the decoder.
            let frac = ((1.0 - prev_phase) / self.output_phase_inc) as f32;
-            let sum_i = polyphase_resample(&self.sum_hist, self.hist_head, &self.resample_bank, frac);
-            let diff_i = polyphase_resample(&self.diff_hist, self.hist_head, &self.resample_bank, frac);
-            let diff_q = polyphase_resample(&self.diff_q_hist, self.hist_head, &self.resample_bank, frac);
+            let sum_i = polyphase_resample(&self.sum_hist, &self.resample_bank, frac);
+            let diff_i = polyphase_resample(&self.diff_hist, &self.resample_bank, frac);
+            let diff_q = polyphase_resample(&self.diff_q_hist, &self.resample_bank, frac);
            let blend_i =
                (self.prev_blend + frac * (stereo_blend_target - self.prev_blend)).clamp(0.0, 1.0);
            self.prev_blend = stereo_blend_target;
@@ -954,7 +952,6 @@ impl WfmStereoDecoder {
        self.sum_hist = [0.0; WFM_RESAMP_TAPS];
        self.diff_hist = [0.0; WFM_RESAMP_TAPS];
        self.diff_q_hist = [0.0; WFM_RESAMP_TAPS];
-        self.hist_head = 0;
        self.prev_blend = 0.0;
        self.output_phase = 0.0;
    }
@@ -477,6 +477,8 @@ pub struct ChannelDsp {
    output_channels: usize,
    /// Accumulator for output PCM frames.
    pub frame_buf: Vec<f32>,
+    /// Read cursor into `frame_buf` for completed frame emission.
+    frame_buf_offset: usize,
    /// Target frame size in samples.
    pub frame_size: usize,
    /// Sender for completed PCM frames.
@@ -592,6 +594,7 @@ impl ChannelDsp {
        self.audio_agc = agc_for_mode(&self.mode, self.audio_sample_rate);
        self.audio_dc = dc_for_mode(&self.mode);
        self.frame_buf.clear();
+        self.frame_buf_offset = 0;
    }

    #[allow(clippy::too_many_arguments)]
@@ -649,6 +652,7 @@ impl ChannelDsp {
            decim_factor,
            output_channels,
            frame_buf: Vec::with_capacity(frame_size + output_channels),
+            frame_buf_offset: 0,
            frame_size,
            pcm_tx,
            scratch_mixed_i: Vec::with_capacity(IQ_BLOCK_SIZE),
@@ -894,10 +898,18 @@ impl ChannelDsp {

        // --- 5. Emit complete PCM frames ------------------------------------
        self.frame_buf.extend_from_slice(&audio);
-        while self.frame_buf.len() >= self.frame_size {
-            let frame: Vec<f32> = self.frame_buf.drain(..self.frame_size).collect();
+        while self.frame_buf.len().saturating_sub(self.frame_buf_offset) >= self.frame_size {
+            let start = self.frame_buf_offset;
+            let end = start + self.frame_size;
+            let frame = self.frame_buf[start..end].to_vec();
+            self.frame_buf_offset = end;
            let _ = self.pcm_tx.send(frame);
        }
+        if self.frame_buf_offset > 0 && self.frame_buf_offset * 2 >= self.frame_buf.len() {
+            self.frame_buf.copy_within(self.frame_buf_offset.., 0);
+            self.frame_buf.truncate(self.frame_buf.len() - self.frame_buf_offset);
+            self.frame_buf_offset = 0;
+        }
    }
 }