[perf](trx-backend-soapysdr): reduce frame emission overhead

Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
2026-03-01 01:41:34 +01:00
parent 2ed4aaf46d
commit 33b218fce3
2 changed files with 27 additions and 18 deletions
@@ -79,19 +79,21 @@ fn build_wfm_resample_bank() -> [[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES] {
bank bank
} }
/// Slides `hist` one slot toward the front and stores `sample` in the last slot.
///
/// The previous first element is discarded: after the call the array holds its
/// former elements `1..N` followed by `sample`. A zero-length history is left
/// untouched instead of panicking.
#[inline]
fn shift_append<const N: usize>(hist: &mut [f32; N], sample: f32) {
    // `checked_sub` doubles as the N == 0 guard: both the shift below and a
    // bare `N - 1` index would panic on an empty array.
    if let Some(last) = N.checked_sub(1) {
        // The oldest value is overwritten right away, so an overlapping copy is
        // sufficient; it does not need to be rotated around to the back first.
        hist.copy_within(1.., 0);
        hist[last] = sample;
    }
}
/// Evaluates one FIR output from a tap history using the coefficient row of
/// `bank` selected by the fractional position `frac`.
///
/// NOTE(review): this span is a side-by-side diff rendering, so most lines show
/// the pre-change and post-change text next to each other. The pre-change
/// variant takes an extra `hist_head` index and reads the history in two
/// pieces (`hist[hist_head..]` then `hist[..hist_head]`); the post-change
/// variant takes no index and does a single dot product over the whole array.
#[inline] #[inline]
fn polyphase_resample( fn polyphase_resample(
hist: &[f32; WFM_RESAMP_TAPS], hist: &[f32; WFM_RESAMP_TAPS],
hist_head: usize,
bank: &[[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES], bank: &[[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES],
frac: f32, frac: f32,
) -> f32 { ) -> f32 {
// Scale the clamped fraction to a phase index and round to the nearest row.
let phase = (frac.clamp(0.0, 0.999_999) * WFM_RESAMP_PHASES as f32).round() as usize; let phase = (frac.clamp(0.0, 0.999_999) * WFM_RESAMP_PHASES as f32).round() as usize;
// Rounding can still land on WFM_RESAMP_PHASES for `frac` close to 1, so cap
// the index at the last valid row.
let phase = phase.min(WFM_RESAMP_PHASES - 1); let phase = phase.min(WFM_RESAMP_PHASES - 1);
let coeffs = &bank[phase]; dot_product(&hist[..], &bank[phase][..])
// Pre-change only: `first` is the number of samples from `hist_head` to the end
// of the array; they pair with the leading `first` coefficients, and the
// wrapped-around samples before `hist_head` pair with the remaining ones.
let first = WFM_RESAMP_TAPS - hist_head.min(WFM_RESAMP_TAPS);
dot_product(&hist[hist_head..], &coeffs[..first])
+ dot_product(&hist[..hist_head], &coeffs[first..])
} }
#[inline] #[inline]
@@ -691,8 +693,6 @@ pub struct WfmStereoDecoder {
diff_hist: [f32; WFM_RESAMP_TAPS], diff_hist: [f32; WFM_RESAMP_TAPS],
/// History ring for polyphase FIR resampling of the quadrature diff channel. /// History ring for polyphase FIR resampling of the quadrature diff channel.
diff_q_hist: [f32; WFM_RESAMP_TAPS], diff_q_hist: [f32; WFM_RESAMP_TAPS],
/// Shared ring head for the polyphase FIR histories; points to the oldest slot.
hist_head: usize,
/// Previous pilot blend sample for simple linear interpolation. /// Previous pilot blend sample for simple linear interpolation.
prev_blend: f32, prev_blend: f32,
/// Fractional phase increment per composite sample = audio_rate / composite_rate. /// Fractional phase increment per composite sample = audio_rate / composite_rate.
@@ -752,7 +752,6 @@ impl WfmStereoDecoder {
sum_hist: [0.0; WFM_RESAMP_TAPS], sum_hist: [0.0; WFM_RESAMP_TAPS],
diff_hist: [0.0; WFM_RESAMP_TAPS], diff_hist: [0.0; WFM_RESAMP_TAPS],
diff_q_hist: [0.0; WFM_RESAMP_TAPS], diff_q_hist: [0.0; WFM_RESAMP_TAPS],
hist_head: 0,
prev_blend: 0.0, prev_blend: 0.0,
output_phase_inc, output_phase_inc,
output_phase: 0.0, output_phase: 0.0,
@@ -845,10 +844,9 @@ impl WfmStereoDecoder {
// --- Polyphase FIR fractional resampling --- // --- Polyphase FIR fractional resampling ---
// This uses a short windowed-sinc bank instead of cubic interpolation // This uses a short windowed-sinc bank instead of cubic interpolation
// to reduce top-end overshoot/ringing near the audio cutoff. // to reduce top-end overshoot/ringing near the audio cutoff.
self.sum_hist[self.hist_head] = sum; shift_append(&mut self.sum_hist, sum);
self.diff_hist[self.hist_head] = diff_i; shift_append(&mut self.diff_hist, diff_i);
self.diff_q_hist[self.hist_head] = diff_q; shift_append(&mut self.diff_q_hist, diff_q);
self.hist_head = (self.hist_head + 1) % WFM_RESAMP_TAPS;
let prev_phase = self.output_phase; let prev_phase = self.output_phase;
self.output_phase += self.output_phase_inc; self.output_phase += self.output_phase_inc;
@@ -862,9 +860,9 @@ impl WfmStereoDecoder {
// interval. The FIR bank reconstructs a band-limited sample using // interval. The FIR bank reconstructs a band-limited sample using
// a fixed two-sample lookahead in the decoder. // a fixed two-sample lookahead in the decoder.
let frac = ((1.0 - prev_phase) / self.output_phase_inc) as f32; let frac = ((1.0 - prev_phase) / self.output_phase_inc) as f32;
let sum_i = polyphase_resample(&self.sum_hist, self.hist_head, &self.resample_bank, frac); let sum_i = polyphase_resample(&self.sum_hist, &self.resample_bank, frac);
let diff_i = polyphase_resample(&self.diff_hist, self.hist_head, &self.resample_bank, frac); let diff_i = polyphase_resample(&self.diff_hist, &self.resample_bank, frac);
let diff_q = polyphase_resample(&self.diff_q_hist, self.hist_head, &self.resample_bank, frac); let diff_q = polyphase_resample(&self.diff_q_hist, &self.resample_bank, frac);
let blend_i = let blend_i =
(self.prev_blend + frac * (stereo_blend_target - self.prev_blend)).clamp(0.0, 1.0); (self.prev_blend + frac * (stereo_blend_target - self.prev_blend)).clamp(0.0, 1.0);
self.prev_blend = stereo_blend_target; self.prev_blend = stereo_blend_target;
@@ -954,7 +952,6 @@ impl WfmStereoDecoder {
self.sum_hist = [0.0; WFM_RESAMP_TAPS]; self.sum_hist = [0.0; WFM_RESAMP_TAPS];
self.diff_hist = [0.0; WFM_RESAMP_TAPS]; self.diff_hist = [0.0; WFM_RESAMP_TAPS];
self.diff_q_hist = [0.0; WFM_RESAMP_TAPS]; self.diff_q_hist = [0.0; WFM_RESAMP_TAPS];
self.hist_head = 0;
self.prev_blend = 0.0; self.prev_blend = 0.0;
self.output_phase = 0.0; self.output_phase = 0.0;
} }
@@ -477,6 +477,8 @@ pub struct ChannelDsp {
output_channels: usize, output_channels: usize,
/// Accumulator for output PCM frames. /// Accumulator for output PCM frames.
pub frame_buf: Vec<f32>, pub frame_buf: Vec<f32>,
/// Read cursor into `frame_buf` for completed frame emission.
frame_buf_offset: usize,
/// Target frame size in samples. /// Target frame size in samples.
pub frame_size: usize, pub frame_size: usize,
/// Sender for completed PCM frames. /// Sender for completed PCM frames.
@@ -592,6 +594,7 @@ impl ChannelDsp {
self.audio_agc = agc_for_mode(&self.mode, self.audio_sample_rate); self.audio_agc = agc_for_mode(&self.mode, self.audio_sample_rate);
self.audio_dc = dc_for_mode(&self.mode); self.audio_dc = dc_for_mode(&self.mode);
self.frame_buf.clear(); self.frame_buf.clear();
self.frame_buf_offset = 0;
} }
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
@@ -649,6 +652,7 @@ impl ChannelDsp {
decim_factor, decim_factor,
output_channels, output_channels,
frame_buf: Vec::with_capacity(frame_size + output_channels), frame_buf: Vec::with_capacity(frame_size + output_channels),
frame_buf_offset: 0,
frame_size, frame_size,
pcm_tx, pcm_tx,
scratch_mixed_i: Vec::with_capacity(IQ_BLOCK_SIZE), scratch_mixed_i: Vec::with_capacity(IQ_BLOCK_SIZE),
@@ -894,10 +898,18 @@ impl ChannelDsp {
// --- 5. Emit complete PCM frames ------------------------------------ // --- 5. Emit complete PCM frames ------------------------------------
self.frame_buf.extend_from_slice(&audio); self.frame_buf.extend_from_slice(&audio);
while self.frame_buf.len() >= self.frame_size { while self.frame_buf.len().saturating_sub(self.frame_buf_offset) >= self.frame_size {
let frame: Vec<f32> = self.frame_buf.drain(..self.frame_size).collect(); let start = self.frame_buf_offset;
let end = start + self.frame_size;
let frame = self.frame_buf[start..end].to_vec();
self.frame_buf_offset = end;
let _ = self.pcm_tx.send(frame); let _ = self.pcm_tx.send(frame);
} }
if self.frame_buf_offset > 0 && self.frame_buf_offset * 2 >= self.frame_buf.len() {
self.frame_buf.copy_within(self.frame_buf_offset.., 0);
self.frame_buf.truncate(self.frame_buf.len() - self.frame_buf_offset);
self.frame_buf_offset = 0;
}
} }
} }