[perf](trx-backend-soapysdr): batch wfm discriminator with avx2 atan2

Pre-compute all FM discriminator outputs using demod_fm_with_prev, which
processes 8 samples at a time via AVX2 atan2, then feed the scalar results
one at a time through the rest of the stereo pipeline. This eliminates the
per-sample f32::atan2 calls from the inner loop.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
2026-03-01 09:42:49 +01:00
parent 36d0e7e862
commit 132cd5b950
@@ -730,24 +730,18 @@ impl WfmStereoDecoder {
return Vec::new(); return Vec::new();
} }
let inv_pi = std::f32::consts::FRAC_1_PI; // Batch FM discriminator using AVX2 atan2 when available.
let disc = demod_fm_with_prev(samples, &mut self.prev_iq);
let mut output = Vec::with_capacity( let mut output = Vec::with_capacity(
((samples.len() as f64 * self.output_phase_inc).ceil() as usize + 1) ((samples.len() as f64 * self.output_phase_inc).ceil() as usize + 1)
* self.output_channels.max(1), * self.output_channels.max(1),
); );
let (trim_sin, trim_cos) = STEREO_SEPARATION_PHASE_TRIM.sin_cos(); let (trim_sin, trim_cos) = STEREO_SEPARATION_PHASE_TRIM.sin_cos();
for &sample in samples { for &disc_sample in &disc {
let x = if let Some(prev_sample) = self.prev_iq {
let product = sample * prev_sample.conj();
product.im.atan2(product.re) * inv_pi
} else {
0.0
};
self.prev_iq = Some(sample);
// Normalize discriminator output so ±75 kHz deviation maps to ±1.0. // Normalize discriminator output so ±75 kHz deviation maps to ±1.0.
let x = x * self.fm_gain; let x = disc_sample * self.fm_gain;
let pilot_tone = self.pilot_bpf.process(x); let pilot_tone = self.pilot_bpf.process(x);