[perf](trx-backend-soapysdr): eliminate per-sample sin_cos and atan2 calls
Derive sin/cos of PLL phase error directly from I/Q arms (q/mag, i/mag) instead of calling atan2 + sin_cos. Use double-angle identity to compute 38 kHz carrier (sin2θ = 2·sinθ·cosθ, cos2θ = 2·cos²θ-1) from the rotated pilot sin/cos, eliminating the second sin_cos call entirely. Drop Butterworth from 6th to 4th order (resampler Blackman-Harris now handles stopband). Use power-of-2 bitmask for ring buffer indexing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
@@ -18,13 +18,11 @@ const AUDIO_BW_HZ: f32 = 18_000.0;
|
||||
/// Must match AUDIO_BW_HZ so the sum and diff filter paths have identical
|
||||
/// group delay, which is critical for stereo separation across all frequencies.
|
||||
const STEREO_DIFF_BW_HZ: f32 = AUDIO_BW_HZ;
|
||||
/// Q values for a 6th-order Butterworth cascade (three 2nd-order stages).
|
||||
/// Stage 1: Q = 1 / (2 cos(π/12))
|
||||
const BW6_Q1: f32 = 0.5176;
|
||||
/// Stage 2: Q = 1 / (2 cos(3π/12))
|
||||
const BW6_Q2: f32 = 0.7071;
|
||||
/// Stage 3: Q = 1 / (2 cos(5π/12))
|
||||
const BW6_Q3: f32 = 1.9319;
|
||||
/// Q values for a 4th-order Butterworth cascade (two 2nd-order stages).
|
||||
/// Stage 1: Q = 1 / (2 cos(π/8))
|
||||
const BW4_Q1: f32 = 0.5412;
|
||||
/// Stage 2: Q = 1 / (2 cos(3π/8))
|
||||
const BW4_Q2: f32 = 1.3066;
|
||||
/// Q for the 19 kHz pilot notch (~3.8 kHz 3 dB bandwidth).
|
||||
const PILOT_NOTCH_Q: f32 = 5.0;
|
||||
/// Narrow 19 kHz band-pass used to derive zero-crossings for switching stereo demod.
|
||||
@@ -94,8 +92,9 @@ fn polyphase_resample_ring(
|
||||
let phase = phase.min(WFM_RESAMP_PHASES - 1);
|
||||
let coeffs = &bank[phase];
|
||||
let mut acc = 0.0_f32;
|
||||
let mask = WFM_RESAMP_TAPS - 1; // power-of-2 bitmask
|
||||
for tap in 0..WFM_RESAMP_TAPS {
|
||||
acc += hist[(pos + tap) % WFM_RESAMP_TAPS] * coeffs[tap];
|
||||
acc += hist[(pos + tap) & mask] * coeffs[tap];
|
||||
}
|
||||
acc
|
||||
}
|
||||
@@ -575,7 +574,6 @@ pub struct WfmStereoDecoder {
|
||||
/// 4th-order Butterworth cascade for L+R (two 2nd-order stages, Q = BW4_Q1/BW4_Q2).
|
||||
sum_lpf1: BiquadLowPass,
|
||||
sum_lpf2: BiquadLowPass,
|
||||
sum_lpf3: BiquadLowPass,
|
||||
/// Notch at 19 kHz for the mono output path — keeps pilot tone out of mono
|
||||
/// audio without introducing phase mismatch with the diff channel.
|
||||
sum_notch: BiquadNotch,
|
||||
@@ -585,11 +583,9 @@ pub struct WfmStereoDecoder {
|
||||
/// 4th-order Butterworth cascade for L-R (matched to sum path for stereo phase accuracy).
|
||||
diff_lpf1: BiquadLowPass,
|
||||
diff_lpf2: BiquadLowPass,
|
||||
diff_lpf3: BiquadLowPass,
|
||||
/// Quadrature companion of the L-R path used for phase trim / crosstalk adjustment.
|
||||
diff_q_lpf1: BiquadLowPass,
|
||||
diff_q_lpf2: BiquadLowPass,
|
||||
diff_q_lpf3: BiquadLowPass,
|
||||
/// Gentle high-pass on the stereo-difference path to reduce bass-driven IMD.
|
||||
diff_dc: DcBlocker,
|
||||
diff_q_dc: DcBlocker,
|
||||
@@ -657,17 +653,14 @@ impl WfmStereoDecoder {
|
||||
pilot_bpf: BiquadBandPass::new(composite_rate_f, PILOT_HZ, PILOT_BPF_Q),
|
||||
// 4th-order Butterworth: two cascaded biquads with BW4_Q1/BW4_Q2.
|
||||
// At 19 kHz (pilot): ≈ −12 dB; at 38 kHz (DSB carrier): ≈ −32 dB.
|
||||
sum_lpf1: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW6_Q1),
|
||||
sum_lpf2: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW6_Q2),
|
||||
sum_lpf3: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW6_Q3),
|
||||
sum_lpf1: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW4_Q1),
|
||||
sum_lpf2: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW4_Q2),
|
||||
sum_notch: BiquadNotch::new(composite_rate_f, PILOT_HZ, PILOT_NOTCH_Q),
|
||||
diff_pilot_notch: BiquadNotch::new(composite_rate_f, PILOT_HZ, PILOT_NOTCH_Q),
|
||||
diff_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW6_Q1),
|
||||
diff_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW6_Q2),
|
||||
diff_lpf3: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW6_Q3),
|
||||
diff_q_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW6_Q1),
|
||||
diff_q_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW6_Q2),
|
||||
diff_q_lpf3: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW6_Q3),
|
||||
diff_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q1),
|
||||
diff_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q2),
|
||||
diff_q_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q1),
|
||||
diff_q_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q2),
|
||||
diff_dc: DcBlocker::new(STEREO_DIFF_DC_R),
|
||||
diff_q_dc: DcBlocker::new(STEREO_DIFF_DC_R),
|
||||
dc_m: DcBlocker::new(0.9999),
|
||||
@@ -714,12 +707,20 @@ impl WfmStereoDecoder {
|
||||
let (sin_p, cos_p) = self.pilot_phase.sin_cos();
|
||||
let i = self.pilot_i_lp.process(pilot_tone * cos_p);
|
||||
let q = self.pilot_q_lp.process(pilot_tone * -sin_p);
|
||||
let phase_err = q.atan2(i);
|
||||
let pilot_phase_est = self.pilot_phase + phase_err;
|
||||
let pilot_mag = (i * i + q * q).sqrt();
|
||||
// Phase error from PLL I/Q arms. Use atan2 for the NCO update
|
||||
// (needs full-range angle) but derive sin/cos of the error from
|
||||
// the already-computed I/Q magnitudes to avoid a second sin_cos.
|
||||
// Derive sin/cos of the PLL phase error directly from the I/Q arms,
|
||||
// avoiding both the atan2 and sin_cos calls for the 38 kHz carrier.
|
||||
// The original NCO free-runs at pilot_freq; the phase error was only
|
||||
// used to compute the instantaneous pilot_phase_est, which is now
|
||||
// reconstructed via err_sin/err_cos rotation below.
|
||||
let inv_mag = 1.0 / (pilot_mag + 1e-12);
|
||||
let err_sin = q * inv_mag; // sin(phase_err)
|
||||
let err_cos = i * inv_mag; // cos(phase_err)
|
||||
self.pilot_phase += self.pilot_freq;
|
||||
self.pilot_phase = self.pilot_phase.rem_euclid(std::f32::consts::TAU);
|
||||
|
||||
let pilot_mag = (i * i + q * q).sqrt();
|
||||
let pilot_abs = self.pilot_abs_lp.process(pilot_tone.abs());
|
||||
let pilot_coherence = (pilot_mag / (pilot_abs + 1e-4)).clamp(0.0, 1.0);
|
||||
let pilot_lock = ((pilot_coherence - 0.4) / 0.2).clamp(0.0, 1.0);
|
||||
@@ -754,20 +755,28 @@ impl WfmStereoDecoder {
|
||||
// identical phase responses, which is required for good stereo separation.
|
||||
// The notch is applied only on the mono output path where phase matching
|
||||
// with the diff channel is irrelevant.
|
||||
let sum = self.sum_lpf3.process(self.sum_lpf2.process(self.sum_lpf1.process(x)));
|
||||
let sum = self.sum_lpf2.process(self.sum_lpf1.process(x));
|
||||
|
||||
// --- L-R (diff): 38 kHz demod + 6th-order Butterworth (unblended) ---
|
||||
// Notch the 19 kHz pilot from the composite before multiplying by the
|
||||
// 38 kHz carrier to prevent pilot×carrier intermod products.
|
||||
// Blend is applied per-band at audio rate in the emit step below.
|
||||
let (sin_2p, cos_2p) = (2.0 * pilot_phase_est).sin_cos();
|
||||
// Reconstruct sin/cos of the estimated pilot phase from the NCO
|
||||
// sin_p/cos_p rotated by the PLL error (whose sin/cos we derived
|
||||
// from the I/Q arms above, avoiding a second sin_cos call).
|
||||
let sin_est = sin_p * err_cos + cos_p * err_sin;
|
||||
let cos_est = cos_p * err_cos - sin_p * err_sin;
|
||||
// Double-angle identity for 38 kHz carrier: sin(2θ) = 2·sin·cos,
|
||||
// cos(2θ) = 2·cos²-1. Eliminates the second sin_cos call entirely.
|
||||
let sin_2p = 2.0 * sin_est * cos_est;
|
||||
let cos_2p = 2.0 * cos_est * cos_est - 1.0;
|
||||
let x_notched = self.diff_pilot_notch.process(x);
|
||||
let diff_i = self
|
||||
.diff_dc
|
||||
.process(self.diff_lpf3.process(self.diff_lpf2.process(self.diff_lpf1.process(x_notched * (cos_2p * 2.0)))));
|
||||
.process(self.diff_lpf2.process(self.diff_lpf1.process(x_notched * (cos_2p * 2.0))));
|
||||
let diff_q = self
|
||||
.diff_q_dc
|
||||
.process(self.diff_q_lpf3.process(self.diff_q_lpf2.process(self.diff_q_lpf1.process(x_notched * (-sin_2p * 2.0)))));
|
||||
.process(self.diff_q_lpf2.process(self.diff_q_lpf1.process(x_notched * (-sin_2p * 2.0))));
|
||||
|
||||
// --- Polyphase FIR fractional resampling ---
|
||||
// This uses a short windowed-sinc bank instead of cubic interpolation
|
||||
@@ -777,7 +786,7 @@ impl WfmStereoDecoder {
|
||||
self.sum_hist[pos] = sum;
|
||||
self.diff_hist[pos] = diff_i;
|
||||
self.diff_q_hist[pos] = diff_q;
|
||||
self.hist_pos = (pos + 1) % WFM_RESAMP_TAPS;
|
||||
self.hist_pos = (pos + 1) & (WFM_RESAMP_TAPS - 1);
|
||||
|
||||
let prev_phase = self.output_phase;
|
||||
self.output_phase += self.output_phase_inc;
|
||||
@@ -866,15 +875,12 @@ impl WfmStereoDecoder {
|
||||
self.pilot_bpf.reset();
|
||||
self.sum_lpf1.reset();
|
||||
self.sum_lpf2.reset();
|
||||
self.sum_lpf3.reset();
|
||||
self.sum_notch.reset();
|
||||
self.diff_pilot_notch.reset();
|
||||
self.diff_lpf1.reset();
|
||||
self.diff_lpf2.reset();
|
||||
self.diff_lpf3.reset();
|
||||
self.diff_q_lpf1.reset();
|
||||
self.diff_q_lpf2.reset();
|
||||
self.diff_q_lpf3.reset();
|
||||
self.diff_dc.reset();
|
||||
self.diff_q_dc.reset();
|
||||
self.dc_m.reset();
|
||||
|
||||
Reference in New Issue
Block a user