From 08b8c80cc311b01fddd0b37b9f26929ca6515edc Mon Sep 17 00:00:00 2001
From: Stan Grams <sjg@haxx.space>
Date: Sun, 1 Mar 2026 13:55:48 +0100
Subject: [PATCH] [refactor](workspace): split soapysdr demod and dsp modules

Split the SoapySDR backend demod and dsp code into focused modules while keeping behavior stable, and include the resulting formatting updates.

Co-authored-by: OpenAI Codex <codex@openai.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
---
 src/decoders/trx-rds/src/lib.rs               |   25 +-
 src/trx-client/src/config.rs                  |    4 +-
 src/trx-client/src/main.rs                    |    3 +-
 src/trx-client/src/remote_client.rs           |   28 +-
 .../trx-frontend/trx-frontend-http/src/api.rs |   11 +-
 src/trx-server/src/main.rs                    |   10 +-
 .../trx-backend-soapysdr/src/demod.rs         | 1819 +----------------
 .../trx-backend-soapysdr/src/demod/am.rs      |   56 +
 .../trx-backend-soapysdr/src/demod/fm.rs      |   59 +
 .../trx-backend-soapysdr/src/demod/math.rs    |   87 +
 .../src/demod/math_arm.rs                     |   17 +
 .../src/demod/math_x86.rs                     |  136 ++
 .../trx-backend-soapysdr/src/demod/ssb.rs     |   78 +
 .../trx-backend-soapysdr/src/demod/wfm.rs     | 1125 ++++++++++
 .../trx-backend-soapysdr/src/dsp.rs           |  892 +-------
 .../trx-backend-soapysdr/src/dsp/channel.rs   |  485 +++++
 .../trx-backend-soapysdr/src/dsp/filter.rs    |  300 +++
 17 files changed, 2438 insertions(+), 2697 deletions(-)
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/am.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/fm.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_arm.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_x86.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/ssb.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/wfm.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/channel.rs
 create mode 100644 src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/filter.rs

diff --git a/src/decoders/trx-rds/src/lib.rs b/src/decoders/trx-rds/src/lib.rs
index 4fe70b6..9b1604e 100644
--- a/src/decoders/trx-rds/src/lib.rs
+++ b/src/decoders/trx-rds/src/lib.rs
@@ -239,7 +239,13 @@ impl Candidate {
                 self.locked = false;
                 self.search_bits = 0;
                 self.search_reg = 0;
-                self.process_group(self.block_a, self.block_b, self.block_c, self.block_c_kind, data)
+                self.process_group(
+                    self.block_a,
+                    self.block_b,
+                    self.block_c,
+                    self.block_c_kind,
+                    data,
+                )
             }
             (_, BlockKind::A) => {
                 self.locked = true;
@@ -355,7 +361,9 @@ impl Candidate {
             self.ps_bytes[segment * 2 + 1] = sanitize_text_byte(b1);
             self.ps_seen[segment] = true;
             if self.ps_seen.iter().all(|seen| *seen) {
-                let ps = String::from_utf8_lossy(&self.ps_bytes).trim_end().to_string();
+                let ps = String::from_utf8_lossy(&self.ps_bytes)
+                    .trim_end()
+                    .to_string();
                 if !ps.is_empty() && self.state.program_service.as_deref() != Some(ps.as_str()) {
                     self.state.program_service = Some(ps);
                     changed = true;
@@ -395,7 +403,9 @@ impl Candidate {
                 } else {
                     (last_seen + 1) * 4
                 };
-                let rt = String::from_utf8_lossy(&self.rt_bytes[..rt_len]).trim_end().to_string();
+                let rt = String::from_utf8_lossy(&self.rt_bytes[..rt_len])
+                    .trim_end()
+                    .to_string();
                 if !rt.is_empty() && self.state.radio_text.as_deref() != Some(rt.as_str()) {
                     self.state.radio_text = Some(rt);
                     changed = true;
@@ -414,7 +424,9 @@ impl Candidate {
                 self.ptyn_seen[segment] = true;
             }
             if self.ptyn_seen.iter().all(|seen| *seen) {
-                let ptyn = String::from_utf8_lossy(&self.ptyn_bytes).trim_end().to_string();
+                let ptyn = String::from_utf8_lossy(&self.ptyn_bytes)
+                    .trim_end()
+                    .to_string();
                 if !ptyn.is_empty()
                     && self.state.program_type_name_long.as_deref() != Some(ptyn.as_str())
                 {
@@ -514,7 +526,10 @@ impl RdsDecoder {
                 if candidate.score >= self.best_score {
                     self.best_score = candidate.score;
                     let same_pi = self.best_state.as_ref().and_then(|state| state.pi) == update.pi;
-                    if publish_quality >= MIN_PUBLISH_QUALITY || same_pi || self.best_state.is_none() {
+                    if publish_quality >= MIN_PUBLISH_QUALITY
+                        || same_pi
+                        || self.best_state.is_none()
+                    {
                         self.best_state = Some(update);
                     }
                 }
diff --git a/src/trx-client/src/config.rs b/src/trx-client/src/config.rs
index 7aebda7..3c510b6 100644
--- a/src/trx-client/src/config.rs
+++ b/src/trx-client/src/config.rs
@@ -337,7 +337,9 @@ impl ClientConfig {
         }
         if let Some(rig_id) = &self.frontends.http.default_rig_id {
             if rig_id.trim().is_empty() {
-                return Err("[frontends.http].default_rig_id must not be empty when set".to_string());
+                return Err(
+                    "[frontends.http].default_rig_id must not be empty when set".to_string()
+                );
             }
         }
         if self.frontends.rigctl.enabled && self.frontends.rigctl.rig_ports.is_empty() {
diff --git a/src/trx-client/src/main.rs b/src/trx-client/src/main.rs
index 820536c..236ae67 100644
--- a/src/trx-client/src/main.rs
+++ b/src/trx-client/src/main.rs
@@ -353,8 +353,7 @@ async fn async_init() -> DynResult<AppState> {
                     first = false;
                 }
                 // Proxy channel: inject rig_id_override before forwarding to main tx.
-                let (proxy_tx, mut proxy_rx) =
-                    mpsc::channel::<RigRequest>(RIG_TASK_CHANNEL_BUFFER);
+                let (proxy_tx, mut proxy_rx) = mpsc::channel::<RigRequest>(RIG_TASK_CHANNEL_BUFFER);
                 let main_tx = tx.clone();
                 let rig_id_owned = rig_id.clone();
                 tokio::spawn(async move {
diff --git a/src/trx-client/src/remote_client.rs b/src/trx-client/src/remote_client.rs
index a7e4f7f..329463e 100644
--- a/src/trx-client/src/remote_client.rs
+++ b/src/trx-client/src/remote_client.rs
@@ -250,16 +250,25 @@ async fn send_command_no_state_update(
         writer.write_all(format!("{}\n", payload).as_bytes()),
     )
     .await
-    .map_err(|_| RigError::communication(format!("write timed out after {:?}", SPECTRUM_IO_TIMEOUT)))?
+    .map_err(|_| {
+        RigError::communication(format!("write timed out after {:?}", SPECTRUM_IO_TIMEOUT))
+    })?
     .map_err(|e| RigError::communication(format!("write failed: {e}")))?;
     time::timeout(SPECTRUM_IO_TIMEOUT, writer.flush())
         .await
-        .map_err(|_| RigError::communication(format!("flush timed out after {:?}", SPECTRUM_IO_TIMEOUT)))?
+        .map_err(|_| {
+            RigError::communication(format!("flush timed out after {:?}", SPECTRUM_IO_TIMEOUT))
+        })?
         .map_err(|e| RigError::communication(format!("flush failed: {e}")))?;
-    let line = time::timeout(SPECTRUM_IO_TIMEOUT, read_limited_line(reader, MAX_JSON_LINE_BYTES))
-        .await
-        .map_err(|_| RigError::communication(format!("read timed out after {:?}", SPECTRUM_IO_TIMEOUT)))?
-        .map_err(|e| RigError::communication(format!("read failed: {e}")))?;
+    let line = time::timeout(
+        SPECTRUM_IO_TIMEOUT,
+        read_limited_line(reader, MAX_JSON_LINE_BYTES),
+    )
+    .await
+    .map_err(|_| {
+        RigError::communication(format!("read timed out after {:?}", SPECTRUM_IO_TIMEOUT))
+    })?
+    .map_err(|e| RigError::communication(format!("read failed: {e}")))?;
     let line = line.ok_or_else(|| RigError::communication("connection closed by remote"))?;
     let resp: ClientResponse = serde_json::from_str(line.trim_end())
         .map_err(|e| RigError::communication(format!("invalid response: {e}")))?;
@@ -386,7 +395,12 @@ fn should_poll_spectrum(config: &RemoteClientConfig) -> bool {
         .known_rigs
         .lock()
         .ok()
-        .and_then(|entries| entries.iter().find(|entry| entry.rig_id == selected).cloned())
+        .and_then(|entries| {
+            entries
+                .iter()
+                .find(|entry| entry.rig_id == selected)
+                .cloned()
+        })
         .map(|entry| entry.state.initialized)
         .unwrap_or(true)
 }
diff --git a/src/trx-client/trx-frontend/trx-frontend-http/src/api.rs b/src/trx-client/trx-frontend/trx-frontend-http/src/api.rs
index 0e7f334..4744565 100644
--- a/src/trx-client/trx-frontend/trx-frontend-http/src/api.rs
+++ b/src/trx-client/trx-frontend/trx-frontend-http/src/api.rs
@@ -89,7 +89,10 @@ fn inject_frontend_meta(json: &str, meta: FrontendMeta) -> String {
     serde_json::to_string(&value).unwrap_or_else(|_| json.to_string())
 }
 
-fn frontend_meta_from_context(http_clients: usize, context: &FrontendRuntimeContext) -> FrontendMeta {
+fn frontend_meta_from_context(
+    http_clients: usize,
+    context: &FrontendRuntimeContext,
+) -> FrontendMeta {
     FrontendMeta {
         http_clients,
         rigctl_clients: context.rigctl_clients.load(Ordering::Relaxed),
@@ -314,11 +317,7 @@ pub async fn spectrum(
         IntervalStream::new(time::interval(Duration::from_millis(40))).filter_map(move |_| {
             let context = context_updates.clone();
             std::future::ready({
-                let next = context
-                    .spectrum
-                    .lock()
-                    .ok()
-                    .map(|g| g.snapshot());
+                let next = context.spectrum.lock().ok().map(|g| g.snapshot());
 
                 let payload = match next {
                     Some((revision, _frame)) if last_revision == Some(revision) => None,
diff --git a/src/trx-server/src/main.rs b/src/trx-server/src/main.rs
index 7643914..455db0d 100644
--- a/src/trx-server/src/main.rs
+++ b/src/trx-server/src/main.rs
@@ -820,11 +820,11 @@ async fn main() -> DynResult<()> {
         #[cfg(feature = "soapysdr")]
         let (sdr_prebuilt_rig, sdr_pcm_rx): (OptionalSdrRig, OptionalSdrPcmRx) =
             if rig_cfg.rig.access.access_type.as_deref() == Some("sdr") {
-            let (rig, pcm_rx) = build_sdr_rig_from_instance(rig_cfg)?;
-            (Some(rig), Some(pcm_rx))
-        } else {
-            (None, None)
-        };
+                let (rig, pcm_rx) = build_sdr_rig_from_instance(rig_cfg)?;
+                (Some(rig), Some(pcm_rx))
+            } else {
+                (None, None)
+            };
 
         #[cfg(not(feature = "soapysdr"))]
         let (sdr_prebuilt_rig, sdr_pcm_rx): (OptionalSdrRig, OptionalSdrPcmRx) = (None, None);
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs
index 99c49d3..c5ab076 100644
--- a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod.rs
@@ -2,274 +2,20 @@
 //
 // SPDX-License-Identifier: BSD-2-Clause
 
+mod am;
+mod fm;
+mod math;
+mod math_arm;
+mod math_x86;
+mod ssb;
+mod wfm;
+
 use num_complex::Complex;
-use trx_core::rig::state::{RdsData, RigMode};
-use trx_rds::RdsDecoder;
+use trx_core::rig::state::RigMode;
 
-const RDS_SUBCARRIER_HZ: f32 = 57_000.0;
-const RDS_BPF_Q: f32 = 10.0;
-/// Pilot tone frequency (Hz).
-const PILOT_HZ: f32 = 19_000.0;
-/// Audio bandwidth for WFM (Hz).
-/// 15.8 kHz leaves guard band below the 19 kHz pilot and reduces top-end
-/// artifacts on strong signals while preserving the useful broadcast range.
-const AUDIO_BW_HZ: f32 = 18_000.0;
-/// Stereo L-R subchannel bandwidth for WFM (Hz).
-/// Must match AUDIO_BW_HZ so the sum and diff filter paths have identical
-/// group delay, which is critical for stereo separation across all frequencies.
-const STEREO_DIFF_BW_HZ: f32 = AUDIO_BW_HZ;
-/// Q values for a 4th-order Butterworth cascade (two 2nd-order stages).
-/// Stage 1: Q = 1 / (2 cos(π/8))
-const BW4_Q1: f32 = 0.5412;
-/// Stage 2: Q = 1 / (2 cos(3π/8))
-const BW4_Q2: f32 = 1.3066;
-/// Q for the 19 kHz pilot notch (~3.8 kHz 3 dB bandwidth).
-const PILOT_NOTCH_Q: f32 = 5.0;
-/// Narrow 19 kHz band-pass used to derive zero-crossings for switching stereo demod.
-const PILOT_BPF_Q: f32 = 20.0;
-/// Fixed phase trim on the recovered L-R channel to compensate pilot-path delay.
-const STEREO_SEPARATION_PHASE_TRIM: f32 = 0.0;
-/// Fixed gain trim on the recovered L-R channel.
-const STEREO_SEPARATION_GAIN: f32 = 1.000;
-/// Extra headroom in the stereo matrix to reduce stereo-only clipping/IMD on
-/// strong program material. This keeps bass excursions from flattening treble.
-const STEREO_MATRIX_GAIN: f32 = 1.20;
-/// Stereo detection runs every N composite samples to reduce CPU.
-const STEREO_DETECT_DECIMATION: u32 = 16;
-
-// ---------------------------------------------------------------------------
-// Stereo denoise constants
-// ---------------------------------------------------------------------------
-
-/// Number of frequency subbands for stereo denoise processing.
-const DENOISE_BANDS: usize = 6;
-/// Center frequencies (Hz) of each denoise subband.
-/// Spaced to cover the full 0–15 kHz audio range with overlapping skirts.
-const DENOISE_CENTERS: [f32; DENOISE_BANDS] = [250.0, 800.0, 2500.0, 5500.0, 10000.0, 16000.0];
-/// Q values for band-pass filters in each denoise subband.
-/// Low Q values produce wide overlapping bands for better spectral coverage.
-const DENOISE_Q: [f32; DENOISE_BANDS] = [0.3, 0.35, 0.4, 0.5, 0.6, 0.7];
-/// Noise power smoothing cutoff (Hz) — slow, noise is stationary.
-const DENOISE_NOISE_SMOOTH_HZ: f32 = 10.0;
-/// Signal power smoothing cutoff (Hz) — faster for transients.
-const DENOISE_SIGNAL_SMOOTH_HZ: f32 = 30.0;
-/// Diff noise correction factor.
-const DENOISE_BETA: f32 = 1.0;
-/// Sum noise correction factor.
-const DENOISE_ALPHA: f32 = 0.5;
-/// Noise floor to prevent division by zero.
-const DENOISE_FLOOR: f32 = 1e-10;
-/// Knee for SNR-based amplitude weighting.
-const DENOISE_KNEE: f32 = 4.0;
-/// Gentle high-pass memory for the stereo L-R path.
-/// This trims only very low-frequency difference energy that can eat headroom
-/// and modulate higher-frequency stereo detail.
-const STEREO_DIFF_DC_R: f32 = 0.9995;
-/// Fractional-resampler FIR taps for WFM audio reconstruction.
-const WFM_RESAMP_TAPS: usize = 32;
-/// Polyphase slots for the WFM fractional FIR resampler.
-const WFM_RESAMP_PHASES: usize = 64;
-fn build_wfm_resample_bank(cutoff: f32) -> [[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES] {
-    let mut bank = [[0.0; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES];
-    let anchor = (WFM_RESAMP_TAPS / 2 - 1) as f32;
-    for (phase_idx, phase) in bank.iter_mut().enumerate() {
-        let frac = phase_idx as f32 / WFM_RESAMP_PHASES as f32;
-        let center = anchor + frac;
-        let mut sum = 0.0_f32;
-        for (tap_idx, coeff) in phase.iter_mut().enumerate() {
-            let x = tap_idx as f32 - center;
-            let sinc = if x.abs() < 1e-6 {
-                cutoff
-            } else {
-                let arg = std::f32::consts::PI * x * cutoff;
-                arg.sin() / (std::f32::consts::PI * x)
-            };
-            let window = if WFM_RESAMP_TAPS == 1 {
-                1.0
-            } else {
-                let pos = tap_idx as f32 / (WFM_RESAMP_TAPS - 1) as f32;
-                let tw = 2.0 * std::f32::consts::PI * pos;
-                0.35875 - 0.48829 * tw.cos()
-                    + 0.14128 * (2.0 * tw).cos()
-                    - 0.01168 * (3.0 * tw).cos()
-            };
-            *coeff = sinc * window;
-            sum += *coeff;
-        }
-        if sum.abs() > 1e-9 {
-            let inv = 1.0 / sum;
-            for coeff in phase.iter_mut() {
-                *coeff *= inv;
-            }
-        }
-    }
-    bank
-}
-
-/// Polyphase FIR resample from a circular buffer.
-/// `pos` points to the oldest sample (next write position).
-#[inline]
-fn polyphase_resample_ring(
-    hist: &[f32; WFM_RESAMP_TAPS],
-    pos: usize,
-    bank: &[[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES],
-    frac: f32,
-) -> f32 {
-    let phase = (frac.clamp(0.0, 0.999_999) * WFM_RESAMP_PHASES as f32).round() as usize;
-    let phase = phase.min(WFM_RESAMP_PHASES - 1);
-    let coeffs = &bank[phase];
-    let mut acc = 0.0_f32;
-    let mask = WFM_RESAMP_TAPS - 1; // power-of-2 bitmask
-    for tap in 0..WFM_RESAMP_TAPS {
-        acc += hist[(pos + tap) & mask] * coeffs[tap];
-    }
-    acc
-}
-
-#[inline]
-fn fast_atan2(y: f32, x: f32) -> f32 {
-    if x == 0.0 {
-        if y > 0.0 {
-            return std::f32::consts::FRAC_PI_2;
-        }
-        if y < 0.0 {
-            return -std::f32::consts::FRAC_PI_2;
-        }
-        return 0.0;
-    }
-
-    #[inline]
-    fn fast_atan(z: f32) -> f32 {
-        let abs_z = z.abs();
-        if abs_z <= 1.0 {
-            z * (std::f32::consts::FRAC_PI_4 + 0.273 * (1.0 - abs_z))
-        } else {
-            let inv = 1.0 / z;
-            let base = inv * (std::f32::consts::FRAC_PI_4 + 0.273 * (1.0 - inv.abs()));
-            if z > 0.0 {
-                std::f32::consts::FRAC_PI_2 - base
-            } else {
-                -std::f32::consts::FRAC_PI_2 - base
-            }
-        }
-    }
-
-    if x > 0.0 {
-        fast_atan(y / x)
-    } else if x < 0.0 {
-        if y >= 0.0 {
-            fast_atan(y / x) + std::f32::consts::PI
-        } else {
-            fast_atan(y / x) - std::f32::consts::PI
-        }
-    } else {
-        0.0
-    }
-}
-
-/// 7th-order minimax atan approximation for |z| ≤ 1.
-/// Max error ≈ 2.4e-7 rad (~0.000014°).
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[target_feature(enable = "avx2")]
-unsafe fn atan_poly_avx2(z: std::arch::x86_64::__m256) -> std::arch::x86_64::__m256 {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-
-    // Coefficients for atan(z) ≈ z·(c0 + z²·(c1 + z²·(c2 + z²·c3)))
-    let c0 = _mm256_set1_ps(0.999_999_5_f32);
-    let c1 = _mm256_set1_ps(-0.333_326_1_f32);
-    let c2 = _mm256_set1_ps(0.199_777_1_f32);
-    let c3 = _mm256_set1_ps(-0.138_776_8_f32);
-
-    let z2 = _mm256_mul_ps(z, z);
-    // Horner: c2 + z²·c3
-    let p = _mm256_add_ps(c2, _mm256_mul_ps(z2, c3));
-    // c1 + z²·p
-    let p = _mm256_add_ps(c1, _mm256_mul_ps(z2, p));
-    // c0 + z²·p
-    let p = _mm256_add_ps(c0, _mm256_mul_ps(z2, p));
-    // z · p
-    _mm256_mul_ps(z, p)
-}
-
-/// Branchless AVX2 atan2 using argument reduction and 7th-order polynomial.
-///
-/// Uses the |y| > |x| swap technique to keep the atan input in [-1, 1],
-/// then corrects with π/2 and sign adjustments.  No divisions by zero,
-/// no NaN branches — pure SIMD throughput.
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[target_feature(enable = "avx2")]
-unsafe fn fast_atan2_8_avx2(
-    y: std::arch::x86_64::__m256,
-    x: std::arch::x86_64::__m256,
-) -> std::arch::x86_64::__m256 {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-
-    let abs_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFF_FFFF_u32 as i32));
-    let sign_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x8000_0000_u32 as i32));
-    let pi = _mm256_set1_ps(std::f32::consts::PI);
-    let pi_2 = _mm256_set1_ps(std::f32::consts::FRAC_PI_2);
-
-    let abs_y = _mm256_and_ps(y, abs_mask);
-    let abs_x = _mm256_and_ps(x, abs_mask);
-
-    // If |y| > |x|, swap so the atan input is in [-1, 1].
-    let swap_mask = _mm256_cmp_ps(abs_y, abs_x, _CMP_GT_OS);
-    let num = _mm256_blendv_ps(y, x, swap_mask);
-    let den = _mm256_blendv_ps(x, y, swap_mask);
-
-    // Add tiny epsilon to denominator to avoid 0/0 NaN when both x and y are zero.
-    let eps = _mm256_set1_ps(1.0e-30);
-    let safe_den = _mm256_or_ps(den, _mm256_and_ps(
-        _mm256_cmp_ps(den, _mm256_setzero_ps(), _CMP_EQ_OQ),
-        eps,
-    ));
-    let atan_input = _mm256_div_ps(num, safe_den);
-    let mut result = atan_poly_avx2(atan_input);
-
-    // If swapped, result = copysign(π/2, atan_input) - result
-    let adj = _mm256_sub_ps(
-        _mm256_or_ps(pi_2, _mm256_and_ps(atan_input, sign_mask)),
-        result,
-    );
-    result = _mm256_blendv_ps(result, adj, swap_mask);
-
-    // Quadrant correction: if x < 0, add ±π (sign from y).
-    let x_sign_mask = _mm256_castsi256_ps(_mm256_srai_epi32(
-        _mm256_castps_si256(x),
-        31,
-    ));
-    let correction = _mm256_and_ps(
-        _mm256_xor_ps(pi, _mm256_and_ps(sign_mask, y)),
-        x_sign_mask,
-    );
-    _mm256_add_ps(result, correction)
-}
-
-#[derive(Debug, Clone)]
-struct OnePoleLowPass {
-    alpha: f32,
-    y: f32,
-}
-
-#[derive(Debug, Clone)]
-struct BiquadBandPass {
-    b0: f32,
-    b1: f32,
-    b2: f32,
-    a1: f32,
-    a2: f32,
-    x1: f32,
-    x2: f32,
-    y1: f32,
-    y2: f32,
-}
+pub use self::wfm::WfmStereoDecoder;
 
+/// Shared DC blocker used by narrowband and WFM audio paths.
 #[derive(Debug, Clone)]
 pub(crate) struct DcBlocker {
     r: f32,
@@ -302,10 +48,8 @@ impl DcBlocker {
 /// Soft AGC with a fast-attack / slow-release envelope follower.
 ///
 /// Tracks the signal envelope and adjusts gain so the output level converges
-/// toward `target`.  Gain decreases quickly when the signal is louder than
-/// the target (prevents clipping) and recovers slowly during quieter periods
-/// (avoids pumping noise).  A `max_gain` cap prevents excessive amplification
-/// of noise during silence.
+/// toward `target`. Gain decreases quickly when the signal is louder than
+/// the target and recovers slowly during quieter periods.
 #[derive(Debug, Clone)]
 pub(crate) struct SoftAgc {
     gain: f32,
@@ -318,12 +62,6 @@ pub(crate) struct SoftAgc {
 
 impl SoftAgc {
     /// Create a new `SoftAgc`.
-    ///
-    /// - `sample_rate`: audio sample rate in Hz
-    /// - `attack_ms`:   envelope follower attack time (fast, e.g. 1–10 ms)
-    /// - `release_ms`:  envelope follower release time (slow, e.g. 50–1000 ms)
-    /// - `target`:      desired peak output level (e.g. `0.5`)
-    /// - `max_gain_db`: maximum gain cap in dB (e.g. `30.0`)
     pub(crate) fn new(
         sample_rate: f32,
         attack_ms: f32,
@@ -345,7 +83,6 @@ impl SoftAgc {
     }
 
     fn update_gain(&mut self, level: f32) -> f32 {
-        // Update envelope tracker (peak-hold with attack/release).
         let env_coeff = if level > self.envelope {
             self.attack_coeff
         } else {
@@ -353,7 +90,6 @@ impl SoftAgc {
         };
         self.envelope += env_coeff * (level - self.envelope);
 
-        // Compute desired gain; fast response when reducing, slow when recovering.
         if self.envelope > 1e-6 {
             let desired = (self.target / self.envelope).min(self.max_gain);
             let gain_coeff = if desired < self.gain {
@@ -392,737 +128,6 @@ impl SoftAgc {
     }
 }
 
-impl BiquadBandPass {
-    fn new(sample_rate: f32, center_hz: f32, q: f32) -> Self {
-        let sr = sample_rate.max(1.0);
-        let center = center_hz.clamp(100.0, sr * 0.45);
-        let q = q.max(0.2);
-        let w0 = 2.0 * std::f32::consts::PI * center / sr;
-        let alpha = w0.sin() / (2.0 * q);
-        let cos_w0 = w0.cos();
-
-        let a0 = 1.0 + alpha;
-        let inv_a0 = 1.0 / a0;
-
-        // RBJ band-pass, constant skirt gain.
-        let b0 = alpha * inv_a0;
-        let b1 = 0.0;
-        let b2 = -alpha * inv_a0;
-        let a1 = (-2.0 * cos_w0) * inv_a0;
-        let a2 = (1.0 - alpha) * inv_a0;
-
-        Self {
-            b0,
-            b1,
-            b2,
-            a1,
-            a2,
-            x1: 0.0,
-            x2: 0.0,
-            y1: 0.0,
-            y2: 0.0,
-        }
-    }
-
-    fn process(&mut self, x: f32) -> f32 {
-        let y = self.b0 * x + self.b1 * self.x1 + self.b2 * self.x2 - self.a1 * self.y1 - self.a2 * self.y2;
-        self.x2 = self.x1;
-        self.x1 = x;
-        self.y2 = self.y1;
-        self.y1 = y;
-        y
-    }
-
-    fn reset(&mut self) {
-        self.x1 = 0.0;
-        self.x2 = 0.0;
-        self.y1 = 0.0;
-        self.y2 = 0.0;
-    }
-}
-
-/// 2nd-order IIR low-pass filter (RBJ cookbook).  Use [`BW4_Q1`]/[`BW4_Q2`] in
-/// cascade for a proper 4th-order Butterworth response.
-#[derive(Debug, Clone)]
-struct BiquadLowPass {
-    b0: f32,
-    b1: f32,
-    b2: f32,
-    a1: f32,
-    a2: f32,
-    x1: f32,
-    x2: f32,
-    y1: f32,
-    y2: f32,
-}
-
-impl BiquadLowPass {
-    fn new(sample_rate: f32, cutoff_hz: f32, q: f32) -> Self {
-        let sr = sample_rate.max(1.0);
-        let fc = cutoff_hz.clamp(1.0, sr * 0.45);
-        let q = q.max(0.1);
-        let w0 = 2.0 * std::f32::consts::PI * fc / sr;
-        let alpha = w0.sin() / (2.0 * q);
-        let cos_w0 = w0.cos();
-        let a0_inv = 1.0 / (1.0 + alpha);
-        let b0 = (1.0 - cos_w0) * 0.5 * a0_inv;
-        let b1 = (1.0 - cos_w0) * a0_inv;
-        let b2 = b0;
-        let a1 = -2.0 * cos_w0 * a0_inv;
-        let a2 = (1.0 - alpha) * a0_inv;
-        Self { b0, b1, b2, a1, a2, x1: 0.0, x2: 0.0, y1: 0.0, y2: 0.0 }
-    }
-
-    fn process(&mut self, x: f32) -> f32 {
-        let y = self.b0 * x + self.b1 * self.x1 + self.b2 * self.x2
-            - self.a1 * self.y1 - self.a2 * self.y2;
-        self.x2 = self.x1;
-        self.x1 = x;
-        self.y2 = self.y1;
-        self.y1 = y;
-        y
-    }
-
-    fn reset(&mut self) {
-        self.x1 = 0.0;
-        self.x2 = 0.0;
-        self.y1 = 0.0;
-        self.y2 = 0.0;
-    }
-}
-
-/// 2nd-order IIR notch (band-reject) filter (RBJ cookbook).
-#[derive(Debug, Clone)]
-struct BiquadNotch {
-    b0: f32,
-    b1: f32,
-    b2: f32,
-    a1: f32,
-    a2: f32,
-    x1: f32,
-    x2: f32,
-    y1: f32,
-    y2: f32,
-}
-
-impl BiquadNotch {
-    fn new(sample_rate: f32, center_hz: f32, q: f32) -> Self {
-        let sr = sample_rate.max(1.0);
-        let fc = center_hz.clamp(1.0, sr * 0.45);
-        let q = q.max(0.1);
-        let w0 = 2.0 * std::f32::consts::PI * fc / sr;
-        let alpha = w0.sin() / (2.0 * q);
-        let cos_w0 = w0.cos();
-        let a0_inv = 1.0 / (1.0 + alpha);
-        let b0 = a0_inv;
-        let b1 = -2.0 * cos_w0 * a0_inv;
-        let b2 = a0_inv;
-        let a1 = b1;
-        let a2 = (1.0 - alpha) * a0_inv;
-        Self { b0, b1, b2, a1, a2, x1: 0.0, x2: 0.0, y1: 0.0, y2: 0.0 }
-    }
-
-    fn process(&mut self, x: f32) -> f32 {
-        let y = self.b0 * x + self.b1 * self.x1 + self.b2 * self.x2
-            - self.a1 * self.y1 - self.a2 * self.y2;
-        self.x2 = self.x1;
-        self.x1 = x;
-        self.y2 = self.y1;
-        self.y1 = y;
-        y
-    }
-
-    fn reset(&mut self) {
-        self.x1 = 0.0;
-        self.x2 = 0.0;
-        self.y1 = 0.0;
-        self.y2 = 0.0;
-    }
-}
-
-impl OnePoleLowPass {
-    fn new(sample_rate: f32, cutoff_hz: f32) -> Self {
-        let sr = sample_rate.max(1.0);
-        let cutoff = cutoff_hz.clamp(1.0, sr * 0.49);
-        let dt = 1.0 / sr;
-        let rc = 1.0 / (2.0 * std::f32::consts::PI * cutoff);
-        let alpha = dt / (rc + dt);
-        Self { alpha, y: 0.0 }
-    }
-
-    fn process(&mut self, x: f32) -> f32 {
-        self.y += self.alpha * (x - self.y);
-        self.y
-    }
-
-    fn reset(&mut self) {
-        self.y = 0.0;
-    }
-}
-
-#[derive(Debug, Clone)]
-struct Deemphasis {
-    alpha: f32,
-    y: f32,
-}
-
-impl Deemphasis {
-    fn new(sample_rate: f32, tau_us: f32) -> Self {
-        let sr = sample_rate.max(1.0);
-        let tau = (tau_us.max(1.0)) * 1e-6;
-        let alpha = 1.0 - (-1.0 / (sr * tau)).exp();
-        Self { alpha, y: 0.0 }
-    }
-
-    fn process(&mut self, x: f32) -> f32 {
-        self.y += self.alpha * (x - self.y);
-        self.y
-    }
-
-    fn reset(&mut self) {
-        self.y = 0.0;
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Stereo denoise — frequency-selective diff attenuation
-// ---------------------------------------------------------------------------
-
-/// Per-subband state for stereo denoise processing.
-#[derive(Debug, Clone)]
-struct DenoiseSubband {
-    /// Band-pass filters for sum, diff_i, and diff_q signals.
-    sum_bp: BiquadBandPass,
-    diff_i_bp: BiquadBandPass,
-    diff_q_bp: BiquadBandPass,
-    /// Smoothed noise power estimate (from |diff_q|²).
-    noise_lp: OnePoleLowPass,
-    /// Smoothed diff signal power estimate.
-    diff_lp: OnePoleLowPass,
-    /// Smoothed sum signal power estimate.
-    sum_lp: OnePoleLowPass,
-}
-
-impl DenoiseSubband {
-    fn new(audio_rate: f32, center_hz: f32, q: f32) -> Self {
-        Self {
-            sum_bp: BiquadBandPass::new(audio_rate, center_hz, q),
-            diff_i_bp: BiquadBandPass::new(audio_rate, center_hz, q),
-            diff_q_bp: BiquadBandPass::new(audio_rate, center_hz, q),
-            noise_lp: OnePoleLowPass::new(audio_rate, DENOISE_NOISE_SMOOTH_HZ),
-            diff_lp: OnePoleLowPass::new(audio_rate, DENOISE_SIGNAL_SMOOTH_HZ),
-            sum_lp: OnePoleLowPass::new(audio_rate, DENOISE_SIGNAL_SMOOTH_HZ),
-        }
-    }
-
-    /// Process one sample through this subband.
-    /// Returns `(gain, weight)` where `gain` is the per-band SNR-based
-    /// attenuation factor [0,1] and `weight` is the band's energy contribution
-    /// for computing a weighted-average broadband gain.
-    #[inline]
-    fn process(&mut self, sum: f32, diff_i: f32, diff_q: f32) -> (f32, f32) {
-        let sum_band = self.sum_bp.process(sum);
-        let diff_i_band = self.diff_i_bp.process(diff_i);
-        let diff_q_band = self.diff_q_bp.process(diff_q);
-
-        // Estimate noise power from quadrature component.
-        let noise_raw = diff_q_band * diff_q_band;
-        let noise_power = self.noise_lp.process(noise_raw).max(DENOISE_FLOOR);
-
-        // Estimate diff signal power, correcting for noise.
-        let diff_raw = diff_i_band * diff_i_band;
-        let diff_power = (self.diff_lp.process(diff_raw) - DENOISE_BETA * noise_power).max(0.0);
-
-        // Estimate sum signal power, correcting for noise.
-        let sum_raw = sum_band * sum_band;
-        let sum_power = (self.sum_lp.process(sum_raw) - DENOISE_ALPHA * noise_power).max(0.0);
-
-        // Hden: SNR-based gate — sqrt(diff_power / noise_power), clamped [0,1].
-        let hden = (diff_power / noise_power).sqrt().min(1.0);
-
-        // Weight A: diff SNR vs knee — amplitude weighting.
-        let diff_snr = diff_power / noise_power;
-        let weight_a = diff_snr / (diff_snr + DENOISE_KNEE);
-
-        // Weight B: mono content suppression — only active when noise is present.
-        // When noise is low (clean signal), weight_B → 1.0 (no suppression).
-        // When noise is high, weight_B → diff/(sum+diff) (suppress mono-correlated diff).
-        let noise_indicator = (noise_power / (diff_power + DENOISE_FLOOR)).min(1.0);
-        let weight_b_raw = diff_power / (sum_power + diff_power + DENOISE_FLOOR);
-        let weight_b = 1.0 - noise_indicator * (1.0 - weight_b_raw);
-
-        let gain = hden * weight_a * weight_b;
-        // Use smoothed diff power as band energy weight for broadband averaging.
-        let band_energy = self.diff_lp.y.max(DENOISE_FLOOR);
-        (gain, band_energy)
-    }
-
-    fn reset(&mut self) {
-        self.sum_bp.reset();
-        self.diff_i_bp.reset();
-        self.diff_q_bp.reset();
-        self.noise_lp.reset();
-        self.diff_lp.reset();
-        self.sum_lp.reset();
-    }
-}
-
-/// Frequency-selective stereo denoise processor.
-///
-/// Splits sum, diff_i, and diff_q into N subbands and attenuates the diff
-/// signal per-band based on SNR estimated from the quadrature component
-/// (diff_q), which serves as a noise reference.
-#[derive(Debug, Clone)]
-struct StereoDenoise {
-    bands: [DenoiseSubband; DENOISE_BANDS],
-    enabled: bool,
-}
-
-impl StereoDenoise {
-    fn new(audio_rate: f32) -> Self {
-        let bands = std::array::from_fn(|i| {
-            DenoiseSubband::new(audio_rate, DENOISE_CENTERS[i], DENOISE_Q[i])
-        });
-        Self {
-            bands,
-            enabled: true,
-        }
-    }
-
-    /// Process one sample. Returns the denoised diff_i signal.
-    /// Computes per-band SNR-based gains and applies their energy-weighted
-    /// average to the original diff_i, preserving perfect passthrough when
-    /// all bands have high SNR.
-    /// When disabled, returns `diff_i` unchanged.
-    #[inline]
-    fn process(&mut self, sum: f32, diff_i: f32, diff_q: f32) -> f32 {
-        if !self.enabled {
-            return diff_i;
-        }
-        let mut gain_sum = 0.0_f32;
-        let mut weight_sum = 0.0_f32;
-        for band in &mut self.bands {
-            let (gain, weight) = band.process(sum, diff_i, diff_q);
-            gain_sum += gain * weight;
-            weight_sum += weight;
-        }
-        let broadband_gain = if weight_sum > DENOISE_FLOOR {
-            (gain_sum / weight_sum).clamp(0.0, 1.0)
-        } else {
-            1.0
-        };
-        diff_i * broadband_gain
-    }
-
-    fn reset(&mut self) {
-        for band in &mut self.bands {
-            band.reset();
-        }
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct WfmStereoDecoder {
-    output_channels: usize,
-    stereo_enabled: bool,
-    rds_decoder: RdsDecoder,
-    rds_bpf: BiquadBandPass,
-    rds_dc: DcBlocker,
-    prev_iq: Option<Complex<f32>>,
-    /// Quadrature NCO state: (cos, sin) of pilot phase.
-    nco_cos: f32,
-    nco_sin: f32,
-    /// Precomputed rotation increment: (cos(Δ), sin(Δ)) where Δ = 2π·19000/fs.
-    nco_inc_cos: f32,
-    nco_inc_sin: f32,
-    /// Sample counter for periodic NCO renormalization.
-    nco_counter: u32,
-    pilot_i_lp: OnePoleLowPass,
-    pilot_q_lp: OnePoleLowPass,
-    pilot_abs_lp: OnePoleLowPass,
-    pilot_bpf: BiquadBandPass,
-    /// 4th-order Butterworth cascade for L+R (two 2nd-order stages, Q = BW4_Q1/BW4_Q2).
-    sum_lpf1: BiquadLowPass,
-    sum_lpf2: BiquadLowPass,
-    /// Notch at 19 kHz for the mono output path — keeps pilot tone out of mono
-    /// audio without introducing phase mismatch with the diff channel.
-    sum_notch: BiquadNotch,
-    /// Notch at 19 kHz on composite before diff demod — removes pilot that would
-    /// create intermod products when multiplied by the 38 kHz carrier.
-    diff_pilot_notch: BiquadNotch,
-    /// 4th-order Butterworth cascade for L-R (matched to sum path for stereo phase accuracy).
-    diff_lpf1: BiquadLowPass,
-    diff_lpf2: BiquadLowPass,
-    /// Quadrature companion of the L-R path used for phase trim / crosstalk adjustment.
-    diff_q_lpf1: BiquadLowPass,
-    diff_q_lpf2: BiquadLowPass,
-    /// Gentle high-pass on the stereo-difference path to reduce bass-driven IMD.
-    diff_dc: DcBlocker,
-    diff_q_dc: DcBlocker,
-    /// DC blockers on audio outputs — remove carrier-offset DC from the FM discriminator.
-    dc_m: DcBlocker,
-    dc_l: DcBlocker,
-    dc_r: DcBlocker,
-    deemph_m: Deemphasis,
-    deemph_l: Deemphasis,
-    deemph_r: Deemphasis,
-    /// Smoothed pilot-derived stereo detection strength in [0, 1].
-    stereo_detect_level: f32,
-    /// Hysteretic pilot-lock result used by the UI.
-    stereo_detected: bool,
-    /// Decimation counter for stereo detection (runs every STEREO_DETECT_DECIMATION samples).
-    detect_counter: u32,
-    /// Accumulated pilot magnitude for decimated detection.
-    detect_pilot_mag_acc: f32,
-    /// Accumulated pilot abs for decimated detection.
-    detect_pilot_abs_acc: f32,
-    /// FM discriminator gain normalization.
-    ///
-    /// `demod_fm` outputs `atan2(…)/π ≈ 2·Δf/fs` for small deviations.
-    /// For standard WFM ±75 kHz deviation we want ±1.0 at full deviation,
-    /// so `fm_gain = fs / (2 · 75_000)`.
-    fm_gain: f32,
-    /// Shared coefficient bank for the polyphase fractional audio resampler.
-    resample_bank: [[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES],
-    /// Circular buffer for polyphase FIR resampling of the sum channel.
-    sum_hist: [f32; WFM_RESAMP_TAPS],
-    /// Circular buffer for polyphase FIR resampling of the diff channel.
-    diff_hist: [f32; WFM_RESAMP_TAPS],
-    /// Circular buffer for polyphase FIR resampling of the quadrature diff channel.
-    diff_q_hist: [f32; WFM_RESAMP_TAPS],
-    /// Write position for the circular history buffers.
-    hist_pos: usize,
-    /// Frequency-selective stereo denoise processor.
-    denoise: StereoDenoise,
-    /// Previous pilot blend sample for simple linear interpolation.
-    prev_blend: f32,
-    /// Fractional phase increment per composite sample = audio_rate / composite_rate.
-    /// Avoids integer-division rate error when composite_rate is not an exact
-    /// multiple of audio_rate (e.g. 250 kHz composite → 48 kHz audio).
-    output_phase_inc: f64,
-    /// Fractional phase accumulator (0 .. 1).  Emits an output sample whenever
-    /// it crosses 1.0, ensuring the long-term rate is exactly audio_rate.
-    output_phase: f64,
-}
-
-impl WfmStereoDecoder {
-    pub fn new(
-        composite_rate: u32,
-        audio_rate: u32,
-        output_channels: usize,
-        stereo_enabled: bool,
-        deemphasis_us: u32,
-    ) -> Self {
-        let composite_rate_f = composite_rate.max(1) as f32;
-        let output_phase_inc = audio_rate.max(1) as f64 / composite_rate.max(1) as f64;
-        let deemphasis_us = deemphasis_us as f32;
-        Self {
-            output_channels: output_channels.max(1),
-            stereo_enabled,
-            rds_decoder: RdsDecoder::new(composite_rate),
-            rds_bpf: BiquadBandPass::new(composite_rate_f, RDS_SUBCARRIER_HZ, RDS_BPF_Q),
-            rds_dc: DcBlocker::new(0.995),
-            prev_iq: None,
-            nco_cos: 1.0,
-            nco_sin: 0.0,
-            nco_inc_cos: (2.0 * std::f32::consts::PI * PILOT_HZ / composite_rate_f).cos(),
-            nco_inc_sin: (2.0 * std::f32::consts::PI * PILOT_HZ / composite_rate_f).sin(),
-            nco_counter: 0,
-            pilot_i_lp: OnePoleLowPass::new(composite_rate_f, 400.0),
-            pilot_q_lp: OnePoleLowPass::new(composite_rate_f, 400.0),
-            pilot_abs_lp: OnePoleLowPass::new(composite_rate_f, 400.0),
-            pilot_bpf: BiquadBandPass::new(composite_rate_f, PILOT_HZ, PILOT_BPF_Q),
-            // 4th-order Butterworth: two cascaded biquads with BW4_Q1/BW4_Q2.
-            // At 19 kHz (pilot): ≈ −12 dB; at 38 kHz (DSB carrier): ≈ −32 dB.
-            sum_lpf1: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW4_Q1),
-            sum_lpf2: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW4_Q2),
-            sum_notch: BiquadNotch::new(composite_rate_f, PILOT_HZ, PILOT_NOTCH_Q),
-            diff_pilot_notch: BiquadNotch::new(composite_rate_f, PILOT_HZ, PILOT_NOTCH_Q),
-            diff_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q1),
-            diff_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q2),
-            diff_q_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q1),
-            diff_q_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q2),
-            diff_dc: DcBlocker::new(STEREO_DIFF_DC_R),
-            diff_q_dc: DcBlocker::new(STEREO_DIFF_DC_R),
-            dc_m: DcBlocker::new(0.9999),
-            dc_l: DcBlocker::new(0.9999),
-            dc_r: DcBlocker::new(0.9999),
-            deemph_m: Deemphasis::new(audio_rate.max(1) as f32, deemphasis_us),
-            deemph_l: Deemphasis::new(audio_rate.max(1) as f32, deemphasis_us),
-            deemph_r: Deemphasis::new(audio_rate.max(1) as f32, deemphasis_us),
-            stereo_detect_level: 0.0,
-            stereo_detected: false,
-            detect_counter: 0,
-            detect_pilot_mag_acc: 0.0,
-            detect_pilot_abs_acc: 0.0,
-            fm_gain: composite_rate_f / (2.0 * 75_000.0),
-            resample_bank: build_wfm_resample_bank(audio_rate as f32 / composite_rate_f),
-            sum_hist: [0.0; WFM_RESAMP_TAPS],
-            diff_hist: [0.0; WFM_RESAMP_TAPS],
-            diff_q_hist: [0.0; WFM_RESAMP_TAPS],
-            hist_pos: 0,
-            denoise: StereoDenoise::new(audio_rate.max(1) as f32),
-            prev_blend: 0.0,
-            output_phase_inc,
-            output_phase: 0.0,
-        }
-    }
-
-    pub fn process_iq(&mut self, samples: &[Complex<f32>]) -> Vec<f32> {
-        if samples.is_empty() {
-            return Vec::new();
-        }
-
-        // Batch FM discriminator using AVX2 atan2 when available.
-        let disc = demod_fm_with_prev(samples, &mut self.prev_iq);
-
-        let mut output = Vec::with_capacity(
-            ((samples.len() as f64 * self.output_phase_inc).ceil() as usize + 1)
-                * self.output_channels.max(1),
-        );
-        let (trim_sin, trim_cos) = STEREO_SEPARATION_PHASE_TRIM.sin_cos();
-
-        for &disc_sample in &disc {
-            // Normalize discriminator output so ±75 kHz deviation maps to ±1.0.
-            let x = disc_sample * self.fm_gain;
-
-            let pilot_tone = self.pilot_bpf.process(x);
-
-            // --- Pilot phase estimator (quadrature NCO) ---
-            let sin_p = self.nco_sin;
-            let cos_p = self.nco_cos;
-            let i = self.pilot_i_lp.process(pilot_tone * cos_p);
-            let q = self.pilot_q_lp.process(pilot_tone * -sin_p);
-            let pilot_mag = (i * i + q * q).sqrt();
-            // Derive sin/cos of the PLL phase error from the I/Q arms directly.
-            let inv_mag = 1.0 / (pilot_mag + 1e-12);
-            let err_sin = q * inv_mag;
-            let err_cos = i * inv_mag;
-            // Advance NCO via complex rotation: (cos,sin) *= (cos_inc,sin_inc).
-            let new_cos = self.nco_cos * self.nco_inc_cos - self.nco_sin * self.nco_inc_sin;
-            let new_sin = self.nco_cos * self.nco_inc_sin + self.nco_sin * self.nco_inc_cos;
-            self.nco_cos = new_cos;
-            self.nco_sin = new_sin;
-            // Renormalize NCO every 1024 samples to prevent drift.
-            self.nco_counter += 1;
-            if self.nco_counter >= 1024 {
-                self.nco_counter = 0;
-                let mag = (self.nco_cos * self.nco_cos + self.nco_sin * self.nco_sin).sqrt();
-                let inv = 1.0 / mag;
-                self.nco_cos *= inv;
-                self.nco_sin *= inv;
-            }
-
-            // --- Decimated stereo detection ---
-            let pilot_abs = self.pilot_abs_lp.process(pilot_tone.abs());
-            self.detect_pilot_mag_acc += pilot_mag;
-            self.detect_pilot_abs_acc += pilot_abs;
-            self.detect_counter += 1;
-            if self.detect_counter >= STEREO_DETECT_DECIMATION {
-                let inv_n = 1.0 / STEREO_DETECT_DECIMATION as f32;
-                let avg_mag = self.detect_pilot_mag_acc * inv_n;
-                let avg_abs = self.detect_pilot_abs_acc * inv_n;
-                let pilot_coherence = (avg_mag / (avg_abs + 1e-4)).clamp(0.0, 1.0);
-                let pilot_lock = ((pilot_coherence - 0.4) / 0.2).clamp(0.0, 1.0);
-                let stereo_drive = (avg_mag * pilot_lock * 120.0).clamp(0.0, 1.0);
-                let detect_coeff = if stereo_drive > self.stereo_detect_level {
-                    0.0008 * STEREO_DETECT_DECIMATION as f32
-                } else {
-                    0.00005 * STEREO_DETECT_DECIMATION as f32
-                };
-                self.stereo_detect_level +=
-                    detect_coeff * (stereo_drive - self.stereo_detect_level);
-                if self.stereo_detected {
-                    if self.stereo_detect_level < 0.22 {
-                        self.stereo_detected = false;
-                    }
-                } else if self.stereo_detect_level > 0.6 {
-                    self.stereo_detected = true;
-                }
-                self.detect_counter = 0;
-                self.detect_pilot_mag_acc = 0.0;
-                self.detect_pilot_abs_acc = 0.0;
-            }
-            let stereo_blend_target = if self.stereo_detected {
-                1.0
-            } else {
-                0.0
-            };
-
-            // --- RDS ---
-            let rds_quality = (0.35 + pilot_mag * 20.0).clamp(0.35, 1.0);
-            let rds_band = self.rds_bpf.process(x);
-            let rds_clean = self.rds_dc.process(rds_band);
-            let _ = self.rds_decoder.process_sample(rds_clean, rds_quality);
-
-            // --- L+R (sum): 4th-order Butterworth ---
-            // The pilot notch is NOT applied here so the sum and diff paths have
-            // identical phase responses, which is required for good stereo separation.
-            // The notch is applied only on the mono output path where phase matching
-            // with the diff channel is irrelevant.
-            let sum = self.sum_lpf2.process(self.sum_lpf1.process(x));
-
-            // --- L-R (diff): 38 kHz demod + 6th-order Butterworth (unblended) ---
-            // Notch the 19 kHz pilot from the composite before multiplying by the
-            // 38 kHz carrier to prevent pilot×carrier intermod products.
-            // Blend is applied per-band at audio rate in the emit step below.
-            // Reconstruct sin/cos of the estimated pilot phase from the NCO
-            // sin_p/cos_p rotated by the PLL error (whose sin/cos we derived
-            // from the I/Q arms above, avoiding a second sin_cos call).
-            let sin_est = sin_p * err_cos + cos_p * err_sin;
-            let cos_est = cos_p * err_cos - sin_p * err_sin;
-            // Double-angle identity for 38 kHz carrier: sin(2θ) = 2·sin·cos,
-            // cos(2θ) = 2·cos²-1. Eliminates the second sin_cos call entirely.
-            let sin_2p = 2.0 * sin_est * cos_est;
-            let cos_2p = 2.0 * cos_est * cos_est - 1.0;
-            let x_notched = self.diff_pilot_notch.process(x);
-            let diff_i = self
-                .diff_dc
-                .process(self.diff_lpf2.process(self.diff_lpf1.process(x_notched * (cos_2p * 2.0))));
-            let diff_q = self
-                .diff_q_dc
-                .process(self.diff_q_lpf2.process(self.diff_q_lpf1.process(x_notched * (-sin_2p * 2.0))));
-
-            // --- Polyphase FIR fractional resampling ---
-            // This uses a short windowed-sinc bank instead of cubic interpolation
-            // to reduce top-end overshoot/ringing near the audio cutoff.
-            // Circular buffer: O(1) write instead of O(N) shift.
-            let pos = self.hist_pos;
-            self.sum_hist[pos] = sum;
-            self.diff_hist[pos] = diff_i;
-            self.diff_q_hist[pos] = diff_q;
-            self.hist_pos = (pos + 1) & (WFM_RESAMP_TAPS - 1);
-
-            let prev_phase = self.output_phase;
-            self.output_phase += self.output_phase_inc;
-            if self.output_phase < 1.0 {
-                self.prev_blend = stereo_blend_target;
-                continue;
-            }
-            self.output_phase -= 1.0;
-
-            // `frac` positions the output sample within the current fractional
-            // interval. The FIR bank reconstructs a band-limited sample using
-            // a fixed two-sample lookahead in the decoder.
-            let frac = ((1.0 - prev_phase) / self.output_phase_inc) as f32;
-            let ring_pos = self.hist_pos; // oldest sample = next write position
-            let sum_i = polyphase_resample_ring(&self.sum_hist, ring_pos, &self.resample_bank, frac);
-            let diff_i = polyphase_resample_ring(&self.diff_hist, ring_pos, &self.resample_bank, frac);
-            let diff_q = polyphase_resample_ring(&self.diff_q_hist, ring_pos, &self.resample_bank, frac);
-            let blend_i =
-                (self.prev_blend + frac * (stereo_blend_target - self.prev_blend)).clamp(0.0, 1.0);
-            self.prev_blend = stereo_blend_target;
-            let diff_i = (diff_i * trim_cos + diff_q * trim_sin) * STEREO_SEPARATION_GAIN;
-            let diff_i = self.denoise.process(sum_i, diff_i, diff_q);
-
-            // --- Deemphasis + DC block + output ---
-            if self.output_channels >= 2 && self.stereo_enabled {
-                let diff = diff_i * blend_i;
-                let left_corr = (sum_i + diff) * STEREO_MATRIX_GAIN;
-                let right_corr = (sum_i - diff) * STEREO_MATRIX_GAIN;
-                let left = self
-                    .dc_l
-                    .process(self.deemph_l.process(left_corr))
-                    .clamp(-1.0, 1.0);
-                let right = self
-                    .dc_r
-                    .process(self.deemph_r.process(right_corr))
-                    .clamp(-1.0, 1.0);
-                output.push(left);
-                output.push(right);
-            } else {
-                // Mono path: apply the pilot notch here so the 19 kHz pilot tone
-                // does not leak into mono audio.  Phase matching with diff is not
-                // a concern for mono, so the notch can sit anywhere in the chain.
-                let mono = self
-                    .dc_m
-                    .process(self.deemph_m.process(self.sum_notch.process(sum_i)))
-                    .clamp(-1.0, 1.0);
-                output.push(mono);
-                if self.output_channels >= 2 {
-                    output.push(mono);
-                }
-            }
-        }
-
-        output
-    }
-
-    pub fn set_stereo_enabled(&mut self, enabled: bool) {
-        self.stereo_enabled = enabled;
-    }
-
-    pub fn rds_data(&self) -> Option<RdsData> {
-        self.rds_decoder.snapshot()
-    }
-
-    pub fn reset_rds(&mut self) {
-        self.rds_decoder.reset();
-    }
-
-    pub fn reset_stereo_detect(&mut self) {
-        self.stereo_detect_level = 0.0;
-        self.stereo_detected = false;
-        self.detect_counter = 0;
-        self.detect_pilot_mag_acc = 0.0;
-        self.detect_pilot_abs_acc = 0.0;
-    }
-
-    pub fn reset_demod_state(&mut self) {
-        self.prev_iq = None;
-    }
-
-    pub fn reset_state(&mut self) {
-        self.rds_decoder.reset();
-        self.rds_bpf.reset();
-        self.rds_dc.reset();
-        self.prev_iq = None;
-        self.nco_cos = 1.0;
-        self.nco_sin = 0.0;
-        self.nco_counter = 0;
-        self.pilot_i_lp.reset();
-        self.pilot_q_lp.reset();
-        self.pilot_abs_lp.reset();
-        self.pilot_bpf.reset();
-        self.sum_lpf1.reset();
-        self.sum_lpf2.reset();
-        self.sum_notch.reset();
-        self.diff_pilot_notch.reset();
-        self.diff_lpf1.reset();
-        self.diff_lpf2.reset();
-        self.diff_q_lpf1.reset();
-        self.diff_q_lpf2.reset();
-        self.diff_dc.reset();
-        self.diff_q_dc.reset();
-        self.dc_m.reset();
-        self.dc_l.reset();
-        self.dc_r.reset();
-        self.deemph_m.reset();
-        self.deemph_l.reset();
-        self.deemph_r.reset();
-        self.stereo_detect_level = 0.0;
-        self.stereo_detected = false;
-        self.detect_counter = 0;
-        self.detect_pilot_mag_acc = 0.0;
-        self.detect_pilot_abs_acc = 0.0;
-        self.sum_hist = [0.0; WFM_RESAMP_TAPS];
-        self.diff_hist = [0.0; WFM_RESAMP_TAPS];
-        self.diff_q_hist = [0.0; WFM_RESAMP_TAPS];
-        self.hist_pos = 0;
-        self.denoise.reset();
-        self.prev_blend = 0.0;
-        self.output_phase = 0.0;
-    }
-
-    pub fn set_denoise_enabled(&mut self, enabled: bool) {
-        self.denoise.enabled = enabled;
-    }
-
-    pub fn stereo_detected(&self) -> bool {
-        self.stereo_detected
-    }
-}
-
 /// Selects the demodulation algorithm for a channel.
 #[derive(Debug, Clone, PartialEq)]
 pub enum Demodulator {
@@ -1132,9 +137,9 @@ pub enum Demodulator {
     Lsb,
     /// AM envelope detector: magnitude of IQ, DC-removed.
     Am,
-    /// Narrow-band FM: instantaneous frequency via quadrature (arg of s[n]*conj(s[n-1])).
+    /// Narrow-band FM: instantaneous frequency via quadrature.
     Fm,
-    /// Wide-band FM: same algorithm as FM, wider pre-filter (handled upstream in DSP).
+    /// Wide-band FM: same quadrature discriminator, wider filtering upstream.
     Wfm,
     /// CW: magnitude of IQ after narrow BPF (BPF applied upstream), envelope.
     Cw,
@@ -1154,321 +159,27 @@ impl Demodulator {
             RigMode::CW | RigMode::CWR => Self::Cw,
             RigMode::DIG => Self::Passthrough,
             // VHF/UHF packet radio (APRS, AX.25) is FM-encoded AFSK.
-            // FM-demodulate the signal before passing audio to the APRS decoder.
             RigMode::PKT => Self::Fm,
             RigMode::Other(_) => Self::Usb,
         }
     }
 
     /// Demodulate a block of baseband IQ samples.
-    ///
-    /// `samples`: complex f32 IQ already centred at 0 Hz.
-    /// Returns real f32 audio samples, same length as input.
     pub fn demodulate(&self, samples: &[Complex<f32>]) -> Vec<f32> {
         match self {
-            Self::Usb | Self::Passthrough => demod_usb(samples),
-            Self::Lsb => demod_lsb(samples),
-            Self::Am => demod_am(samples),
-            Self::Fm | Self::Wfm => demod_fm(samples),
-            Self::Cw => demod_cw(samples),
+            Self::Usb | Self::Passthrough => ssb::demod_usb(samples),
+            Self::Lsb => ssb::demod_lsb(samples),
+            Self::Am => am::demod_am(samples),
+            Self::Fm | Self::Wfm => fm::demod_fm(samples),
+            Self::Cw => ssb::demod_cw(samples),
         }
     }
 }
 
-// ---------------------------------------------------------------------------
-// USB / Passthrough
-// ---------------------------------------------------------------------------
-
-/// USB demodulator: take the real part of each IQ sample.
-fn demod_usb(samples: &[Complex<f32>]) -> Vec<f32> {
-    samples.iter().map(|s| s.re).collect()
-}
-
-// ---------------------------------------------------------------------------
-// LSB
-// ---------------------------------------------------------------------------
-
-/// LSB demodulator: LSB mixing is handled upstream by negating `channel_if_hz`;
-/// the demodulator itself is identical to USB — just take `.re`.
-fn demod_lsb(samples: &[Complex<f32>]) -> Vec<f32> {
-    samples.iter().map(|s| s.re).collect()
-}
-
-// ---------------------------------------------------------------------------
-// AM
-// ---------------------------------------------------------------------------
-
-/// AM envelope detector: magnitude of IQ.
-///
-/// Returns the raw envelope amplitude.  DC removal (carrier offset) and level
-/// normalisation are handled downstream by the per-channel DC blocker and AGC.
-fn demod_am(samples: &[Complex<f32>]) -> Vec<f32> {
-    samples
-        .iter()
-        .map(|s| (s.re * s.re + s.im * s.im).sqrt())
-        .collect()
-}
-
-// ---------------------------------------------------------------------------
-// FM / WFM
-// ---------------------------------------------------------------------------
-
-/// FM quadrature discriminator: instantaneous frequency via arg(s[n] * conj(s[n-1])).
-/// Output is in radians/sample, scaled by 1/π to normalise to [-1, 1].
-fn demod_fm_with_prev(
-    samples: &[Complex<f32>],
-    prev: &mut Option<Complex<f32>>,
-) -> Vec<f32> {
-    if samples.is_empty() {
-        return Vec::new();
-    }
-
-    let inv_pi = std::f32::consts::FRAC_1_PI;
-    let mut output = Vec::with_capacity(samples.len());
-
-    if let Some(prev_sample) = prev.as_ref().copied() {
-        let product = samples[0] * prev_sample.conj();
-        let angle = fast_atan2(product.im, product.re);
-        output.push(angle * inv_pi);
-    } else {
-        output.push(0.0_f32);
-    }
-
-    let mut i = 1usize;
-
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    if std::arch::is_x86_feature_detected!("avx2") {
-        i = demod_fm_body_avx2(samples, i, inv_pi, &mut output);
-    }
-
-    for idx in i..samples.len() {
-        let product = samples[idx] * samples[idx - 1].conj();
-        let angle = fast_atan2(product.im, product.re);
-        output.push(angle * inv_pi);
-    }
-
-    *prev = samples.last().copied();
-    output
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-fn demod_fm_body_avx2(
-    samples: &[Complex<f32>],
-    start: usize,
-    inv_pi: f32,
-    output: &mut Vec<f32>,
-) -> usize {
-    unsafe { demod_fm_body_avx2_impl(samples, start, inv_pi, output) }
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[target_feature(enable = "avx2")]
-unsafe fn demod_fm_body_avx2_impl(
-    samples: &[Complex<f32>],
-    start: usize,
-    inv_pi: f32,
-    output: &mut Vec<f32>,
-) -> usize {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-
-    let len = samples.len();
-    let mut i = start;
-    let mut cur_re = [0.0_f32; 8];
-    let mut cur_im = [0.0_f32; 8];
-    let mut prev_re = [0.0_f32; 8];
-    let mut prev_im = [0.0_f32; 8];
-    let mut angles = [0.0_f32; 8];
-    let inv_pi_v = _mm256_set1_ps(inv_pi);
-
-    while i + 8 <= len {
-        for lane in 0..8 {
-            let cur = samples[i + lane];
-            let prev = samples[i + lane - 1];
-            cur_re[lane] = cur.re;
-            cur_im[lane] = cur.im;
-            prev_re[lane] = prev.re;
-            prev_im[lane] = prev.im;
-        }
-
-        let cur_re_v = _mm256_loadu_ps(cur_re.as_ptr());
-        let cur_im_v = _mm256_loadu_ps(cur_im.as_ptr());
-        let prev_re_v = _mm256_loadu_ps(prev_re.as_ptr());
-        let prev_im_v = _mm256_loadu_ps(prev_im.as_ptr());
-
-        let re_v = _mm256_add_ps(
-            _mm256_mul_ps(cur_re_v, prev_re_v),
-            _mm256_mul_ps(cur_im_v, prev_im_v),
-        );
-        let im_v = _mm256_sub_ps(
-            _mm256_mul_ps(cur_im_v, prev_re_v),
-            _mm256_mul_ps(cur_re_v, prev_im_v),
-        );
-
-        let angle_v = _mm256_mul_ps(fast_atan2_8_avx2(im_v, re_v), inv_pi_v);
-        _mm256_storeu_ps(angles.as_mut_ptr(), angle_v);
-        output.extend_from_slice(&angles);
-
-        i += 8;
-    }
-
-    i
-}
-
-/// FM quadrature discriminator: instantaneous frequency via arg(s[n] * conj(s[n-1])).
-/// Output is in radians/sample, scaled by 1/π to normalise to [-1, 1].
-fn demod_fm(samples: &[Complex<f32>]) -> Vec<f32> {
-    let mut prev = None;
-    demod_fm_with_prev(samples, &mut prev)
-}
-
-// ---------------------------------------------------------------------------
-// CW
-// ---------------------------------------------------------------------------
-
-/// CW demodulator: take the real part of each baseband IQ sample.
-///
-/// The upstream FIR filter centres the CW carrier at the configured audio
-/// offset (e.g. 700 Hz), so demodulating identically to USB produces the
-/// characteristic CW side-tone.  Level normalisation is handled downstream
-/// by the per-channel AGC.
-fn demod_cw(samples: &[Complex<f32>]) -> Vec<f32> {
-    samples.iter().map(|s| s.re).collect()
-}
-
-// ---------------------------------------------------------------------------
-// Tests
-// ---------------------------------------------------------------------------
-
 #[cfg(test)]
 mod tests {
     use super::*;
-    use num_complex::Complex;
 
-    fn complex_tone(freq_norm: f32, len: usize) -> Vec<Complex<f32>> {
-        use std::f32::consts::TAU;
-        (0..len)
-            .map(|n| Complex::from_polar(1.0, TAU * freq_norm * n as f32))
-            .collect()
-    }
-
-    fn assert_approx_eq(a: f32, b: f32, tol: f32, label: &str) {
-        assert!(
-            (a - b).abs() <= tol,
-            "{}: expected {} ≈ {} (tol {})",
-            label,
-            a,
-            b,
-            tol
-        );
-    }
-
-    // Test 1: USB and Passthrough return the real part of each sample.
-    #[test]
-    fn test_usb_passthrough_takes_real_part() {
-        let input = vec![
-            Complex::new(1.0_f32, 2.0),
-            Complex::new(3.0, 4.0),
-            Complex::new(-1.0, 0.0),
-            Complex::new(0.0, -1.0),
-        ];
-        let expected = vec![1.0_f32, 3.0, -1.0, 0.0];
-
-        let usb_out = Demodulator::Usb.demodulate(&input);
-        assert_eq!(usb_out, expected, "USB should return real parts");
-
-        let pass_out = Demodulator::Passthrough.demodulate(&input);
-        assert_eq!(pass_out, expected, "Passthrough should return real parts");
-    }
-
-    // Test 2: LSB returns the real part (mixing handled upstream).
-    #[test]
-    fn test_lsb_takes_real_part() {
-        let input = vec![
-            Complex::new(1.0_f32, 2.0),
-            Complex::new(3.0, 4.0),
-            Complex::new(-1.0, 0.0),
-            Complex::new(0.0, -1.0),
-        ];
-        let expected = vec![1.0_f32, 3.0, -1.0, 0.0];
-
-        let lsb_out = Demodulator::Lsb.demodulate(&input);
-        assert_eq!(lsb_out, expected, "LSB should return real parts");
-    }
-
-    // Test 3: AM on a constant-magnitude signal returns the raw envelope (1.0).
-    // DC removal and normalization are handled downstream by the DC blocker and AGC.
-    #[test]
-    fn test_am_raw_magnitude_constant() {
-        let input: Vec<Complex<f32>> = (0..8).map(|_| Complex::new(1.0, 0.0)).collect();
-        let out = Demodulator::Am.demodulate(&input);
-        assert_eq!(out.len(), 8);
-        for (i, &v) in out.iter().enumerate() {
-            assert_approx_eq(v, 1.0, 1e-6, &format!("AM raw magnitude sample {}", i));
-        }
-    }
-
-    // Test 4: AM on alternating-magnitude signal returns the raw envelope.
-    #[test]
-    fn test_am_raw_magnitude_varying() {
-        let input = vec![
-            Complex::new(0.0_f32, 0.0),
-            Complex::new(1.0, 0.0),
-            Complex::new(0.0, 0.0),
-            Complex::new(1.0, 0.0),
-        ];
-        let expected = [0.0_f32, 1.0, 0.0, 1.0];
-        let out = Demodulator::Am.demodulate(&input);
-        assert_eq!(out.len(), 4);
-        for (i, (&got, &exp)) in out.iter().zip(expected.iter()).enumerate() {
-            assert_approx_eq(got, exp, 1e-6, &format!("AM raw magnitude sample {}", i));
-        }
-    }
-
-    // Test 5: FM discriminator on a pure tone at 0.25 cycles/sample.
-    // arg(s[n]*conj(s[n-1])) = 2π*0.25 = π/2; scaled by 1/π → 0.5.
-    #[test]
-    fn test_fm_tone_frequency() {
-        let input = complex_tone(0.25, 16);
-        let out = Demodulator::Fm.demodulate(&input);
-        assert_eq!(out.len(), 16);
-        // First sample is 0.0 by convention.
-        assert_approx_eq(out[0], 0.0, 1e-6, "FM tone sample 0 (zero by convention)");
-        // Remaining samples should be approximately 0.5.
-        for (i, &sample) in out.iter().enumerate().skip(1) {
-            assert_approx_eq(sample, 0.5, 0.01, &format!("FM tone sample {}", i));
-        }
-    }
-
-    // Test 6: FM discriminator on a DC (constant-phase) signal outputs all zeros.
-    #[test]
-    fn test_fm_silence_is_zero() {
-        let input: Vec<Complex<f32>> = (0..8).map(|_| Complex::new(1.0, 0.0)).collect();
-        let out = Demodulator::Fm.demodulate(&input);
-        assert_eq!(out.len(), 8);
-        for (i, &v) in out.iter().enumerate() {
-            assert_approx_eq(v, 0.0, 1e-6, &format!("FM silence sample {}", i));
-        }
-    }
-
-    // Test 7: CW demodulator returns the real part (same as USB).
-    #[test]
-    fn test_cw_takes_real_part() {
-        let input = vec![
-            Complex::new(3.0_f32, 4.0),
-            Complex::new(0.0, 0.0),
-            Complex::new(1.0, 0.0),
-        ];
-        let out = Demodulator::Cw.demodulate(&input);
-        assert_eq!(out.len(), 3);
-        assert_approx_eq(out[0], 3.0, 1e-6, "CW sample 0");
-        assert_approx_eq(out[1], 0.0, 1e-6, "CW sample 1");
-        assert_approx_eq(out[2], 1.0, 1e-6, "CW sample 2");
-    }
-
-    // Test 8: Demodulator::for_mode maps each RigMode to the correct variant.
     #[test]
     fn test_demodulator_for_mode_mapping() {
         assert_eq!(Demodulator::for_mode(&RigMode::USB), Demodulator::Usb);
@@ -1485,267 +196,9 @@ mod tests {
         assert_eq!(Demodulator::for_mode(&RigMode::PKT), Demodulator::Fm);
     }
 
-    // Test 9: Synthetic FM stereo — verify L/R separation.
-    //
-    // Generate a composite FM stereo signal with a 1 kHz tone on L only:
-    //   L = sin(2π·1000·t),  R = 0
-    //   sum  = L + R = sin(2π·1000·t)
-    //   diff = L - R = sin(2π·1000·t)
-    //   composite = sum + 0.1·cos(2π·19000·t) + diff·cos(2π·38000·t)
-    //
-    // FM-modulate at 240 kHz composite rate, run through WfmStereoDecoder,
-    // and verify L has significant energy while R is near zero.
-    #[test]
-    fn test_wfm_stereo_separation() {
-        use std::f32::consts::TAU;
-
-        let composite_rate: u32 = 240_000;
-        let audio_rate: u32 = 48_000;
-        let fs = composite_rate as f32;
-        let duration_secs = 0.5_f32; // 500 ms — enough for PLL lock + measurement
-        let num_samples = (fs * duration_secs) as usize;
-
-        // --- Build composite baseband ---
-        let audio_freq = 1000.0_f32;
-        let pilot_freq = 19_000.0_f32;
-        let carrier_freq = 38_000.0_f32;
-        let mut composite = vec![0.0_f32; num_samples];
-        for (n, sample) in composite.iter_mut().enumerate() {
-            let t = n as f32 / fs;
-            let audio = (TAU * audio_freq * t).sin(); // L = audio, R = 0
-            let sum = audio;       // L + R
-            let diff = audio;      // L - R
-            let pilot = 0.1 * (TAU * pilot_freq * t).cos();
-            let carrier = (TAU * carrier_freq * t).cos();
-            *sample = sum + pilot + diff * carrier;
-        }
-
-        // --- FM-modulate composite → IQ samples ---
-        // deviation: peak composite ≈ ±2.1, map to ±75 kHz
-        // phase per sample = 2π · (75000 / peak_composite) · composite[n] / fs
-        let peak_composite = 2.1_f32; // sum(1) + pilot(0.1) + diff(1)
-        let deviation_hz = 75_000.0_f32;
-        let mod_index = TAU * deviation_hz / (peak_composite * fs);
-        let mut phase: f32 = 0.0;
-        let mut iq = Vec::with_capacity(num_samples);
-        for &c in &composite {
-            phase += mod_index * c;
-            iq.push(Complex::from_polar(1.0, phase));
-        }
-
-        // --- Decode ---
-        let mut decoder = WfmStereoDecoder::new(
-            composite_rate,
-            audio_rate,
-            2,    // stereo output
-            true, // stereo enabled
-            50,   // 50 µs deemphasis
-        );
-        let output = decoder.process_iq(&iq);
-
-        // Output is interleaved L, R.  Skip the first 200 ms for PLL lock.
-        let skip_samples = (0.2 * audio_rate as f32) as usize;
-        let stereo_pairs = output.len() / 2;
-        assert!(
-            stereo_pairs > skip_samples + 100,
-            "not enough output samples: {} pairs, need > {}",
-            stereo_pairs,
-            skip_samples + 100
-        );
-
-        // Measure RMS of L and R channels in the measurement window.
-        let mut l_energy = 0.0_f64;
-        let mut r_energy = 0.0_f64;
-        let mut count = 0_u64;
-        for i in skip_samples..stereo_pairs {
-            let l = output[2 * i] as f64;
-            let r = output[2 * i + 1] as f64;
-            l_energy += l * l;
-            r_energy += r * r;
-            count += 1;
-        }
-        let l_rms = (l_energy / count as f64).sqrt();
-        let r_rms = (r_energy / count as f64).sqrt();
-
-        eprintln!("L RMS = {l_rms:.6}, R RMS = {r_rms:.6}");
-
-        // L should have significant energy (tone is present).
-        assert!(
-            l_rms > 0.01,
-            "L channel has no energy: L_rms = {l_rms:.6}"
-        );
-
-        // R should be much smaller than L (>20 dB separation).
-        let separation_db = if r_rms > 1e-10 {
-            20.0 * (l_rms / r_rms).log10()
-        } else {
-            f64::INFINITY
-        };
-        eprintln!("stereo separation = {separation_db:.1} dB");
-
-        assert!(
-            separation_db > 20.0,
-            "stereo separation too low: {separation_db:.1} dB (L_rms={l_rms:.6}, R_rms={r_rms:.6})"
-        );
-    }
-
-    /// Multi-tone stereo separation test.
-    ///
-    /// Generates a composite FM stereo signal with tones at 400, 2000, 8000
-    /// and 14000 Hz on a single channel (L-only then R-only), demodulates,
-    /// and verifies that the silent channel stays quiet across the full
-    /// audio band.  This catches group-delay and phase-trim problems that
-    /// a single 1 kHz tone would miss.
-    #[test]
-    fn test_wfm_stereo_separation_multitone() {
-        use std::f32::consts::TAU;
-
-        let composite_rate: u32 = 240_000;
-        let audio_rate: u32 = 48_000;
-        let fs = composite_rate as f32;
-        let duration_secs = 0.8_f32;
-        let num_samples = (fs * duration_secs) as usize;
-        let freqs = [400.0_f32, 2_000.0, 8_000.0, 14_000.0];
-        let pilot_freq = 19_000.0_f32;
-        let carrier_freq = 38_000.0_f32;
-
-        // Test both L-only (diff = +audio) and R-only (diff = -audio).
-        for (label, diff_sign) in [("L-only", 1.0_f32), ("R-only", -1.0_f32)] {
-            let mut composite = vec![0.0_f32; num_samples];
-            for (n, sample) in composite.iter_mut().enumerate() {
-                let t = n as f32 / fs;
-                let audio: f32 = freqs.iter().map(|&f| (TAU * f * t).sin()).sum::<f32>()
-                    / freqs.len() as f32;
-                let sum = audio;                     // L + R (same for both cases)
-                let diff = audio * diff_sign;        // L - R
-                let pilot = 0.1 * (TAU * pilot_freq * t).cos();
-                let carrier = (TAU * carrier_freq * t).cos();
-                *sample = sum + pilot + diff * carrier;
-            }
-
-            let peak_composite = 2.1_f32;
-            let deviation_hz = 75_000.0_f32;
-            let mod_index = TAU * deviation_hz / (peak_composite * fs);
-            let mut phase: f32 = 0.0;
-            let mut iq = Vec::with_capacity(num_samples);
-            for &c in &composite {
-                phase += mod_index * c;
-                iq.push(Complex::from_polar(1.0, phase));
-            }
-
-            let mut decoder = WfmStereoDecoder::new(
-                composite_rate,
-                audio_rate,
-                2,
-                true,
-                50,
-            );
-            let output = decoder.process_iq(&iq);
-
-            let skip_samples = (0.3 * audio_rate as f32) as usize;
-            let stereo_pairs = output.len() / 2;
-            assert!(stereo_pairs > skip_samples + 100,
-                "{label}: not enough output samples");
-
-            let mut active_energy = 0.0_f64;
-            let mut silent_energy = 0.0_f64;
-            let mut count = 0_u64;
-            for i in skip_samples..stereo_pairs {
-                let l = output[2 * i] as f64;
-                let r = output[2 * i + 1] as f64;
-                if diff_sign > 0.0 {
-                    // L-only: L is active, R is silent
-                    active_energy += l * l;
-                    silent_energy += r * r;
-                } else {
-                    // R-only: R is active, L is silent
-                    active_energy += r * r;
-                    silent_energy += l * l;
-                }
-                count += 1;
-            }
-            let active_rms = (active_energy / count as f64).sqrt();
-            let silent_rms = (silent_energy / count as f64).sqrt();
-
-            let separation_db = if silent_rms > 1e-10 {
-                20.0 * (active_rms / silent_rms).log10()
-            } else {
-                f64::INFINITY
-            };
-
-            eprintln!("{label}: active RMS = {active_rms:.6}, silent RMS = {silent_rms:.6}, separation = {separation_db:.1} dB");
-
-            assert!(active_rms > 0.01,
-                "{label}: active channel has no energy: {active_rms:.6}");
-            assert!(separation_db > 15.0,
-                "{label}: multitone stereo separation too low: {separation_db:.1} dB");
-        }
-    }
-
-    #[test]
-    fn test_wfm_no_pilot_stays_mono_detect() {
-        use std::f32::consts::TAU;
-
-        let composite_rate: u32 = 240_000;
-        let audio_rate: u32 = 48_000;
-        let fs = composite_rate as f32;
-        let duration_secs = 0.5_f32;
-        let num_samples = (fs * duration_secs) as usize;
-
-        let audio_freq = 1000.0_f32;
-        let mut composite = vec![0.0_f32; num_samples];
-        for (n, sample) in composite.iter_mut().enumerate() {
-            let t = n as f32 / fs;
-            *sample = (TAU * audio_freq * t).sin();
-        }
-
-        let deviation_hz = 75_000.0_f32;
-        let mod_index = TAU * deviation_hz / fs;
-        let mut phase: f32 = 0.0;
-        let mut iq = Vec::with_capacity(num_samples);
-        for &c in &composite {
-            phase += mod_index * c;
-            iq.push(Complex::from_polar(1.0, phase));
-        }
-
-        let mut decoder = WfmStereoDecoder::new(
-            composite_rate,
-            audio_rate,
-            2,
-            true,
-            50,
-        );
-        let output = decoder.process_iq(&iq);
-
-        assert!(
-            !decoder.stereo_detected(),
-            "decoder should not detect stereo without a 19 kHz pilot"
-        );
-
-        let skip_samples = (0.2 * audio_rate as f32) as usize;
-        let stereo_pairs = output.len() / 2;
-        assert!(stereo_pairs > skip_samples + 100);
-
-        let mut diff_energy = 0.0_f64;
-        let mut count = 0_u64;
-        for i in skip_samples..stereo_pairs {
-            let l = output[2 * i] as f64;
-            let r = output[2 * i + 1] as f64;
-            let d = l - r;
-            diff_energy += d * d;
-            count += 1;
-        }
-        let diff_rms = (diff_energy / count as f64).sqrt();
-        assert!(
-            diff_rms < 0.01,
-            "mono signal without pilot should not develop audible stereo difference: diff_rms={diff_rms:.6}"
-        );
-    }
-
-    // Test 10: All demodulators return an empty Vec for empty input without panicking.
     #[test]
     fn test_empty_input() {
-        let empty: Vec<Complex<f32>> = Vec::new();
+        let empty = Vec::new();
         let demodulators = [
             Demodulator::Usb,
             Demodulator::Lsb,
@@ -1756,236 +209,10 @@ mod tests {
             Demodulator::Passthrough,
         ];
         for demod in &demodulators {
-            let out = demod.demodulate(&empty);
             assert!(
-                out.is_empty(),
-                "{:?} should return empty Vec for empty input",
-                demod
+                demod.demodulate(&empty).is_empty(),
+                "{demod:?} should return empty Vec for empty input",
             );
         }
     }
-
-    // --- Stereo denoise tests ---
-
-    /// Noisy diff should be attenuated by >6 dB when diff_q carries matching noise.
-    #[test]
-    fn test_denoise_noisy_diff_attenuation() {
-        let audio_rate = 48_000.0;
-        let num_samples = (audio_rate * 0.5) as usize; // 500 ms
-        let mut denoise = StereoDenoise::new(audio_rate);
-
-        // Use a simple PRNG for deterministic noise.
-        let mut rng_state = 12345_u64;
-        let mut next_noise = || -> f32 {
-            rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1);
-            ((rng_state >> 33) as f32 / (1u64 << 31) as f32) - 1.0
-        };
-
-        // Generate noise for diff_i and diff_q (correlated noise scenario).
-        let mut input_energy = 0.0_f64;
-        let mut output_energy = 0.0_f64;
-        let skip = (audio_rate * 0.1) as usize; // skip 100 ms warmup
-
-        for n in 0..num_samples {
-            let noise_i = next_noise() * 0.5;
-            let noise_q = next_noise() * 0.5;
-            let sum = 0.0; // no sum signal
-            let out = denoise.process(sum, noise_i, noise_q);
-            if n >= skip {
-                input_energy += (noise_i as f64) * (noise_i as f64);
-                output_energy += (out as f64) * (out as f64);
-            }
-        }
-
-        let input_rms = (input_energy / (num_samples - skip) as f64).sqrt();
-        let output_rms = (output_energy / (num_samples - skip) as f64).sqrt();
-        let attenuation_db = 20.0 * (input_rms / (output_rms + 1e-20)).log10();
-
-        eprintln!("denoise noise attenuation: {attenuation_db:.1} dB (input_rms={input_rms:.6}, output_rms={output_rms:.6})");
-        assert!(
-            attenuation_db > 6.0,
-            "noisy diff should be attenuated by >6 dB, got {attenuation_db:.1} dB"
-        );
-    }
-
-    /// Clean stereo tone (diff_q = 0) should pass with <3 dB loss.
-    #[test]
-    fn test_denoise_clean_stereo_preservation() {
-        use std::f32::consts::TAU;
-
-        let audio_rate = 48_000.0;
-        let num_samples = (audio_rate * 0.5) as usize;
-        let mut denoise = StereoDenoise::new(audio_rate);
-        let tone_freq = 1000.0;
-
-        let mut input_energy = 0.0_f64;
-        let mut output_energy = 0.0_f64;
-        let skip = (audio_rate * 0.15) as usize;
-
-        for n in 0..num_samples {
-            let t = n as f32 / audio_rate;
-            let tone = (TAU * tone_freq * t).sin() * 0.5;
-            let sum = tone;    // mono content
-            let diff_i = tone; // stereo diff = same tone
-            let diff_q = 0.0;  // no noise reference
-            let out = denoise.process(sum, diff_i, diff_q);
-            if n >= skip {
-                input_energy += (diff_i as f64) * (diff_i as f64);
-                output_energy += (out as f64) * (out as f64);
-            }
-        }
-
-        let input_rms = (input_energy / (num_samples - skip) as f64).sqrt();
-        let output_rms = (output_energy / (num_samples - skip) as f64).sqrt();
-        let loss_db = 20.0 * (input_rms / (output_rms + 1e-20)).log10();
-
-        eprintln!("denoise clean tone loss: {loss_db:.1} dB (input_rms={input_rms:.6}, output_rms={output_rms:.6})");
-        assert!(
-            loss_db < 4.0,
-            "clean stereo tone should lose <4 dB, got {loss_db:.1} dB"
-        );
-    }
-
-    /// When disabled, denoise should return diff_i unchanged (bit-exact).
-    #[test]
-    fn test_denoise_bypass_when_disabled() {
-        let audio_rate = 48_000.0;
-        let mut denoise = StereoDenoise::new(audio_rate);
-        denoise.enabled = false;
-
-        let test_values = [0.0_f32, 0.5, -0.3, 1.0, -1.0, 0.001];
-        for &val in &test_values {
-            let out = denoise.process(0.1, val, 0.2);
-            assert_eq!(out, val, "bypass should return diff_i unchanged for {val}");
-        }
-    }
-
-    /// Per-band selectivity: clean low tone + noisy highs should preserve the low band.
-    #[test]
-    fn test_denoise_per_band_selectivity() {
-        use std::f32::consts::TAU;
-
-        let audio_rate = 48_000.0;
-        let num_samples = (audio_rate * 0.5) as usize;
-        let mut denoise = StereoDenoise::new(audio_rate);
-        let low_freq = 400.0;
-
-        let mut rng_state = 67890_u64;
-        let mut next_noise = || -> f32 {
-            rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1);
-            ((rng_state >> 33) as f32 / (1u64 << 31) as f32) - 1.0
-        };
-
-        let skip = (audio_rate * 0.15) as usize;
-        let mut output_samples = Vec::with_capacity(num_samples - skip);
-
-        for n in 0..num_samples {
-            let t = n as f32 / audio_rate;
-            let low_tone = (TAU * low_freq * t).sin() * 0.3;
-            let high_noise = next_noise() * 0.3;
-            let sum = low_tone;
-            let diff_i = low_tone + high_noise;
-            let diff_q = high_noise; // noise reference for high freqs
-            let out = denoise.process(sum, diff_i, diff_q);
-            if n >= skip {
-                output_samples.push(out);
-            }
-        }
-
-        // Measure energy in the low-frequency band using a bandpass filter.
-        let mut low_bp = BiquadBandPass::new(audio_rate, low_freq, 2.0);
-        let mut low_energy = 0.0_f64;
-        let mut total_energy = 0.0_f64;
-        for &s in &output_samples {
-            let low_part = low_bp.process(s);
-            low_energy += (low_part as f64) * (low_part as f64);
-            total_energy += (s as f64) * (s as f64);
-        }
-
-        let low_ratio = low_energy / (total_energy + 1e-20);
-        eprintln!("denoise selectivity: low-band energy ratio = {low_ratio:.4}");
-
-        // The low band tone should dominate after denoise removes high-freq noise.
-        assert!(
-            low_ratio > 0.3,
-            "low-band tone should be dominant after denoise, ratio = {low_ratio:.4}"
-        );
-    }
-
-    /// Integration test: existing stereo separation test should still pass with denoise.
-    #[test]
-    fn test_wfm_stereo_separation_with_denoise() {
-        use std::f32::consts::TAU;
-
-        let composite_rate: u32 = 240_000;
-        let audio_rate: u32 = 48_000;
-        let fs = composite_rate as f32;
-        let duration_secs = 0.5_f32;
-        let num_samples = (fs * duration_secs) as usize;
-
-        let audio_freq = 1000.0_f32;
-        let pilot_freq = 19_000.0_f32;
-        let carrier_freq = 38_000.0_f32;
-        let mut composite = vec![0.0_f32; num_samples];
-        for (n, sample) in composite.iter_mut().enumerate() {
-            let t = n as f32 / fs;
-            let audio = (TAU * audio_freq * t).sin();
-            let sum = audio;
-            let diff = audio;
-            let pilot = 0.1 * (TAU * pilot_freq * t).cos();
-            let carrier = (TAU * carrier_freq * t).cos();
-            *sample = sum + pilot + diff * carrier;
-        }
-
-        let peak_composite = 2.1_f32;
-        let deviation_hz = 75_000.0_f32;
-        let mod_index = TAU * deviation_hz / (peak_composite * fs);
-        let mut phase: f32 = 0.0;
-        let mut iq = Vec::with_capacity(num_samples);
-        for &c in &composite {
-            phase += mod_index * c;
-            iq.push(Complex::from_polar(1.0, phase));
-        }
-
-        let mut decoder = WfmStereoDecoder::new(
-            composite_rate,
-            audio_rate,
-            2,
-            true,
-            50,
-        );
-        // Denoise is enabled by default — verify it doesn't break stereo separation.
-        let output = decoder.process_iq(&iq);
-
-        let skip_samples = (0.2 * audio_rate as f32) as usize;
-        let stereo_pairs = output.len() / 2;
-        assert!(stereo_pairs > skip_samples + 100);
-
-        let mut l_energy = 0.0_f64;
-        let mut r_energy = 0.0_f64;
-        let mut count = 0_u64;
-        for i in skip_samples..stereo_pairs {
-            let l = output[2 * i] as f64;
-            let r = output[2 * i + 1] as f64;
-            l_energy += l * l;
-            r_energy += r * r;
-            count += 1;
-        }
-        let l_rms = (l_energy / count as f64).sqrt();
-        let r_rms = (r_energy / count as f64).sqrt();
-
-        let separation_db = if r_rms > 1e-10 {
-            20.0 * (l_rms / r_rms).log10()
-        } else {
-            f64::INFINITY
-        };
-
-        eprintln!("stereo+denoise: L RMS = {l_rms:.6}, R RMS = {r_rms:.6}, separation = {separation_db:.1} dB");
-
-        assert!(l_rms > 0.01, "L channel should have energy");
-        assert!(
-            separation_db > 15.0,
-            "stereo separation with denoise should be >15 dB, got {separation_db:.1} dB"
-        );
-    }
 }
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/am.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/am.rs
new file mode 100644
index 0000000..4320954
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/am.rs
@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+use num_complex::Complex;
+
+/// AM envelope detector: magnitude of IQ.
+pub(super) fn demod_am(samples: &[Complex<f32>]) -> Vec<f32> {
+    samples
+        .iter()
+        .map(|sample| (sample.re * sample.re + sample.im * sample.im).sqrt())
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::demod_am;
+    use num_complex::Complex;
+
+    fn assert_approx_eq(a: f32, b: f32, tol: f32, label: &str) {
+        assert!(
+            (a - b).abs() <= tol,
+            "{}: expected {} ≈ {} (tol {})",
+            label,
+            a,
+            b,
+            tol
+        );
+    }
+
+    #[test]
+    fn test_am_raw_magnitude_constant() {
+        let input: Vec<Complex<f32>> = (0..8).map(|_| Complex::new(1.0, 0.0)).collect();
+        let out = demod_am(&input);
+        assert_eq!(out.len(), 8);
+        for (idx, &value) in out.iter().enumerate() {
+            assert_approx_eq(value, 1.0, 1e-6, &format!("AM sample {idx}"));
+        }
+    }
+
+    #[test]
+    fn test_am_raw_magnitude_varying() {
+        let input = vec![
+            Complex::new(0.0_f32, 0.0),
+            Complex::new(1.0, 0.0),
+            Complex::new(0.0, 0.0),
+            Complex::new(1.0, 0.0),
+        ];
+        let expected = [0.0_f32, 1.0, 0.0, 1.0];
+        let out = demod_am(&input);
+        assert_eq!(out.len(), 4);
+        for (idx, (&got, &exp)) in out.iter().zip(expected.iter()).enumerate() {
+            assert_approx_eq(got, exp, 1e-6, &format!("AM sample {idx}"));
+        }
+    }
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/fm.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/fm.rs
new file mode 100644
index 0000000..4930e82
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/fm.rs
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+use num_complex::Complex;
+
+use super::math::demod_fm_with_prev;
+
+/// FM quadrature discriminator: instantaneous frequency via `arg(s[n] * conj(s[n-1]))`.
+pub(super) fn demod_fm(samples: &[Complex<f32>]) -> Vec<f32> {
+    let mut prev = None;
+    demod_fm_with_prev(samples, &mut prev)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::demod_fm;
+    use num_complex::Complex;
+
+    fn complex_tone(freq_norm: f32, len: usize) -> Vec<Complex<f32>> {
+        use std::f32::consts::TAU;
+
+        (0..len)
+            .map(|n| Complex::from_polar(1.0, TAU * freq_norm * n as f32))
+            .collect()
+    }
+
+    fn assert_approx_eq(a: f32, b: f32, tol: f32, label: &str) {
+        assert!(
+            (a - b).abs() <= tol,
+            "{}: expected {} ≈ {} (tol {})",
+            label,
+            a,
+            b,
+            tol
+        );
+    }
+
+    #[test]
+    fn test_fm_tone_frequency() {
+        let input = complex_tone(0.25, 16);
+        let out = demod_fm(&input);
+        assert_eq!(out.len(), 16);
+        assert_approx_eq(out[0], 0.0, 1e-6, "FM tone sample 0");
+        for (idx, &sample) in out.iter().enumerate().skip(1) {
+            assert_approx_eq(sample, 0.5, 0.01, &format!("FM tone sample {idx}"));
+        }
+    }
+
+    #[test]
+    fn test_fm_silence_is_zero() {
+        let input: Vec<Complex<f32>> = (0..8).map(|_| Complex::new(1.0, 0.0)).collect();
+        let out = demod_fm(&input);
+        assert_eq!(out.len(), 8);
+        for (idx, &value) in out.iter().enumerate() {
+            assert_approx_eq(value, 0.0, 1e-6, &format!("FM silence sample {idx}"));
+        }
+    }
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math.rs
new file mode 100644
index 0000000..2d54fef
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math.rs
@@ -0,0 +1,87 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+use num_complex::Complex;
+
+#[inline]
+pub(super) fn fast_atan2(y: f32, x: f32) -> f32 {
+    if x == 0.0 {
+        if y > 0.0 {
+            return std::f32::consts::FRAC_PI_2;
+        }
+        if y < 0.0 {
+            return -std::f32::consts::FRAC_PI_2;
+        }
+        return 0.0;
+    }
+
+    #[inline]
+    fn fast_atan(z: f32) -> f32 {
+        let abs_z = z.abs();
+        if abs_z <= 1.0 {
+            z * (std::f32::consts::FRAC_PI_4 + 0.273 * (1.0 - abs_z))
+        } else {
+            let inv = 1.0 / z;
+            let base = inv * (std::f32::consts::FRAC_PI_4 + 0.273 * (1.0 - inv.abs()));
+            if z > 0.0 {
+                std::f32::consts::FRAC_PI_2 - base
+            } else {
+                -std::f32::consts::FRAC_PI_2 - base
+            }
+        }
+    }
+
+    if x > 0.0 {
+        fast_atan(y / x)
+    } else if x < 0.0 {
+        if y >= 0.0 {
+            fast_atan(y / x) + std::f32::consts::PI
+        } else {
+            fast_atan(y / x) - std::f32::consts::PI
+        }
+    } else {
+        0.0
+    }
+}
+
+/// FM quadrature discriminator: instantaneous frequency via `arg(s[n] * conj(s[n-1]))`.
+/// Output is in radians/sample, scaled by `1/π` to normalize to `[-1, 1]`.
+pub(super) fn demod_fm_with_prev(
+    samples: &[Complex<f32>],
+    prev: &mut Option<Complex<f32>>,
+) -> Vec<f32> {
+    if samples.is_empty() {
+        return Vec::new();
+    }
+
+    let inv_pi = std::f32::consts::FRAC_1_PI;
+    let mut output = Vec::with_capacity(samples.len());
+
+    if let Some(prev_sample) = prev.as_ref().copied() {
+        let product = samples[0] * prev_sample.conj();
+        output.push(fast_atan2(product.im, product.re) * inv_pi);
+    } else {
+        output.push(0.0);
+    }
+
+    let mut idx = 1usize;
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    if std::arch::is_x86_feature_detected!("avx2") {
+        idx = super::math_x86::demod_fm_body_avx2(samples, idx, inv_pi, &mut output);
+    }
+
+    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+    {
+        idx = super::math_arm::demod_fm_body_neon(samples, idx, inv_pi, &mut output);
+    }
+
+    for sample_idx in idx..samples.len() {
+        let product = samples[sample_idx] * samples[sample_idx - 1].conj();
+        output.push(fast_atan2(product.im, product.re) * inv_pi);
+    }
+
+    *prev = samples.last().copied();
+    output
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_arm.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_arm.rs
new file mode 100644
index 0000000..254192c
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_arm.rs
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+use num_complex::Complex;
+
+/// Placeholder hook for future ARM/NEON FM discriminator vectorization.
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+pub(super) fn demod_fm_body_neon(
+    _samples: &[Complex<f32>],
+    start: usize,
+    _inv_pi: f32,
+    _output: &mut Vec<f32>,
+) -> usize {
+    start
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_x86.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_x86.rs
new file mode 100644
index 0000000..2814a73
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/math_x86.rs
@@ -0,0 +1,136 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+use num_complex::Complex;
+
+/// 7th-order minimax atan approximation for |z| <= 1.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "avx2")]
+unsafe fn atan_poly_avx2(z: std::arch::x86_64::__m256) -> std::arch::x86_64::__m256 {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+
+    let c0 = _mm256_set1_ps(0.999_999_5_f32);
+    let c1 = _mm256_set1_ps(-0.333_326_1_f32);
+    let c2 = _mm256_set1_ps(0.199_777_1_f32);
+    let c3 = _mm256_set1_ps(-0.138_776_8_f32);
+
+    let z2 = _mm256_mul_ps(z, z);
+    let p = _mm256_add_ps(c2, _mm256_mul_ps(z2, c3));
+    let p = _mm256_add_ps(c1, _mm256_mul_ps(z2, p));
+    let p = _mm256_add_ps(c0, _mm256_mul_ps(z2, p));
+    _mm256_mul_ps(z, p)
+}
+
+/// Branchless AVX2 atan2 using argument reduction and polynomial evaluation.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "avx2")]
+unsafe fn fast_atan2_8_avx2(
+    y: std::arch::x86_64::__m256,
+    x: std::arch::x86_64::__m256,
+) -> std::arch::x86_64::__m256 {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+
+    let abs_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFF_FFFF_u32 as i32));
+    let sign_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x8000_0000_u32 as i32));
+    let pi = _mm256_set1_ps(std::f32::consts::PI);
+    let pi_2 = _mm256_set1_ps(std::f32::consts::FRAC_PI_2);
+
+    let abs_y = _mm256_and_ps(y, abs_mask);
+    let abs_x = _mm256_and_ps(x, abs_mask);
+
+    let swap_mask = _mm256_cmp_ps(abs_y, abs_x, _CMP_GT_OS);
+    let num = _mm256_blendv_ps(y, x, swap_mask);
+    let den = _mm256_blendv_ps(x, y, swap_mask);
+
+    let eps = _mm256_set1_ps(1.0e-30);
+    let safe_den = _mm256_or_ps(
+        den,
+        _mm256_and_ps(_mm256_cmp_ps(den, _mm256_setzero_ps(), _CMP_EQ_OQ), eps),
+    );
+    let atan_input = _mm256_div_ps(num, safe_den);
+    let mut result = atan_poly_avx2(atan_input);
+
+    let adj = _mm256_sub_ps(
+        _mm256_or_ps(pi_2, _mm256_and_ps(atan_input, sign_mask)),
+        result,
+    );
+    result = _mm256_blendv_ps(result, adj, swap_mask);
+
+    let x_sign_mask = _mm256_castsi256_ps(_mm256_srai_epi32(_mm256_castps_si256(x), 31));
+    let correction = _mm256_and_ps(_mm256_xor_ps(pi, _mm256_and_ps(sign_mask, y)), x_sign_mask);
+    _mm256_add_ps(result, correction)
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub(super) fn demod_fm_body_avx2(
+    samples: &[Complex<f32>],
+    start: usize,
+    inv_pi: f32,
+    output: &mut Vec<f32>,
+) -> usize {
+    unsafe { demod_fm_body_avx2_impl(samples, start, inv_pi, output) }
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "avx2")]
+unsafe fn demod_fm_body_avx2_impl(
+    samples: &[Complex<f32>],
+    start: usize,
+    inv_pi: f32,
+    output: &mut Vec<f32>,
+) -> usize {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+
+    let len = samples.len();
+    let mut idx = start;
+    let mut cur_re = [0.0_f32; 8];
+    let mut cur_im = [0.0_f32; 8];
+    let mut prev_re = [0.0_f32; 8];
+    let mut prev_im = [0.0_f32; 8];
+    let mut angles = [0.0_f32; 8];
+    let inv_pi_v = _mm256_set1_ps(inv_pi);
+
+    while idx + 8 <= len {
+        for lane in 0..8 {
+            let cur = samples[idx + lane];
+            let prev = samples[idx + lane - 1];
+            cur_re[lane] = cur.re;
+            cur_im[lane] = cur.im;
+            prev_re[lane] = prev.re;
+            prev_im[lane] = prev.im;
+        }
+
+        let cur_re_v = _mm256_loadu_ps(cur_re.as_ptr());
+        let cur_im_v = _mm256_loadu_ps(cur_im.as_ptr());
+        let prev_re_v = _mm256_loadu_ps(prev_re.as_ptr());
+        let prev_im_v = _mm256_loadu_ps(prev_im.as_ptr());
+
+        let re_v = _mm256_add_ps(
+            _mm256_mul_ps(cur_re_v, prev_re_v),
+            _mm256_mul_ps(cur_im_v, prev_im_v),
+        );
+        let im_v = _mm256_sub_ps(
+            _mm256_mul_ps(cur_im_v, prev_re_v),
+            _mm256_mul_ps(cur_re_v, prev_im_v),
+        );
+
+        let angle_v = _mm256_mul_ps(fast_atan2_8_avx2(im_v, re_v), inv_pi_v);
+        _mm256_storeu_ps(angles.as_mut_ptr(), angle_v);
+        output.extend_from_slice(&angles);
+
+        idx += 8;
+    }
+
+    idx
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/ssb.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/ssb.rs
new file mode 100644
index 0000000..fa7b997
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/ssb.rs
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+use num_complex::Complex;
+
+/// USB demodulator: take the real part of each IQ sample.
+pub(super) fn demod_usb(samples: &[Complex<f32>]) -> Vec<f32> {
+    samples.iter().map(|sample| sample.re).collect()
+}
+
+/// LSB demodulator: mixing is handled upstream by negating `channel_if_hz`.
+pub(super) fn demod_lsb(samples: &[Complex<f32>]) -> Vec<f32> {
+    samples.iter().map(|sample| sample.re).collect()
+}
+
+/// CW demodulator: take the real part of each baseband IQ sample.
+pub(super) fn demod_cw(samples: &[Complex<f32>]) -> Vec<f32> {
+    samples.iter().map(|sample| sample.re).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{demod_cw, demod_lsb, demod_usb};
+    use num_complex::Complex;
+
+    fn assert_approx_eq(a: f32, b: f32, tol: f32, label: &str) {
+        assert!(
+            (a - b).abs() <= tol,
+            "{}: expected {} ≈ {} (tol {})",
+            label,
+            a,
+            b,
+            tol
+        );
+    }
+
+    #[test]
+    fn test_usb_passthrough_takes_real_part() {
+        let input = vec![
+            Complex::new(1.0_f32, 2.0),
+            Complex::new(3.0, 4.0),
+            Complex::new(-1.0, 0.0),
+            Complex::new(0.0, -1.0),
+        ];
+        let expected = vec![1.0_f32, 3.0, -1.0, 0.0];
+
+        assert_eq!(demod_usb(&input), expected);
+        assert_eq!(demod_usb(&input), expected);
+    }
+
+    #[test]
+    fn test_lsb_takes_real_part() {
+        let input = vec![
+            Complex::new(1.0_f32, 2.0),
+            Complex::new(3.0, 4.0),
+            Complex::new(-1.0, 0.0),
+            Complex::new(0.0, -1.0),
+        ];
+        let expected = vec![1.0_f32, 3.0, -1.0, 0.0];
+
+        assert_eq!(demod_lsb(&input), expected);
+    }
+
+    #[test]
+    fn test_cw_takes_real_part() {
+        let input = vec![
+            Complex::new(3.0_f32, 4.0),
+            Complex::new(0.0, 0.0),
+            Complex::new(1.0, 0.0),
+        ];
+        let out = demod_cw(&input);
+        assert_eq!(out.len(), 3);
+        assert_approx_eq(out[0], 3.0, 1e-6, "CW sample 0");
+        assert_approx_eq(out[1], 0.0, 1e-6, "CW sample 1");
+        assert_approx_eq(out[2], 1.0, 1e-6, "CW sample 2");
+    }
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/wfm.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/wfm.rs
new file mode 100644
index 0000000..bb2833f
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/demod/wfm.rs
@@ -0,0 +1,1125 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+use num_complex::Complex;
+use trx_core::rig::state::RdsData;
+use trx_rds::RdsDecoder;
+
+use super::{math::demod_fm_with_prev, DcBlocker};
+
+const RDS_SUBCARRIER_HZ: f32 = 57_000.0;
+const RDS_BPF_Q: f32 = 10.0;
+/// Pilot tone frequency (Hz).
+const PILOT_HZ: f32 = 19_000.0;
+/// Audio bandwidth for WFM (Hz).
+const AUDIO_BW_HZ: f32 = 18_000.0;
+/// Stereo L-R subchannel bandwidth for WFM (Hz).
+const STEREO_DIFF_BW_HZ: f32 = AUDIO_BW_HZ;
+/// Stage 1 Butterworth Q.
+const BW4_Q1: f32 = 0.5412;
+/// Stage 2 Butterworth Q.
+const BW4_Q2: f32 = 1.3066;
+/// Q for the 19 kHz pilot notch.
+const PILOT_NOTCH_Q: f32 = 5.0;
+/// Narrow 19 kHz band-pass used to derive zero-crossings for switching stereo demod.
+const PILOT_BPF_Q: f32 = 20.0;
+/// Fixed phase trim on the recovered L-R channel.
+const STEREO_SEPARATION_PHASE_TRIM: f32 = 0.0;
+/// Fixed gain trim on the recovered L-R channel.
+const STEREO_SEPARATION_GAIN: f32 = 1.000;
+/// Extra headroom in the stereo matrix.
+const STEREO_MATRIX_GAIN: f32 = 1.20;
+/// Stereo detection runs every N composite samples.
+const STEREO_DETECT_DECIMATION: u32 = 16;
+
+const DENOISE_BANDS: usize = 6;
+const DENOISE_CENTERS: [f32; DENOISE_BANDS] = [250.0, 800.0, 2500.0, 5500.0, 10000.0, 16000.0];
+const DENOISE_Q: [f32; DENOISE_BANDS] = [0.3, 0.35, 0.4, 0.5, 0.6, 0.7];
+const DENOISE_NOISE_SMOOTH_HZ: f32 = 10.0;
+const DENOISE_SIGNAL_SMOOTH_HZ: f32 = 30.0;
+const DENOISE_BETA: f32 = 1.0;
+const DENOISE_ALPHA: f32 = 0.5;
+const DENOISE_FLOOR: f32 = 1e-10;
+const DENOISE_KNEE: f32 = 4.0;
+const STEREO_DIFF_DC_R: f32 = 0.9995;
+const WFM_RESAMP_TAPS: usize = 32;
+const WFM_RESAMP_PHASES: usize = 64;
+
+fn build_wfm_resample_bank(cutoff: f32) -> [[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES] {
+    let mut bank = [[0.0; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES];
+    let anchor = (WFM_RESAMP_TAPS / 2 - 1) as f32;
+    for (phase_idx, phase) in bank.iter_mut().enumerate() {
+        let frac = phase_idx as f32 / WFM_RESAMP_PHASES as f32;
+        let center = anchor + frac;
+        let mut sum = 0.0_f32;
+        for (tap_idx, coeff) in phase.iter_mut().enumerate() {
+            let x = tap_idx as f32 - center;
+            let sinc = if x.abs() < 1e-6 {
+                cutoff
+            } else {
+                let arg = std::f32::consts::PI * x * cutoff;
+                arg.sin() / (std::f32::consts::PI * x)
+            };
+            let window = if WFM_RESAMP_TAPS == 1 {
+                1.0
+            } else {
+                let pos = tap_idx as f32 / (WFM_RESAMP_TAPS - 1) as f32;
+                let tw = 2.0 * std::f32::consts::PI * pos;
+                0.35875 - 0.48829 * tw.cos() + 0.14128 * (2.0 * tw).cos()
+                    - 0.01168 * (3.0 * tw).cos()
+            };
+            *coeff = sinc * window;
+            sum += *coeff;
+        }
+        if sum.abs() > 1e-9 {
+            let inv = 1.0 / sum;
+            for coeff in phase.iter_mut() {
+                *coeff *= inv;
+            }
+        }
+    }
+    bank
+}
+
+#[inline]
+fn polyphase_resample_ring(
+    hist: &[f32; WFM_RESAMP_TAPS],
+    pos: usize,
+    bank: &[[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES],
+    frac: f32,
+) -> f32 {
+    let phase = (frac.clamp(0.0, 0.999_999) * WFM_RESAMP_PHASES as f32).round() as usize;
+    let phase = phase.min(WFM_RESAMP_PHASES - 1);
+    let coeffs = &bank[phase];
+    let mut acc = 0.0_f32;
+    let mask = WFM_RESAMP_TAPS - 1;
+    for tap in 0..WFM_RESAMP_TAPS {
+        acc += hist[(pos + tap) & mask] * coeffs[tap];
+    }
+    acc
+}
+
+#[derive(Debug, Clone)]
+struct OnePoleLowPass {
+    alpha: f32,
+    y: f32,
+}
+
+#[derive(Debug, Clone)]
+struct BiquadBandPass {
+    b0: f32,
+    b1: f32,
+    b2: f32,
+    a1: f32,
+    a2: f32,
+    x1: f32,
+    x2: f32,
+    y1: f32,
+    y2: f32,
+}
+
+impl BiquadBandPass {
+    fn new(sample_rate: f32, center_hz: f32, q: f32) -> Self {
+        let sr = sample_rate.max(1.0);
+        let center = center_hz.clamp(100.0, sr * 0.45);
+        let q = q.max(0.2);
+        let w0 = 2.0 * std::f32::consts::PI * center / sr;
+        let alpha = w0.sin() / (2.0 * q);
+        let cos_w0 = w0.cos();
+
+        let a0 = 1.0 + alpha;
+        let inv_a0 = 1.0 / a0;
+
+        Self {
+            b0: alpha * inv_a0,
+            b1: 0.0,
+            b2: -alpha * inv_a0,
+            a1: (-2.0 * cos_w0) * inv_a0,
+            a2: (1.0 - alpha) * inv_a0,
+            x1: 0.0,
+            x2: 0.0,
+            y1: 0.0,
+            y2: 0.0,
+        }
+    }
+
+    fn process(&mut self, x: f32) -> f32 {
+        let y = self.b0 * x + self.b1 * self.x1 + self.b2 * self.x2
+            - self.a1 * self.y1
+            - self.a2 * self.y2;
+        self.x2 = self.x1;
+        self.x1 = x;
+        self.y2 = self.y1;
+        self.y1 = y;
+        y
+    }
+
+    fn reset(&mut self) {
+        self.x1 = 0.0;
+        self.x2 = 0.0;
+        self.y1 = 0.0;
+        self.y2 = 0.0;
+    }
+}
+
+#[derive(Debug, Clone)]
+struct BiquadLowPass {
+    b0: f32,
+    b1: f32,
+    b2: f32,
+    a1: f32,
+    a2: f32,
+    x1: f32,
+    x2: f32,
+    y1: f32,
+    y2: f32,
+}
+
+impl BiquadLowPass {
+    fn new(sample_rate: f32, cutoff_hz: f32, q: f32) -> Self {
+        let sr = sample_rate.max(1.0);
+        let fc = cutoff_hz.clamp(1.0, sr * 0.45);
+        let q = q.max(0.1);
+        let w0 = 2.0 * std::f32::consts::PI * fc / sr;
+        let alpha = w0.sin() / (2.0 * q);
+        let cos_w0 = w0.cos();
+        let a0_inv = 1.0 / (1.0 + alpha);
+        let b0 = (1.0 - cos_w0) * 0.5 * a0_inv;
+        let b1 = (1.0 - cos_w0) * a0_inv;
+        let b2 = b0;
+        let a1 = -2.0 * cos_w0 * a0_inv;
+        let a2 = (1.0 - alpha) * a0_inv;
+        Self {
+            b0,
+            b1,
+            b2,
+            a1,
+            a2,
+            x1: 0.0,
+            x2: 0.0,
+            y1: 0.0,
+            y2: 0.0,
+        }
+    }
+
+    fn process(&mut self, x: f32) -> f32 {
+        let y = self.b0 * x + self.b1 * self.x1 + self.b2 * self.x2
+            - self.a1 * self.y1
+            - self.a2 * self.y2;
+        self.x2 = self.x1;
+        self.x1 = x;
+        self.y2 = self.y1;
+        self.y1 = y;
+        y
+    }
+
+    fn reset(&mut self) {
+        self.x1 = 0.0;
+        self.x2 = 0.0;
+        self.y1 = 0.0;
+        self.y2 = 0.0;
+    }
+}
+
+#[derive(Debug, Clone)]
+struct BiquadNotch {
+    b0: f32,
+    b1: f32,
+    b2: f32,
+    a1: f32,
+    a2: f32,
+    x1: f32,
+    x2: f32,
+    y1: f32,
+    y2: f32,
+}
+
+impl BiquadNotch {
+    fn new(sample_rate: f32, center_hz: f32, q: f32) -> Self {
+        let sr = sample_rate.max(1.0);
+        let fc = center_hz.clamp(1.0, sr * 0.45);
+        let q = q.max(0.1);
+        let w0 = 2.0 * std::f32::consts::PI * fc / sr;
+        let alpha = w0.sin() / (2.0 * q);
+        let cos_w0 = w0.cos();
+        let a0_inv = 1.0 / (1.0 + alpha);
+        Self {
+            b0: a0_inv,
+            b1: -2.0 * cos_w0 * a0_inv,
+            b2: a0_inv,
+            a1: -2.0 * cos_w0 * a0_inv,
+            a2: (1.0 - alpha) * a0_inv,
+            x1: 0.0,
+            x2: 0.0,
+            y1: 0.0,
+            y2: 0.0,
+        }
+    }
+
+    fn process(&mut self, x: f32) -> f32 {
+        let y = self.b0 * x + self.b1 * self.x1 + self.b2 * self.x2
+            - self.a1 * self.y1
+            - self.a2 * self.y2;
+        self.x2 = self.x1;
+        self.x1 = x;
+        self.y2 = self.y1;
+        self.y1 = y;
+        y
+    }
+
+    fn reset(&mut self) {
+        self.x1 = 0.0;
+        self.x2 = 0.0;
+        self.y1 = 0.0;
+        self.y2 = 0.0;
+    }
+}
+
+impl OnePoleLowPass {
+    fn new(sample_rate: f32, cutoff_hz: f32) -> Self {
+        let sr = sample_rate.max(1.0);
+        let cutoff = cutoff_hz.clamp(1.0, sr * 0.49);
+        let dt = 1.0 / sr;
+        let rc = 1.0 / (2.0 * std::f32::consts::PI * cutoff);
+        let alpha = dt / (rc + dt);
+        Self { alpha, y: 0.0 }
+    }
+
+    fn process(&mut self, x: f32) -> f32 {
+        self.y += self.alpha * (x - self.y);
+        self.y
+    }
+
+    fn reset(&mut self) {
+        self.y = 0.0;
+    }
+}
+
+#[derive(Debug, Clone)]
+struct Deemphasis {
+    alpha: f32,
+    y: f32,
+}
+
+impl Deemphasis {
+    fn new(sample_rate: f32, tau_us: f32) -> Self {
+        let sr = sample_rate.max(1.0);
+        let tau = tau_us.max(1.0) * 1e-6;
+        let alpha = 1.0 - (-1.0 / (sr * tau)).exp();
+        Self { alpha, y: 0.0 }
+    }
+
+    fn process(&mut self, x: f32) -> f32 {
+        self.y += self.alpha * (x - self.y);
+        self.y
+    }
+
+    fn reset(&mut self) {
+        self.y = 0.0;
+    }
+}
+
+#[derive(Debug, Clone)]
+struct DenoiseSubband {
+    sum_bp: BiquadBandPass,
+    diff_i_bp: BiquadBandPass,
+    diff_q_bp: BiquadBandPass,
+    noise_lp: OnePoleLowPass,
+    diff_lp: OnePoleLowPass,
+    sum_lp: OnePoleLowPass,
+}
+
+impl DenoiseSubband {
+    fn new(audio_rate: f32, center_hz: f32, q: f32) -> Self {
+        Self {
+            sum_bp: BiquadBandPass::new(audio_rate, center_hz, q),
+            diff_i_bp: BiquadBandPass::new(audio_rate, center_hz, q),
+            diff_q_bp: BiquadBandPass::new(audio_rate, center_hz, q),
+            noise_lp: OnePoleLowPass::new(audio_rate, DENOISE_NOISE_SMOOTH_HZ),
+            diff_lp: OnePoleLowPass::new(audio_rate, DENOISE_SIGNAL_SMOOTH_HZ),
+            sum_lp: OnePoleLowPass::new(audio_rate, DENOISE_SIGNAL_SMOOTH_HZ),
+        }
+    }
+
+    #[inline]
+    fn process(&mut self, sum: f32, diff_i: f32, diff_q: f32) -> (f32, f32) {
+        let sum_band = self.sum_bp.process(sum);
+        let diff_i_band = self.diff_i_bp.process(diff_i);
+        let diff_q_band = self.diff_q_bp.process(diff_q);
+
+        let noise_power = self
+            .noise_lp
+            .process(diff_q_band * diff_q_band)
+            .max(DENOISE_FLOOR);
+        let diff_power =
+            (self.diff_lp.process(diff_i_band * diff_i_band) - DENOISE_BETA * noise_power).max(0.0);
+        let sum_power =
+            (self.sum_lp.process(sum_band * sum_band) - DENOISE_ALPHA * noise_power).max(0.0);
+
+        let hden = (diff_power / noise_power).sqrt().min(1.0);
+        let diff_snr = diff_power / noise_power;
+        let weight_a = diff_snr / (diff_snr + DENOISE_KNEE);
+
+        let noise_indicator = (noise_power / (diff_power + DENOISE_FLOOR)).min(1.0);
+        let weight_b_raw = diff_power / (sum_power + diff_power + DENOISE_FLOOR);
+        let weight_b = 1.0 - noise_indicator * (1.0 - weight_b_raw);
+
+        let gain = hden * weight_a * weight_b;
+        let band_energy = self.diff_lp.y.max(DENOISE_FLOOR);
+        (gain, band_energy)
+    }
+
+    fn reset(&mut self) {
+        self.sum_bp.reset();
+        self.diff_i_bp.reset();
+        self.diff_q_bp.reset();
+        self.noise_lp.reset();
+        self.diff_lp.reset();
+        self.sum_lp.reset();
+    }
+}
+
+#[derive(Debug, Clone)]
+struct StereoDenoise {
+    bands: [DenoiseSubband; DENOISE_BANDS],
+    enabled: bool,
+}
+
+impl StereoDenoise {
+    fn new(audio_rate: f32) -> Self {
+        let bands = std::array::from_fn(|idx| {
+            DenoiseSubband::new(audio_rate, DENOISE_CENTERS[idx], DENOISE_Q[idx])
+        });
+        Self {
+            bands,
+            enabled: true,
+        }
+    }
+
+    #[inline]
+    fn process(&mut self, sum: f32, diff_i: f32, diff_q: f32) -> f32 {
+        if !self.enabled {
+            return diff_i;
+        }
+
+        let mut gain_sum = 0.0_f32;
+        let mut weight_sum = 0.0_f32;
+        for band in &mut self.bands {
+            let (gain, weight) = band.process(sum, diff_i, diff_q);
+            gain_sum += gain * weight;
+            weight_sum += weight;
+        }
+
+        let broadband_gain = if weight_sum > DENOISE_FLOOR {
+            (gain_sum / weight_sum).clamp(0.0, 1.0)
+        } else {
+            1.0
+        };
+        diff_i * broadband_gain
+    }
+
+    fn reset(&mut self) {
+        for band in &mut self.bands {
+            band.reset();
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct WfmStereoDecoder {
+    output_channels: usize,
+    stereo_enabled: bool,
+    rds_decoder: RdsDecoder,
+    rds_bpf: BiquadBandPass,
+    rds_dc: DcBlocker,
+    prev_iq: Option<Complex<f32>>,
+    nco_cos: f32,
+    nco_sin: f32,
+    nco_inc_cos: f32,
+    nco_inc_sin: f32,
+    nco_counter: u32,
+    pilot_i_lp: OnePoleLowPass,
+    pilot_q_lp: OnePoleLowPass,
+    pilot_abs_lp: OnePoleLowPass,
+    pilot_bpf: BiquadBandPass,
+    sum_lpf1: BiquadLowPass,
+    sum_lpf2: BiquadLowPass,
+    sum_notch: BiquadNotch,
+    diff_pilot_notch: BiquadNotch,
+    diff_lpf1: BiquadLowPass,
+    diff_lpf2: BiquadLowPass,
+    diff_q_lpf1: BiquadLowPass,
+    diff_q_lpf2: BiquadLowPass,
+    diff_dc: DcBlocker,
+    diff_q_dc: DcBlocker,
+    dc_m: DcBlocker,
+    dc_l: DcBlocker,
+    dc_r: DcBlocker,
+    deemph_m: Deemphasis,
+    deemph_l: Deemphasis,
+    deemph_r: Deemphasis,
+    stereo_detect_level: f32,
+    stereo_detected: bool,
+    detect_counter: u32,
+    detect_pilot_mag_acc: f32,
+    detect_pilot_abs_acc: f32,
+    fm_gain: f32,
+    resample_bank: [[f32; WFM_RESAMP_TAPS]; WFM_RESAMP_PHASES],
+    sum_hist: [f32; WFM_RESAMP_TAPS],
+    diff_hist: [f32; WFM_RESAMP_TAPS],
+    diff_q_hist: [f32; WFM_RESAMP_TAPS],
+    hist_pos: usize,
+    denoise: StereoDenoise,
+    prev_blend: f32,
+    output_phase_inc: f64,
+    output_phase: f64,
+}
+
+impl WfmStereoDecoder {
+    pub fn new(
+        composite_rate: u32,
+        audio_rate: u32,
+        output_channels: usize,
+        stereo_enabled: bool,
+        deemphasis_us: u32,
+    ) -> Self {
+        let composite_rate_f = composite_rate.max(1) as f32;
+        let output_phase_inc = audio_rate.max(1) as f64 / composite_rate.max(1) as f64;
+        let deemphasis_us = deemphasis_us as f32;
+        Self {
+            output_channels: output_channels.max(1),
+            stereo_enabled,
+            rds_decoder: RdsDecoder::new(composite_rate),
+            rds_bpf: BiquadBandPass::new(composite_rate_f, RDS_SUBCARRIER_HZ, RDS_BPF_Q),
+            rds_dc: DcBlocker::new(0.995),
+            prev_iq: None,
+            nco_cos: 1.0,
+            nco_sin: 0.0,
+            nco_inc_cos: (2.0 * std::f32::consts::PI * PILOT_HZ / composite_rate_f).cos(),
+            nco_inc_sin: (2.0 * std::f32::consts::PI * PILOT_HZ / composite_rate_f).sin(),
+            nco_counter: 0,
+            pilot_i_lp: OnePoleLowPass::new(composite_rate_f, 400.0),
+            pilot_q_lp: OnePoleLowPass::new(composite_rate_f, 400.0),
+            pilot_abs_lp: OnePoleLowPass::new(composite_rate_f, 400.0),
+            pilot_bpf: BiquadBandPass::new(composite_rate_f, PILOT_HZ, PILOT_BPF_Q),
+            sum_lpf1: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW4_Q1),
+            sum_lpf2: BiquadLowPass::new(composite_rate_f, AUDIO_BW_HZ, BW4_Q2),
+            sum_notch: BiquadNotch::new(composite_rate_f, PILOT_HZ, PILOT_NOTCH_Q),
+            diff_pilot_notch: BiquadNotch::new(composite_rate_f, PILOT_HZ, PILOT_NOTCH_Q),
+            diff_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q1),
+            diff_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q2),
+            diff_q_lpf1: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q1),
+            diff_q_lpf2: BiquadLowPass::new(composite_rate_f, STEREO_DIFF_BW_HZ, BW4_Q2),
+            diff_dc: DcBlocker::new(STEREO_DIFF_DC_R),
+            diff_q_dc: DcBlocker::new(STEREO_DIFF_DC_R),
+            dc_m: DcBlocker::new(0.9999),
+            dc_l: DcBlocker::new(0.9999),
+            dc_r: DcBlocker::new(0.9999),
+            deemph_m: Deemphasis::new(audio_rate.max(1) as f32, deemphasis_us),
+            deemph_l: Deemphasis::new(audio_rate.max(1) as f32, deemphasis_us),
+            deemph_r: Deemphasis::new(audio_rate.max(1) as f32, deemphasis_us),
+            stereo_detect_level: 0.0,
+            stereo_detected: false,
+            detect_counter: 0,
+            detect_pilot_mag_acc: 0.0,
+            detect_pilot_abs_acc: 0.0,
+            fm_gain: composite_rate_f / (2.0 * 75_000.0),
+            resample_bank: build_wfm_resample_bank(audio_rate as f32 / composite_rate_f),
+            sum_hist: [0.0; WFM_RESAMP_TAPS],
+            diff_hist: [0.0; WFM_RESAMP_TAPS],
+            diff_q_hist: [0.0; WFM_RESAMP_TAPS],
+            hist_pos: 0,
+            denoise: StereoDenoise::new(audio_rate.max(1) as f32),
+            prev_blend: 0.0,
+            output_phase_inc,
+            output_phase: 0.0,
+        }
+    }
+
+    pub fn process_iq(&mut self, samples: &[Complex<f32>]) -> Vec<f32> {
+        if samples.is_empty() {
+            return Vec::new();
+        }
+
+        let disc = demod_fm_with_prev(samples, &mut self.prev_iq);
+        let mut output = Vec::with_capacity(
+            ((samples.len() as f64 * self.output_phase_inc).ceil() as usize + 1)
+                * self.output_channels.max(1),
+        );
+        let (trim_sin, trim_cos) = STEREO_SEPARATION_PHASE_TRIM.sin_cos();
+
+        for &disc_sample in &disc {
+            let x = disc_sample * self.fm_gain;
+            let pilot_tone = self.pilot_bpf.process(x);
+
+            let sin_p = self.nco_sin;
+            let cos_p = self.nco_cos;
+            let i = self.pilot_i_lp.process(pilot_tone * cos_p);
+            let q = self.pilot_q_lp.process(pilot_tone * -sin_p);
+            let pilot_mag = (i * i + q * q).sqrt();
+            let inv_mag = 1.0 / (pilot_mag + 1e-12);
+            let err_sin = q * inv_mag;
+            let err_cos = i * inv_mag;
+
+            let new_cos = self.nco_cos * self.nco_inc_cos - self.nco_sin * self.nco_inc_sin;
+            let new_sin = self.nco_cos * self.nco_inc_sin + self.nco_sin * self.nco_inc_cos;
+            self.nco_cos = new_cos;
+            self.nco_sin = new_sin;
+            self.nco_counter += 1;
+            if self.nco_counter >= 1024 {
+                self.nco_counter = 0;
+                let mag = (self.nco_cos * self.nco_cos + self.nco_sin * self.nco_sin).sqrt();
+                let inv = 1.0 / mag;
+                self.nco_cos *= inv;
+                self.nco_sin *= inv;
+            }
+
+            let pilot_abs = self.pilot_abs_lp.process(pilot_tone.abs());
+            self.detect_pilot_mag_acc += pilot_mag;
+            self.detect_pilot_abs_acc += pilot_abs;
+            self.detect_counter += 1;
+            if self.detect_counter >= STEREO_DETECT_DECIMATION {
+                let inv_n = 1.0 / STEREO_DETECT_DECIMATION as f32;
+                let avg_mag = self.detect_pilot_mag_acc * inv_n;
+                let avg_abs = self.detect_pilot_abs_acc * inv_n;
+                let pilot_coherence = (avg_mag / (avg_abs + 1e-4)).clamp(0.0, 1.0);
+                let pilot_lock = ((pilot_coherence - 0.4) / 0.2).clamp(0.0, 1.0);
+                let stereo_drive = (avg_mag * pilot_lock * 120.0).clamp(0.0, 1.0);
+                let detect_coeff = if stereo_drive > self.stereo_detect_level {
+                    0.0008 * STEREO_DETECT_DECIMATION as f32
+                } else {
+                    0.00005 * STEREO_DETECT_DECIMATION as f32
+                };
+                self.stereo_detect_level +=
+                    detect_coeff * (stereo_drive - self.stereo_detect_level);
+                if self.stereo_detected {
+                    if self.stereo_detect_level < 0.22 {
+                        self.stereo_detected = false;
+                    }
+                } else if self.stereo_detect_level > 0.6 {
+                    self.stereo_detected = true;
+                }
+                self.detect_counter = 0;
+                self.detect_pilot_mag_acc = 0.0;
+                self.detect_pilot_abs_acc = 0.0;
+            }
+            let stereo_blend_target = if self.stereo_detected { 1.0 } else { 0.0 };
+
+            let rds_quality = (0.35 + pilot_mag * 20.0).clamp(0.35, 1.0);
+            let rds_clean = self.rds_dc.process(self.rds_bpf.process(x));
+            let _ = self.rds_decoder.process_sample(rds_clean, rds_quality);
+
+            let sum = self.sum_lpf2.process(self.sum_lpf1.process(x));
+
+            let sin_est = sin_p * err_cos + cos_p * err_sin;
+            let cos_est = cos_p * err_cos - sin_p * err_sin;
+            let sin_2p = 2.0 * sin_est * cos_est;
+            let cos_2p = 2.0 * cos_est * cos_est - 1.0;
+            let x_notched = self.diff_pilot_notch.process(x);
+            let diff_i = self.diff_dc.process(
+                self.diff_lpf2
+                    .process(self.diff_lpf1.process(x_notched * (cos_2p * 2.0))),
+            );
+            let diff_q = self.diff_q_dc.process(
+                self.diff_q_lpf2
+                    .process(self.diff_q_lpf1.process(x_notched * (-sin_2p * 2.0))),
+            );
+
+            let pos = self.hist_pos;
+            self.sum_hist[pos] = sum;
+            self.diff_hist[pos] = diff_i;
+            self.diff_q_hist[pos] = diff_q;
+            self.hist_pos = (pos + 1) & (WFM_RESAMP_TAPS - 1);
+
+            let prev_phase = self.output_phase;
+            self.output_phase += self.output_phase_inc;
+            if self.output_phase < 1.0 {
+                self.prev_blend = stereo_blend_target;
+                continue;
+            }
+            self.output_phase -= 1.0;
+
+            let frac = ((1.0 - prev_phase) / self.output_phase_inc) as f32;
+            let ring_pos = self.hist_pos;
+            let sum_i =
+                polyphase_resample_ring(&self.sum_hist, ring_pos, &self.resample_bank, frac);
+            let diff_i =
+                polyphase_resample_ring(&self.diff_hist, ring_pos, &self.resample_bank, frac);
+            let diff_q =
+                polyphase_resample_ring(&self.diff_q_hist, ring_pos, &self.resample_bank, frac);
+            let blend_i =
+                (self.prev_blend + frac * (stereo_blend_target - self.prev_blend)).clamp(0.0, 1.0);
+            self.prev_blend = stereo_blend_target;
+            let diff_i = (diff_i * trim_cos + diff_q * trim_sin) * STEREO_SEPARATION_GAIN;
+            let diff_i = self.denoise.process(sum_i, diff_i, diff_q);
+
+            if self.output_channels >= 2 && self.stereo_enabled {
+                let diff = diff_i * blend_i;
+                let left = self
+                    .dc_l
+                    .process(self.deemph_l.process((sum_i + diff) * STEREO_MATRIX_GAIN))
+                    .clamp(-1.0, 1.0);
+                let right = self
+                    .dc_r
+                    .process(self.deemph_r.process((sum_i - diff) * STEREO_MATRIX_GAIN))
+                    .clamp(-1.0, 1.0);
+                output.push(left);
+                output.push(right);
+            } else {
+                let mono = self
+                    .dc_m
+                    .process(self.deemph_m.process(self.sum_notch.process(sum_i)))
+                    .clamp(-1.0, 1.0);
+                output.push(mono);
+                if self.output_channels >= 2 {
+                    output.push(mono);
+                }
+            }
+        }
+
+        output
+    }
+
+    pub fn set_stereo_enabled(&mut self, enabled: bool) {
+        self.stereo_enabled = enabled;
+    }
+
+    pub fn rds_data(&self) -> Option<RdsData> {
+        self.rds_decoder.snapshot()
+    }
+
+    pub fn reset_rds(&mut self) {
+        self.rds_decoder.reset();
+    }
+
+    pub fn reset_stereo_detect(&mut self) {
+        self.stereo_detect_level = 0.0;
+        self.stereo_detected = false;
+        self.detect_counter = 0;
+        self.detect_pilot_mag_acc = 0.0;
+        self.detect_pilot_abs_acc = 0.0;
+    }
+
+    pub fn reset_demod_state(&mut self) {
+        self.prev_iq = None;
+    }
+
+    pub fn reset_state(&mut self) {
+        self.rds_decoder.reset();
+        self.rds_bpf.reset();
+        self.rds_dc.reset();
+        self.prev_iq = None;
+        self.nco_cos = 1.0;
+        self.nco_sin = 0.0;
+        self.nco_counter = 0;
+        self.pilot_i_lp.reset();
+        self.pilot_q_lp.reset();
+        self.pilot_abs_lp.reset();
+        self.pilot_bpf.reset();
+        self.sum_lpf1.reset();
+        self.sum_lpf2.reset();
+        self.sum_notch.reset();
+        self.diff_pilot_notch.reset();
+        self.diff_lpf1.reset();
+        self.diff_lpf2.reset();
+        self.diff_q_lpf1.reset();
+        self.diff_q_lpf2.reset();
+        self.diff_dc.reset();
+        self.diff_q_dc.reset();
+        self.dc_m.reset();
+        self.dc_l.reset();
+        self.dc_r.reset();
+        self.deemph_m.reset();
+        self.deemph_l.reset();
+        self.deemph_r.reset();
+        self.stereo_detect_level = 0.0;
+        self.stereo_detected = false;
+        self.detect_counter = 0;
+        self.detect_pilot_mag_acc = 0.0;
+        self.detect_pilot_abs_acc = 0.0;
+        self.sum_hist = [0.0; WFM_RESAMP_TAPS];
+        self.diff_hist = [0.0; WFM_RESAMP_TAPS];
+        self.diff_q_hist = [0.0; WFM_RESAMP_TAPS];
+        self.hist_pos = 0;
+        self.denoise.reset();
+        self.prev_blend = 0.0;
+        self.output_phase = 0.0;
+    }
+
+    pub fn set_denoise_enabled(&mut self, enabled: bool) {
+        self.denoise.enabled = enabled;
+    }
+
+    pub fn stereo_detected(&self) -> bool {
+        self.stereo_detected
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_wfm_stereo_separation() {
+        use std::f32::consts::TAU;
+
+        let composite_rate: u32 = 240_000;
+        let audio_rate: u32 = 48_000;
+        let fs = composite_rate as f32;
+        let duration_secs = 0.5_f32;
+        let num_samples = (fs * duration_secs) as usize;
+
+        let audio_freq = 1000.0_f32;
+        let pilot_freq = 19_000.0_f32;
+        let carrier_freq = 38_000.0_f32;
+        let mut composite = vec![0.0_f32; num_samples];
+        for (n, sample) in composite.iter_mut().enumerate() {
+            let t = n as f32 / fs;
+            let audio = (TAU * audio_freq * t).sin();
+            let sum = audio;
+            let diff = audio;
+            let pilot = 0.1 * (TAU * pilot_freq * t).cos();
+            let carrier = (TAU * carrier_freq * t).cos();
+            *sample = sum + pilot + diff * carrier;
+        }
+
+        let peak_composite = 2.1_f32;
+        let deviation_hz = 75_000.0_f32;
+        let mod_index = TAU * deviation_hz / (peak_composite * fs);
+        let mut phase: f32 = 0.0;
+        let mut iq = Vec::with_capacity(num_samples);
+        for &c in &composite {
+            phase += mod_index * c;
+            iq.push(Complex::from_polar(1.0, phase));
+        }
+
+        let mut decoder = WfmStereoDecoder::new(composite_rate, audio_rate, 2, true, 50);
+        let output = decoder.process_iq(&iq);
+
+        let skip_samples = (0.2 * audio_rate as f32) as usize;
+        let stereo_pairs = output.len() / 2;
+        assert!(stereo_pairs > skip_samples + 100);
+
+        let mut l_energy = 0.0_f64;
+        let mut r_energy = 0.0_f64;
+        let mut count = 0_u64;
+        for idx in skip_samples..stereo_pairs {
+            let l = output[2 * idx] as f64;
+            let r = output[2 * idx + 1] as f64;
+            l_energy += l * l;
+            r_energy += r * r;
+            count += 1;
+        }
+        let l_rms = (l_energy / count as f64).sqrt();
+        let r_rms = (r_energy / count as f64).sqrt();
+        let separation_db = if r_rms > 1e-10 {
+            20.0 * (l_rms / r_rms).log10()
+        } else {
+            f64::INFINITY
+        };
+
+        assert!(l_rms > 0.01);
+        assert!(separation_db > 20.0);
+    }
+
+    #[test]
+    fn test_wfm_stereo_separation_multitone() {
+        use std::f32::consts::TAU;
+
+        let composite_rate: u32 = 240_000;
+        let audio_rate: u32 = 48_000;
+        let fs = composite_rate as f32;
+        let duration_secs = 0.8_f32;
+        let num_samples = (fs * duration_secs) as usize;
+        let freqs = [400.0_f32, 2_000.0, 8_000.0, 14_000.0];
+        let pilot_freq = 19_000.0_f32;
+        let carrier_freq = 38_000.0_f32;
+
+        for (label, diff_sign) in [("L-only", 1.0_f32), ("R-only", -1.0_f32)] {
+            let mut composite = vec![0.0_f32; num_samples];
+            for (n, sample) in composite.iter_mut().enumerate() {
+                let t = n as f32 / fs;
+                let audio: f32 = freqs
+                    .iter()
+                    .map(|&freq| (TAU * freq * t).sin())
+                    .sum::<f32>()
+                    / freqs.len() as f32;
+                let sum = audio;
+                let diff = audio * diff_sign;
+                let pilot = 0.1 * (TAU * pilot_freq * t).cos();
+                let carrier = (TAU * carrier_freq * t).cos();
+                *sample = sum + pilot + diff * carrier;
+            }
+
+            let peak_composite = 2.1_f32;
+            let deviation_hz = 75_000.0_f32;
+            let mod_index = TAU * deviation_hz / (peak_composite * fs);
+            let mut phase: f32 = 0.0;
+            let mut iq = Vec::with_capacity(num_samples);
+            for &c in &composite {
+                phase += mod_index * c;
+                iq.push(Complex::from_polar(1.0, phase));
+            }
+
+            let mut decoder = WfmStereoDecoder::new(composite_rate, audio_rate, 2, true, 50);
+            let output = decoder.process_iq(&iq);
+
+            let skip_samples = (0.3 * audio_rate as f32) as usize;
+            let stereo_pairs = output.len() / 2;
+            assert!(
+                stereo_pairs > skip_samples + 100,
+                "{label}: not enough output"
+            );
+
+            let mut active_energy = 0.0_f64;
+            let mut silent_energy = 0.0_f64;
+            let mut count = 0_u64;
+            for idx in skip_samples..stereo_pairs {
+                let l = output[2 * idx] as f64;
+                let r = output[2 * idx + 1] as f64;
+                if diff_sign > 0.0 {
+                    active_energy += l * l;
+                    silent_energy += r * r;
+                } else {
+                    active_energy += r * r;
+                    silent_energy += l * l;
+                }
+                count += 1;
+            }
+            let active_rms = (active_energy / count as f64).sqrt();
+            let silent_rms = (silent_energy / count as f64).sqrt();
+            let separation_db = if silent_rms > 1e-10 {
+                20.0 * (active_rms / silent_rms).log10()
+            } else {
+                f64::INFINITY
+            };
+
+            assert!(active_rms > 0.01, "{label}: no active energy");
+            assert!(separation_db > 15.0, "{label}: separation too low");
+        }
+    }
+
+    #[test]
+    fn test_wfm_no_pilot_stays_mono_detect() {
+        use std::f32::consts::TAU;
+
+        let composite_rate: u32 = 240_000;
+        let audio_rate: u32 = 48_000;
+        let fs = composite_rate as f32;
+        let duration_secs = 0.5_f32;
+        let num_samples = (fs * duration_secs) as usize;
+
+        let audio_freq = 1000.0_f32;
+        let mut composite = vec![0.0_f32; num_samples];
+        for (n, sample) in composite.iter_mut().enumerate() {
+            let t = n as f32 / fs;
+            *sample = (TAU * audio_freq * t).sin();
+        }
+
+        let deviation_hz = 75_000.0_f32;
+        let mod_index = TAU * deviation_hz / fs;
+        let mut phase: f32 = 0.0;
+        let mut iq = Vec::with_capacity(num_samples);
+        for &c in &composite {
+            phase += mod_index * c;
+            iq.push(Complex::from_polar(1.0, phase));
+        }
+
+        let mut decoder = WfmStereoDecoder::new(composite_rate, audio_rate, 2, true, 50);
+        let output = decoder.process_iq(&iq);
+
+        assert!(!decoder.stereo_detected());
+
+        let skip_samples = (0.2 * audio_rate as f32) as usize;
+        let stereo_pairs = output.len() / 2;
+        assert!(stereo_pairs > skip_samples + 100);
+
+        let mut diff_energy = 0.0_f64;
+        let mut count = 0_u64;
+        for idx in skip_samples..stereo_pairs {
+            let l = output[2 * idx] as f64;
+            let r = output[2 * idx + 1] as f64;
+            let d = l - r;
+            diff_energy += d * d;
+            count += 1;
+        }
+        let diff_rms = (diff_energy / count as f64).sqrt();
+        assert!(diff_rms < 0.01);
+    }
+
+    #[test]
+    fn test_denoise_noisy_diff_attenuation() {
+        let audio_rate = 48_000.0;
+        let num_samples = (audio_rate * 0.5) as usize;
+        let mut denoise = StereoDenoise::new(audio_rate);
+
+        let mut rng_state = 12345_u64;
+        let mut next_noise = || -> f32 {
+            rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1);
+            ((rng_state >> 33) as f32 / (1u64 << 31) as f32) - 1.0
+        };
+
+        let mut input_energy = 0.0_f64;
+        let mut output_energy = 0.0_f64;
+        let skip = (audio_rate * 0.1) as usize;
+
+        for idx in 0..num_samples {
+            let noise_i = next_noise() * 0.5;
+            let noise_q = next_noise() * 0.5;
+            let out = denoise.process(0.0, noise_i, noise_q);
+            if idx >= skip {
+                input_energy += (noise_i as f64) * (noise_i as f64);
+                output_energy += (out as f64) * (out as f64);
+            }
+        }
+
+        let input_rms = (input_energy / (num_samples - skip) as f64).sqrt();
+        let output_rms = (output_energy / (num_samples - skip) as f64).sqrt();
+        let attenuation_db = 20.0 * (input_rms / (output_rms + 1e-20)).log10();
+        assert!(attenuation_db > 6.0);
+    }
+
+    #[test]
+    fn test_denoise_clean_stereo_preservation() {
+        use std::f32::consts::TAU;
+
+        let audio_rate = 48_000.0;
+        let num_samples = (audio_rate * 0.5) as usize;
+        let mut denoise = StereoDenoise::new(audio_rate);
+        let tone_freq = 1000.0;
+
+        let mut input_energy = 0.0_f64;
+        let mut output_energy = 0.0_f64;
+        let skip = (audio_rate * 0.15) as usize;
+
+        for idx in 0..num_samples {
+            let t = idx as f32 / audio_rate;
+            let tone = (TAU * tone_freq * t).sin() * 0.5;
+            let out = denoise.process(tone, tone, 0.0);
+            if idx >= skip {
+                input_energy += (tone as f64) * (tone as f64);
+                output_energy += (out as f64) * (out as f64);
+            }
+        }
+
+        let input_rms = (input_energy / (num_samples - skip) as f64).sqrt();
+        let output_rms = (output_energy / (num_samples - skip) as f64).sqrt();
+        let loss_db = 20.0 * (input_rms / (output_rms + 1e-20)).log10();
+        assert!(loss_db < 4.0);
+    }
+
+    #[test]
+    fn test_denoise_bypass_when_disabled() {
+        let mut denoise = StereoDenoise::new(48_000.0);
+        denoise.enabled = false;
+
+        for &value in &[0.0_f32, 0.5, -0.3, 1.0, -1.0, 0.001] {
+            assert_eq!(denoise.process(0.1, value, 0.2), value);
+        }
+    }
+
+    #[test]
+    fn test_denoise_per_band_selectivity() {
+        use std::f32::consts::TAU;
+
+        let audio_rate = 48_000.0;
+        let num_samples = (audio_rate * 0.5) as usize;
+        let mut denoise = StereoDenoise::new(audio_rate);
+        let low_freq = 400.0;
+
+        let mut rng_state = 67890_u64;
+        let mut next_noise = || -> f32 {
+            rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1);
+            ((rng_state >> 33) as f32 / (1u64 << 31) as f32) - 1.0
+        };
+
+        let skip = (audio_rate * 0.15) as usize;
+        let mut output_samples = Vec::with_capacity(num_samples - skip);
+
+        for idx in 0..num_samples {
+            let t = idx as f32 / audio_rate;
+            let low_tone = (TAU * low_freq * t).sin() * 0.3;
+            let high_noise = next_noise() * 0.3;
+            let out = denoise.process(low_tone, low_tone + high_noise, high_noise);
+            if idx >= skip {
+                output_samples.push(out);
+            }
+        }
+
+        let mut low_bp = BiquadBandPass::new(audio_rate, low_freq, 2.0);
+        let mut low_energy = 0.0_f64;
+        let mut total_energy = 0.0_f64;
+        for &sample in &output_samples {
+            let low_part = low_bp.process(sample);
+            low_energy += (low_part as f64) * (low_part as f64);
+            total_energy += (sample as f64) * (sample as f64);
+        }
+
+        let low_ratio = low_energy / (total_energy + 1e-20);
+        assert!(low_ratio > 0.3);
+    }
+
+    #[test]
+    fn test_wfm_stereo_separation_with_denoise() {
+        use std::f32::consts::TAU;
+
+        let composite_rate: u32 = 240_000;
+        let audio_rate: u32 = 48_000;
+        let fs = composite_rate as f32;
+        let duration_secs = 0.5_f32;
+        let num_samples = (fs * duration_secs) as usize;
+
+        let audio_freq = 1000.0_f32;
+        let pilot_freq = 19_000.0_f32;
+        let carrier_freq = 38_000.0_f32;
+        let mut composite = vec![0.0_f32; num_samples];
+        for (n, sample) in composite.iter_mut().enumerate() {
+            let t = n as f32 / fs;
+            let audio = (TAU * audio_freq * t).sin();
+            let sum = audio;
+            let diff = audio;
+            let pilot = 0.1 * (TAU * pilot_freq * t).cos();
+            let carrier = (TAU * carrier_freq * t).cos();
+            *sample = sum + pilot + diff * carrier;
+        }
+
+        let peak_composite = 2.1_f32;
+        let deviation_hz = 75_000.0_f32;
+        let mod_index = TAU * deviation_hz / (peak_composite * fs);
+        let mut phase: f32 = 0.0;
+        let mut iq = Vec::with_capacity(num_samples);
+        for &c in &composite {
+            phase += mod_index * c;
+            iq.push(Complex::from_polar(1.0, phase));
+        }
+
+        let mut decoder = WfmStereoDecoder::new(composite_rate, audio_rate, 2, true, 50);
+        let output = decoder.process_iq(&iq);
+
+        let skip_samples = (0.2 * audio_rate as f32) as usize;
+        let stereo_pairs = output.len() / 2;
+        assert!(stereo_pairs > skip_samples + 100);
+
+        let mut l_energy = 0.0_f64;
+        let mut r_energy = 0.0_f64;
+        let mut count = 0_u64;
+        for idx in skip_samples..stereo_pairs {
+            let l = output[2 * idx] as f64;
+            let r = output[2 * idx + 1] as f64;
+            l_energy += l * l;
+            r_energy += r * r;
+            count += 1;
+        }
+        let l_rms = (l_energy / count as f64).sqrt();
+        let r_rms = (r_energy / count as f64).sqrt();
+        let separation_db = if r_rms > 1e-10 {
+            20.0 * (l_rms / r_rms).log10()
+        } else {
+            f64::INFINITY
+        };
+
+        assert!(l_rms > 0.01);
+        assert!(separation_db > 15.0);
+    }
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs
index e0ccb93..5f9b657 100644
--- a/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp.rs
@@ -11,16 +11,20 @@
 //! approach costs O(M log M) — a significant saving for the tap counts (64+)
 //! and block sizes (4096) used here.
 
+mod channel;
+mod filter;
+
 use std::f32::consts::PI;
 use std::sync::{Arc, Mutex};
 
 use num_complex::Complex;
 use rustfft::num_complex::Complex as FftComplex;
-use rustfft::{Fft, FftPlanner};
+use rustfft::FftPlanner;
 use tokio::sync::broadcast;
-use trx_core::rig::state::{RdsData, RigMode};
+use trx_core::rig::state::RigMode;
 
-use crate::demod::{DcBlocker, Demodulator, SoftAgc, WfmStereoDecoder};
+pub use self::channel::ChannelDsp;
+pub use self::filter::{BlockFirFilter, BlockFirFilterPair, FirFilter};
 
 // ---------------------------------------------------------------------------
 // IQ source abstraction
@@ -73,858 +77,6 @@ impl IqSource for MockIqSource {
     }
 }
 
-// ---------------------------------------------------------------------------
-// Windowed-sinc coefficient generator (shared by FirFilter and BlockFirFilter)
-// ---------------------------------------------------------------------------
-
-fn windowed_sinc_coeffs(cutoff_norm: f32, taps: usize) -> Vec<f32> {
-    assert!(taps >= 1, "FIR filter must have at least 1 tap");
-    let m = (taps - 1) as f32;
-    let mut coeffs = Vec::with_capacity(taps);
-    for i in 0..taps {
-        let x = i as f32 - m / 2.0;
-        let sinc = if x == 0.0 {
-            2.0 * cutoff_norm
-        } else {
-            (2.0 * PI * cutoff_norm * x).sin() / (PI * x)
-        };
-        let window = if taps == 1 {
-            1.0
-        } else {
-            0.5 * (1.0 - (2.0 * PI * i as f32 / m).cos())
-        };
-        coeffs.push(sinc * window);
-    }
-    let sum: f32 = coeffs.iter().sum();
-    if sum.abs() > 1e-12 {
-        let inv = 1.0 / sum;
-        for c in &mut coeffs {
-            *c *= inv;
-        }
-    }
-    coeffs
-}
-
-// ---------------------------------------------------------------------------
-// FIR low-pass filter (sample-by-sample, kept for unit tests)
-// ---------------------------------------------------------------------------
-
-/// A simple windowed-sinc FIR low-pass filter (sample-by-sample interface).
-///
-/// Used only in unit tests.  The DSP pipeline uses [`BlockFirFilter`] instead.
-pub struct FirFilter {
-    coeffs: Vec<f32>,
-    state: Vec<f32>,
-    pos: usize,
-}
-
-impl FirFilter {
-    pub fn new(cutoff_norm: f32, taps: usize) -> Self {
-        let coeffs = windowed_sinc_coeffs(cutoff_norm, taps);
-        let state_len = taps.saturating_sub(1);
-        Self {
-            coeffs,
-            state: vec![0.0; state_len],
-            pos: 0,
-        }
-    }
-
-    pub fn process(&mut self, sample: f32) -> f32 {
-        let n = self.state.len();
-        if n == 0 {
-            return sample * self.coeffs[0];
-        }
-        self.state[self.pos] = sample;
-        self.pos = (self.pos + 1) % n;
-        let mut acc = self.coeffs[0] * sample;
-        for k in 1..self.coeffs.len() {
-            let idx = (self.pos + n - k) % n;
-            acc += self.coeffs[k] * self.state[idx];
-        }
-        acc
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Block FIR filter — overlap-save via rustfft
-// ---------------------------------------------------------------------------
-
-/// FFT-based overlap-save FIR low-pass filter (block interface).
-///
-/// For a block of M samples and N taps the direct cost is O(N·M); here it
-/// is O(M log M) plus a single coefficient FFT computed once at construction.
-///
-/// The filter is initialised for a nominal block size of [`IQ_BLOCK_SIZE`].
-/// Smaller blocks are handled correctly (they incur a small padding overhead).
-pub struct BlockFirFilter {
-    /// Frequency-domain filter coefficients (pre-computed, length `fft_size`).
-    h_freq: Vec<FftComplex<f32>>,
-    /// Overlap buffer: last `n_taps - 1` input samples (zero-initialised).
-    overlap: Vec<f32>,
-    n_taps: usize,
-    fft_size: usize,
-    fft: Arc<dyn Fft<f32>>,
-    ifft: Arc<dyn Fft<f32>>,
-    scratch_freq: Vec<FftComplex<f32>>,
-}
-
-pub struct BlockFirFilterPair {
-    h_freq: Vec<FftComplex<f32>>,
-    overlap: Vec<FftComplex<f32>>,
-    n_taps: usize,
-    fft_size: usize,
-    fft: Arc<dyn Fft<f32>>,
-    ifft: Arc<dyn Fft<f32>>,
-    scratch_freq: Vec<FftComplex<f32>>,
-}
-
-type FirKernel = (
-    Vec<FftComplex<f32>>,
-    usize,
-    Arc<dyn Fft<f32>>,
-    Arc<dyn Fft<f32>>,
-);
-
-fn build_fir_kernel(
-    cutoff_norm: f32,
-    taps: usize,
-    block_size: usize,
-) -> FirKernel {
-    let coeffs = windowed_sinc_coeffs(cutoff_norm, taps);
-    let fft_size = (block_size + taps - 1).next_power_of_two();
-
-    let mut planner = FftPlanner::<f32>::new();
-    let fft = planner.plan_fft_forward(fft_size);
-    let ifft = planner.plan_fft_inverse(fft_size);
-
-    let mut h_buf: Vec<FftComplex<f32>> =
-        coeffs.iter().map(|&c| FftComplex::new(c, 0.0)).collect();
-    h_buf.resize(fft_size, FftComplex::new(0.0, 0.0));
-    fft.process(&mut h_buf);
-
-    (h_buf, fft_size, fft, ifft)
-}
-
-fn mul_freq_domain_scalar(buf: &mut [FftComplex<f32>], h_freq: &[FftComplex<f32>], scale: f32) {
-    for (x, &h) in buf.iter_mut().zip(h_freq.iter()) {
-        *x = FftComplex::new(
-            (x.re * h.re - x.im * h.im) * scale,
-            (x.re * h.im + x.im * h.re) * scale,
-        );
-    }
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[target_feature(enable = "avx2")]
-unsafe fn mul_freq_domain_avx2(buf: &mut [FftComplex<f32>], h_freq: &[FftComplex<f32>], scale: f32) {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-
-    let len = buf.len().min(h_freq.len());
-    let mut i = 0usize;
-    let scale_v = _mm256_set1_ps(scale);
-    while i + 4 <= len {
-        let x_ptr = unsafe { buf.as_mut_ptr().add(i) as *mut f32 };
-        let h_ptr = unsafe { h_freq.as_ptr().add(i) as *const f32 };
-        let x_v = unsafe { _mm256_loadu_ps(x_ptr) };
-        let h_v = unsafe { _mm256_loadu_ps(h_ptr) };
-        let h_re = _mm256_moveldup_ps(h_v);
-        let h_im = _mm256_movehdup_ps(h_v);
-        let x_swapped = _mm256_permute_ps(x_v, 0xB1);
-        let prod = _mm256_addsub_ps(_mm256_mul_ps(x_v, h_re), _mm256_mul_ps(x_swapped, h_im));
-        let out = _mm256_mul_ps(prod, scale_v);
-        unsafe { _mm256_storeu_ps(x_ptr, out) };
-        i += 4;
-    }
-
-    mul_freq_domain_scalar(&mut buf[i..len], &h_freq[i..len], scale);
-}
-
-fn mul_freq_domain(buf: &mut [FftComplex<f32>], h_freq: &[FftComplex<f32>], scale: f32) {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    {
-        if std::arch::is_x86_feature_detected!("avx2") {
-            unsafe {
-                mul_freq_domain_avx2(buf, h_freq, scale);
-            }
-            return;
-        }
-    }
-
-    mul_freq_domain_scalar(buf, h_freq, scale);
-}
-
-impl BlockFirFilter {
-    /// Create a new `BlockFirFilter`.
-    ///
-    /// `cutoff_norm`: normalised cutoff (0.0–0.5), i.e. `cutoff_hz / sample_rate`.
-    /// `taps`: number of FIR taps.
-    /// `block_size`: expected input block length (used to size the internal FFT).
-    pub fn new(cutoff_norm: f32, taps: usize, block_size: usize) -> Self {
-        let taps = taps.max(1);
-        let (h_buf, fft_size, fft, ifft) = build_fir_kernel(cutoff_norm, taps, block_size);
-
-        Self {
-            h_freq: h_buf,
-            overlap: vec![0.0; taps.saturating_sub(1)],
-            n_taps: taps,
-            fft_size,
-            fft,
-            ifft,
-            scratch_freq: vec![FftComplex::new(0.0, 0.0); fft_size],
-        }
-    }
-
-    /// Filter a block of real samples and return filtered samples of the same length.
-    ///
-    /// Internally performs one forward FFT, a point-wise multiply with the
-    /// pre-computed filter response, and one inverse FFT.
-    pub fn filter_block_into(&mut self, input: &[f32], output: &mut Vec<f32>) {
-        let n_new = input.len();
-        let n_overlap = self.n_taps.saturating_sub(1);
-
-        // Build the time-domain frame: [overlap (N-1)] ++ [new input] ++ [zeros]
-        let buf = &mut self.scratch_freq;
-        buf.clear();
-        buf.reserve(self.fft_size.saturating_sub(buf.capacity()));
-        for &s in &self.overlap {
-            buf.push(FftComplex::new(s, 0.0));
-        }
-        for &s in input {
-            buf.push(FftComplex::new(s, 0.0));
-        }
-        buf.resize(self.fft_size, FftComplex::new(0.0, 0.0));
-
-        // Forward FFT.
-        self.fft.process(buf);
-
-        // Point-wise multiply with H(f); fold in the IFFT normalisation here
-        // to avoid a second pass.
-        let scale = 1.0 / self.fft_size as f32;
-        mul_freq_domain(buf, &self.h_freq, scale);
-
-        // Inverse FFT.
-        self.ifft.process(buf);
-
-        // Extract the valid output: discard the first n_overlap samples.
-        let end = (n_overlap + n_new).min(buf.len());
-        output.clear();
-        output.reserve(n_new.saturating_sub(output.capacity()));
-        output.extend(buf[n_overlap..end].iter().map(|s| s.re));
-
-        // Update overlap with the tail of the current input.
-        if n_overlap > 0 {
-            if n_new >= n_overlap {
-                let new_start = n_new - n_overlap;
-                self.overlap.copy_from_slice(&input[new_start..]);
-            } else {
-                let keep_old = n_overlap - n_new;
-                self.overlap.copy_within(n_new..n_overlap, 0);
-                self.overlap[keep_old..].copy_from_slice(input);
-            }
-        }
-    }
-
-    pub fn filter_block(&mut self, input: &[f32]) -> Vec<f32> {
-        let mut output = Vec::with_capacity(input.len());
-        self.filter_block_into(input, &mut output);
-        output
-    }
-}
-
-impl BlockFirFilterPair {
-    pub fn new(cutoff_norm: f32, taps: usize, block_size: usize) -> Self {
-        let taps = taps.max(1);
-        let (h_buf, fft_size, fft, ifft) = build_fir_kernel(cutoff_norm, taps, block_size);
-        Self {
-            h_freq: h_buf,
-            overlap: vec![FftComplex::new(0.0, 0.0); taps.saturating_sub(1)],
-            n_taps: taps,
-            fft_size,
-            fft,
-            ifft,
-            scratch_freq: vec![FftComplex::new(0.0, 0.0); fft_size],
-        }
-    }
-
-    pub fn filter_block_into(
-        &mut self,
-        input_i: &[f32],
-        input_q: &[f32],
-        output_i: &mut Vec<f32>,
-        output_q: &mut Vec<f32>,
-    ) {
-        let n_new = input_i.len().min(input_q.len());
-        let n_overlap = self.n_taps.saturating_sub(1);
-
-        let buf = &mut self.scratch_freq;
-        buf.clear();
-        buf.reserve(self.fft_size.saturating_sub(buf.capacity()));
-        buf.extend(self.overlap.iter().copied());
-        for idx in 0..n_new {
-            buf.push(FftComplex::new(input_i[idx], input_q[idx]));
-        }
-        buf.resize(self.fft_size, FftComplex::new(0.0, 0.0));
-
-        self.fft.process(buf);
-        let scale = 1.0 / self.fft_size as f32;
-        mul_freq_domain(buf, &self.h_freq, scale);
-        self.ifft.process(buf);
-
-        let end = (n_overlap + n_new).min(buf.len());
-        output_i.clear();
-        output_q.clear();
-        output_i.reserve(n_new.saturating_sub(output_i.capacity()));
-        output_q.reserve(n_new.saturating_sub(output_q.capacity()));
-        for sample in &buf[n_overlap..end] {
-            output_i.push(sample.re);
-            output_q.push(sample.im);
-        }
-
-        if n_overlap > 0 {
-            if n_new >= n_overlap {
-                let new_start = n_new - n_overlap;
-                for (dst, idx) in self.overlap.iter_mut().zip(new_start..n_new) {
-                    *dst = FftComplex::new(input_i[idx], input_q[idx]);
-                }
-            } else {
-                let keep_old = n_overlap - n_new;
-                self.overlap.copy_within(n_new..n_overlap, 0);
-                for (dst, idx) in self.overlap[keep_old..].iter_mut().zip(0..n_new) {
-                    *dst = FftComplex::new(input_i[idx], input_q[idx]);
-                }
-            }
-        }
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Per-mode post-processing helpers (DC block + AGC)
-// ---------------------------------------------------------------------------
-
-/// Build the AGC for a given mode.
-///
-/// AM AGC must be far slower than audio modulation.  With a 50 Hz bass
-/// component the modulation period is 20 ms; an attack faster than that
-/// causes the AGC to follow the audio envelope and distort (pumping).
-/// 500 ms / 5 s only reacts to slow carrier-amplitude fading, not audio.
-///
-/// CW uses a fast attack/release to follow individual dots and dashes.
-/// All other modes use 5 ms / 500 ms, suitable for SSB voice and FM.
-fn agc_for_mode(mode: &RigMode, audio_sample_rate: u32) -> SoftAgc {
-    let sr = audio_sample_rate.max(1) as f32;
-    match mode {
-        RigMode::CW | RigMode::CWR => SoftAgc::new(sr, 1.0, 50.0, 0.5, 30.0),
-        RigMode::AM => SoftAgc::new(sr, 500.0, 5_000.0, 0.5, 30.0),
-        _ => SoftAgc::new(sr, 5.0, 500.0, 0.5, 30.0),
-    }
-}
-
-/// Build a pre-demod complex AGC for FM-family modes.
-///
-/// This stabilizes I/Q magnitude before the discriminator while preserving
-/// phase, which helps the FM/WFM demod path behave better when strong signals
-/// drive large amplitude swings inside the channel filter.
-fn iq_agc_for_mode(mode: &RigMode, sample_rate: u32) -> Option<SoftAgc> {
-    let sr = sample_rate.max(1) as f32;
-    match mode {
-        RigMode::FM | RigMode::PKT => Some(SoftAgc::new(sr, 0.5, 150.0, 0.8, 12.0)),
-        // WFM uses a hard IQ limiter instead of AGC to preserve phase accuracy.
-        RigMode::WFM => None,
-        _ => None,
-    }
-}
-
-/// Build the DC blocker for a given mode, or `None` if not applicable.
-///
-/// WFM is excluded because it has its own internal DC blockers per channel.
-/// All other modes use r = 0.9999 (corner ≈ 0.76 Hz @ 48 kHz), which strips
-/// only true carrier DC without affecting any audible bass content.
-fn dc_for_mode(mode: &RigMode) -> Option<DcBlocker> {
-    match mode {
-        RigMode::WFM => None,
-        _ => Some(DcBlocker::new(0.9999)),
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Channel DSP context
-// ---------------------------------------------------------------------------
-
-/// Per-channel DSP state: mixer, FFT-FIR, decimator, demodulator, frame accumulator.
-pub struct ChannelDsp {
-    /// Frequency offset of this channel from the SDR centre (Hz).
-    pub channel_if_hz: f64,
-    /// Current demodulator (can be swapped via `set_mode`).
-    pub demodulator: Demodulator,
-    /// Current rig mode so the decimation pipeline can be rebuilt.
-    mode: RigMode,
-    /// FFT-based FIR low-pass filter applied to packed I/Q before decimation.
-    lpf_iq: BlockFirFilterPair,
-    /// SDR capture sample rate — kept for filter rebuilds.
-    sdr_sample_rate: u32,
-    /// Output audio sample rate.
-    audio_sample_rate: u32,
-    /// Requested audio bandwidth.
-    audio_bandwidth_hz: u32,
-    /// FIR tap count used when rebuilding filters.
-    fir_taps: usize,
-    /// WFM deemphasis time constant in microseconds.
-    wfm_deemphasis_us: u32,
-    /// Whether WFM stereo decoding is enabled.
-    wfm_stereo: bool,
-    /// Whether WFM stereo denoise is enabled.
-    wfm_denoise: bool,
-    /// Decimation factor: `sdr_sample_rate / audio_sample_rate`.
-    pub decim_factor: usize,
-    /// Number of PCM channels emitted in each frame.
-    output_channels: usize,
-    /// Accumulator for output PCM frames.
-    pub frame_buf: Vec<f32>,
-    /// Read cursor into `frame_buf` for completed frame emission.
-    frame_buf_offset: usize,
-    /// Target frame size in samples.
-    pub frame_size: usize,
-    /// Sender for completed PCM frames.
-    pub pcm_tx: broadcast::Sender<Vec<f32>>,
-    /// Reusable scratch buffer for mixed I samples.
-    scratch_mixed_i: Vec<f32>,
-    /// Reusable scratch buffer for mixed Q samples.
-    scratch_mixed_q: Vec<f32>,
-    /// Reusable scratch buffer for filtered I samples.
-    scratch_filtered_i: Vec<f32>,
-    /// Reusable scratch buffer for filtered Q samples.
-    scratch_filtered_q: Vec<f32>,
-    /// Reusable scratch buffer for decimated IQ samples.
-    scratch_decimated: Vec<Complex<f32>>,
-    /// Current oscillator phase (radians).
-    pub mixer_phase: f64,
-    /// Phase increment per IQ sample.
-    pub mixer_phase_inc: f64,
-    /// Decimation counter (used only for the WFM path).
-    decim_counter: usize,
-    /// Fractional-phase resampler for non-WFM paths.
-    /// Advances by `resample_phase_inc` per IQ sample and emits an output
-    /// sample whenever it crosses 1.0, giving a long-term output rate of
-    /// exactly `audio_sample_rate` regardless of SDR rate rounding.
-    resample_phase: f64,
-    /// `audio_sample_rate / sdr_sample_rate`  (updated in `rebuild_filters`).
-    resample_phase_inc: f64,
-    /// Dedicated WFM decoder that preserves the FM composite baseband.
-    wfm_decoder: Option<WfmStereoDecoder>,
-    /// Complex-domain AGC/limiter applied before FM-family demodulation.
-    iq_agc: Option<SoftAgc>,
-    /// Soft AGC applied to all demodulated audio for consistent cross-mode levels.
-    audio_agc: SoftAgc,
-    /// DC blocker for modes whose demodulator output can carry a DC offset
-    /// (USB/LSB/AM/FM/DIG).  None for CW and WFM.
-    audio_dc: Option<DcBlocker>,
-}
-
-impl ChannelDsp {
-    fn clamp_bandwidth_for_mode(mode: &RigMode, bandwidth_hz: u32) -> u32 {
-        let _ = mode;
-        bandwidth_hz
-    }
-
-    pub fn set_channel_if_hz(&mut self, channel_if_hz: f64) {
-        self.channel_if_hz = channel_if_hz;
-        self.mixer_phase_inc = if self.sdr_sample_rate == 0 {
-            0.0
-        } else {
-            2.0 * std::f64::consts::PI * channel_if_hz / self.sdr_sample_rate as f64
-        };
-    }
-
-    fn pipeline_rates(
-        mode: &RigMode,
-        sdr_sample_rate: u32,
-        audio_sample_rate: u32,
-        audio_bandwidth_hz: u32,
-    ) -> (usize, u32) {
-        if sdr_sample_rate == 0 {
-            return (1, audio_sample_rate.max(1));
-        }
-
-        let target_rate = if *mode == RigMode::WFM {
-            audio_bandwidth_hz.max(audio_sample_rate.saturating_mul(4))
-        } else {
-            audio_sample_rate.max(1)
-        };
-        let decim_factor = (sdr_sample_rate / target_rate.max(1)).max(1) as usize;
-        let channel_sample_rate = (sdr_sample_rate / decim_factor as u32).max(1);
-        (decim_factor, channel_sample_rate)
-    }
-
-    fn rebuild_filters(&mut self, reset_wfm_decoder: bool) {
-        self.audio_bandwidth_hz = Self::clamp_bandwidth_for_mode(&self.mode, self.audio_bandwidth_hz);
-        let (next_decim_factor, channel_sample_rate) = Self::pipeline_rates(
-            &self.mode,
-            self.sdr_sample_rate,
-            self.audio_sample_rate,
-            self.audio_bandwidth_hz,
-        );
-        let iq_filter_bw = self.audio_bandwidth_hz;
-        let cutoff_hz = iq_filter_bw.min(channel_sample_rate.saturating_sub(1)) as f32 / 2.0;
-        let cutoff_norm = if self.sdr_sample_rate == 0 {
-            0.1
-        } else {
-            (cutoff_hz / self.sdr_sample_rate as f32).min(0.499)
-        };
-        self.lpf_iq = BlockFirFilterPair::new(cutoff_norm, self.fir_taps, IQ_BLOCK_SIZE);
-        let rate_changed = self.decim_factor != next_decim_factor;
-        self.decim_factor = next_decim_factor;
-        self.decim_counter = 0;
-        self.resample_phase = 0.0;
-        self.resample_phase_inc = if self.sdr_sample_rate == 0 {
-            1.0
-        } else {
-            self.audio_sample_rate as f64 / self.sdr_sample_rate as f64
-        };
-        if self.mode == RigMode::WFM {
-            if reset_wfm_decoder || rate_changed || self.wfm_decoder.is_none() {
-                self.wfm_decoder = Some(WfmStereoDecoder::new(
-                    channel_sample_rate,
-                    self.audio_sample_rate,
-                    self.output_channels,
-                    self.wfm_stereo,
-                    self.wfm_deemphasis_us,
-                ));
-            }
-        } else {
-            self.wfm_decoder = None;
-        }
-        self.iq_agc = iq_agc_for_mode(&self.mode, channel_sample_rate);
-        self.audio_agc = agc_for_mode(&self.mode, self.audio_sample_rate);
-        self.audio_dc = dc_for_mode(&self.mode);
-        self.frame_buf.clear();
-        self.frame_buf_offset = 0;
-    }
-
-    #[allow(clippy::too_many_arguments)]
-    pub fn new(
-        channel_if_hz: f64,
-        mode: &RigMode,
-        sdr_sample_rate: u32,
-        audio_sample_rate: u32,
-        output_channels: usize,
-        frame_duration_ms: u16,
-        audio_bandwidth_hz: u32,
-        wfm_deemphasis_us: u32,
-        wfm_stereo: bool,
-        fir_taps: usize,
-        pcm_tx: broadcast::Sender<Vec<f32>>,
-    ) -> Self {
-        let output_channels = output_channels.max(1);
-        let audio_bandwidth_hz = Self::clamp_bandwidth_for_mode(mode, audio_bandwidth_hz);
-        let frame_size = if audio_sample_rate == 0 || frame_duration_ms == 0 {
-            960 * output_channels
-        } else {
-            (audio_sample_rate as usize * frame_duration_ms as usize * output_channels) / 1000
-        };
-
-        let taps = fir_taps.max(1);
-        let (decim_factor, channel_sample_rate) =
-            Self::pipeline_rates(mode, sdr_sample_rate, audio_sample_rate, audio_bandwidth_hz);
-        // For WFM, widen the IQ filter enough to pass the RDS subcarrier at
-        // 57 kHz (requires cutoff ≥ 65 kHz → bandwidth ≥ 130 kHz).
-        let iq_filter_bw = audio_bandwidth_hz;
-        let cutoff_hz = iq_filter_bw.min(channel_sample_rate.saturating_sub(1)) as f32 / 2.0;
-        let cutoff_norm = if sdr_sample_rate == 0 {
-            0.1
-        } else {
-            (cutoff_hz / sdr_sample_rate as f32).min(0.499)
-        };
-
-        let mixer_phase_inc = if sdr_sample_rate == 0 {
-            0.0
-        } else {
-            2.0 * std::f64::consts::PI * channel_if_hz / sdr_sample_rate as f64
-        };
-
-        Self {
-            channel_if_hz,
-            demodulator: Demodulator::for_mode(mode),
-            mode: mode.clone(),
-            lpf_iq: BlockFirFilterPair::new(cutoff_norm, taps, IQ_BLOCK_SIZE),
-            sdr_sample_rate,
-            audio_sample_rate,
-            audio_bandwidth_hz,
-            fir_taps: taps,
-            wfm_deemphasis_us,
-            wfm_stereo,
-            wfm_denoise: true,
-            decim_factor,
-            output_channels,
-            frame_buf: Vec::with_capacity(frame_size + output_channels),
-            frame_buf_offset: 0,
-            frame_size,
-            pcm_tx,
-            scratch_mixed_i: Vec::with_capacity(IQ_BLOCK_SIZE),
-            scratch_mixed_q: Vec::with_capacity(IQ_BLOCK_SIZE),
-            scratch_filtered_i: Vec::with_capacity(IQ_BLOCK_SIZE),
-            scratch_filtered_q: Vec::with_capacity(IQ_BLOCK_SIZE),
-            scratch_decimated: Vec::with_capacity(IQ_BLOCK_SIZE / decim_factor.max(1) + 1),
-            mixer_phase: 0.0,
-            mixer_phase_inc,
-            decim_counter: 0,
-            resample_phase: 0.0,
-            resample_phase_inc: if sdr_sample_rate == 0 {
-                1.0
-            } else {
-                audio_sample_rate as f64 / sdr_sample_rate as f64
-            },
-            wfm_decoder: if *mode == RigMode::WFM {
-                Some(WfmStereoDecoder::new(
-                    channel_sample_rate,
-                    audio_sample_rate,
-                    output_channels,
-                    wfm_stereo,
-                    wfm_deemphasis_us,
-                ))
-            } else {
-                None
-            },
-            iq_agc: iq_agc_for_mode(mode, channel_sample_rate),
-            audio_agc: agc_for_mode(mode, audio_sample_rate),
-            audio_dc: dc_for_mode(mode),
-        }
-    }
-
-    pub fn set_mode(&mut self, mode: &RigMode) {
-        self.mode = mode.clone();
-        if *mode != RigMode::WFM {
-            self.audio_bandwidth_hz = default_bandwidth_for_mode(mode);
-        }
-        self.demodulator = Demodulator::for_mode(mode);
-        self.rebuild_filters(true);
-    }
-}
-
-/// Returns the appropriate channel filter bandwidth for a given mode.
-fn default_bandwidth_for_mode(mode: &RigMode) -> u32 {
-    match mode {
-        RigMode::LSB | RigMode::USB | RigMode::PKT | RigMode::DIG => 3_000,
-        RigMode::CW | RigMode::CWR => 500,
-        RigMode::AM => 12_000,
-        RigMode::FM => 12_500,
-        RigMode::WFM => 180_000,
-        RigMode::Other(_) => 3_000,
-    }
-}
-
-impl ChannelDsp {
-
-    /// Rebuild the FIR low-pass filters with new bandwidth and tap count.
-    ///
-    /// Changes take effect on the next call to `process_block`.
-    pub fn set_filter(&mut self, bandwidth_hz: u32, taps: usize) {
-        self.audio_bandwidth_hz = Self::clamp_bandwidth_for_mode(&self.mode, bandwidth_hz);
-        self.fir_taps = taps.max(1);
-        self.rebuild_filters(false);
-    }
-
-    pub fn set_wfm_deemphasis(&mut self, deemphasis_us: u32) {
-        self.wfm_deemphasis_us = deemphasis_us;
-        self.rebuild_filters(true);
-    }
-
-    pub fn set_wfm_stereo(&mut self, enabled: bool) {
-        self.wfm_stereo = enabled;
-        if let Some(decoder) = &mut self.wfm_decoder {
-            decoder.set_stereo_enabled(enabled);
-        }
-    }
-
-    pub fn set_wfm_denoise(&mut self, enabled: bool) {
-        self.wfm_denoise = enabled;
-        if let Some(decoder) = &mut self.wfm_decoder {
-            decoder.set_denoise_enabled(enabled);
-        }
-    }
-
-    pub fn rds_data(&self) -> Option<RdsData> {
-        self.wfm_decoder.as_ref().and_then(WfmStereoDecoder::rds_data)
-    }
-
-    pub fn wfm_stereo_detected(&self) -> bool {
-        self.wfm_decoder
-            .as_ref()
-            .map(WfmStereoDecoder::stereo_detected)
-            .unwrap_or(false)
-    }
-
-    pub fn reset_rds(&mut self) {
-        if let Some(decoder) = &mut self.wfm_decoder {
-            decoder.reset_rds();
-        }
-    }
-
-    pub fn reset_wfm_state(&mut self) {
-        if let Some(decoder) = &mut self.wfm_decoder {
-            decoder.reset_state();
-        }
-    }
-
-    /// Process a block of raw IQ samples through the full DSP chain.
-    ///
-    /// 1. **Batch mixer**: compute the full LO signal for the block at once,
-    ///    then multiply element-wise.  The loop body has no cross-iteration
-    ///    data dependency so the compiler can auto-vectorise it.
-    /// 2. **FFT FIR**: apply the overlap-save FIR to I and Q in one FFT pair
-    ///    each instead of N multiplies per sample.
-    /// 3. Decimate.
-    /// 4. Demodulate.
-    /// 5. Emit complete PCM frames.
-    pub fn process_block(&mut self, block: &[Complex<f32>]) {
-        let n = block.len();
-        if n == 0 {
-            return;
-        }
-
-        // --- 1. Batch mixer -------------------------------------------------
-        // Pre-compute I and Q mixer outputs for the whole block.
-        // Use an oscillator recurrence so we only call `sin_cos` once for the
-        // block instead of once per sample.
-        self.scratch_mixed_i.resize(n, 0.0);
-        self.scratch_mixed_q.resize(n, 0.0);
-        let mixed_i = &mut self.scratch_mixed_i;
-        let mixed_q = &mut self.scratch_mixed_q;
-
-        let phase_start = self.mixer_phase;
-        let phase_inc = self.mixer_phase_inc;
-        let (mut sin_phase, mut cos_phase) = phase_start.sin_cos();
-        let (sin_inc, cos_inc) = phase_inc.sin_cos();
-        for (idx, &sample) in block.iter().enumerate() {
-            let lo_re = cos_phase as f32;
-            let lo_im = -(sin_phase as f32);
-            // mixed = sample * exp(-j*phase) = sample * (cos - j*sin)
-            mixed_i[idx] = sample.re * lo_re - sample.im * lo_im;
-            mixed_q[idx] = sample.re * lo_im + sample.im * lo_re;
-            let next_sin = sin_phase * cos_inc + cos_phase * sin_inc;
-            let next_cos = cos_phase * cos_inc - sin_phase * sin_inc;
-            sin_phase = next_sin;
-            cos_phase = next_cos;
-        }
-        // Advance phase with wrap to avoid precision loss.
-        self.mixer_phase = (phase_start + n as f64 * phase_inc).rem_euclid(std::f64::consts::TAU);
-
-        // --- 2. FFT FIR (overlap-save) --------------------------------------
-        self.lpf_iq.filter_block_into(
-            mixed_i,
-            mixed_q,
-            &mut self.scratch_filtered_i,
-            &mut self.scratch_filtered_q,
-        );
-        let filtered_i = &self.scratch_filtered_i;
-        let filtered_q = &self.scratch_filtered_q;
-
-        // --- 3. Decimate / resample -----------------------------------------
-        let capacity = n / self.decim_factor + 1;
-        self.scratch_decimated.clear();
-        if self.scratch_decimated.capacity() < capacity {
-            self.scratch_decimated
-                .reserve(capacity - self.scratch_decimated.capacity());
-        }
-        let decimated = &mut self.scratch_decimated;
-        if self.wfm_decoder.is_some() {
-            // WFM: integer decimation preserves the FM composite signal at the
-            // rate expected by WfmStereoDecoder.  Final rate correction is done
-            // inside WfmStereoDecoder via its own fractional-phase accumulator.
-            for i in 0..n {
-                self.decim_counter += 1;
-                if self.decim_counter >= self.decim_factor {
-                    self.decim_counter = 0;
-                    let fi = filtered_i.get(i).copied().unwrap_or(0.0);
-                    let fq = filtered_q.get(i).copied().unwrap_or(0.0);
-                    decimated.push(Complex::new(fi, fq));
-                }
-            }
-        } else {
-            // Non-WFM: fractional-phase resampler so the long-term output rate
-            // equals exactly `audio_sample_rate` regardless of SDR rate rounding.
-            for i in 0..n {
-                self.resample_phase += self.resample_phase_inc;
-                if self.resample_phase >= 1.0 {
-                    self.resample_phase -= 1.0;
-                    let fi = filtered_i.get(i).copied().unwrap_or(0.0);
-                    let fq = filtered_q.get(i).copied().unwrap_or(0.0);
-                    decimated.push(Complex::new(fi, fq));
-                }
-            }
-        }
-
-        if decimated.is_empty() {
-            return;
-        }
-
-        if let Some(iq_agc) = &mut self.iq_agc {
-            for sample in decimated.iter_mut() {
-                *sample = iq_agc.process_complex(*sample);
-            }
-        }
-
-        // Hard-limit IQ magnitude before the WFM discriminator so overdeviated
-        // signals don't produce clipped composite baseband.
-        if self.wfm_decoder.is_some() {
-            for sample in decimated.iter_mut() {
-                let mag = (sample.re * sample.re + sample.im * sample.im).sqrt();
-                if mag > 1.0 {
-                    *sample /= mag;
-                }
-            }
-        }
-
-        // --- 4. Demodulate + post-process -----------------------------------
-        // WFM: full composite decoder (handles its own DC blocks + deemphasis).
-        // No AGC — WFM uses IQ hard limiting + fixed output gain to preserve
-        // stereo separation and avoid AGC pumping on broadcast audio.
-        const WFM_OUTPUT_GAIN: f32 = 0.32;
-        let audio = if let Some(decoder) = self.wfm_decoder.as_mut() {
-            let mut out = decoder.process_iq(decimated);
-            for sample in &mut out {
-                *sample = (*sample * WFM_OUTPUT_GAIN).clamp(-1.0, 1.0);
-            }
-            out
-        } else {
-            let mut raw = self.demodulator.demodulate(decimated);
-            for s in &mut raw {
-                if let Some(dc) = &mut self.audio_dc {
-                    *s = dc.process(*s);
-                }
-                *s = self.audio_agc.process(*s);
-            }
-            if self.output_channels >= 2 {
-                let mut stereo = Vec::with_capacity(raw.len() * self.output_channels);
-                for sample in raw {
-                    stereo.push(sample);
-                    stereo.push(sample);
-                }
-                stereo
-            } else {
-                raw
-            }
-        };
-
-        // --- 5. Emit complete PCM frames ------------------------------------
-        self.frame_buf.extend_from_slice(&audio);
-        while self.frame_buf.len().saturating_sub(self.frame_buf_offset) >= self.frame_size {
-            let start = self.frame_buf_offset;
-            let end = start + self.frame_size;
-            let frame = self.frame_buf[start..end].to_vec();
-            self.frame_buf_offset = end;
-            let _ = self.pcm_tx.send(frame);
-        }
-        if self.frame_buf_offset > 0 && self.frame_buf_offset * 2 >= self.frame_buf.len() {
-            self.frame_buf.copy_within(self.frame_buf_offset.., 0);
-            self.frame_buf.truncate(self.frame_buf.len() - self.frame_buf_offset);
-            self.frame_buf_offset = 0;
-        }
-    }
-}
-
 // ---------------------------------------------------------------------------
 // Top-level pipeline struct
 // ---------------------------------------------------------------------------
@@ -1234,25 +386,6 @@ mod tests {
         assert_eq!(out.len(), 128, "output length should equal input length");
     }
 
-    #[test]
-    fn channel_dsp_processes_silence() {
-        let (pcm_tx, _pcm_rx) = broadcast::channel::<Vec<f32>>(8);
-        let mut dsp =
-            ChannelDsp::new(0.0, &RigMode::USB, 48_000, 8_000, 1, 20, 3000, 75, true, 31, pcm_tx);
-        let block = vec![Complex::new(0.0_f32, 0.0_f32); 4096];
-        dsp.process_block(&block);
-    }
-
-    #[test]
-    fn channel_dsp_set_mode() {
-        let (pcm_tx, _) = broadcast::channel::<Vec<f32>>(8);
-        let mut dsp =
-            ChannelDsp::new(0.0, &RigMode::USB, 48_000, 8_000, 1, 20, 3000, 75, true, 31, pcm_tx);
-        assert_eq!(dsp.demodulator, Demodulator::Usb);
-        dsp.set_mode(&RigMode::FM);
-        assert_eq!(dsp.demodulator, Demodulator::Fm);
-    }
-
     #[test]
     fn pipeline_starts_with_mock_source() {
         let pipeline = SdrPipeline::start(
@@ -1271,7 +404,16 @@ mod tests {
 
     #[test]
     fn pipeline_empty_channels() {
-        let pipeline = SdrPipeline::start(Box::new(MockIqSource), 1_920_000, 48_000, 1, 20, 75, true, &[]);
+        let pipeline = SdrPipeline::start(
+            Box::new(MockIqSource),
+            1_920_000,
+            48_000,
+            1,
+            20,
+            75,
+            true,
+            &[],
+        );
         assert_eq!(pipeline.pcm_senders.len(), 0);
         assert_eq!(pipeline.channel_dsps.len(), 0);
     }
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/channel.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/channel.rs
new file mode 100644
index 0000000..9846a8b
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/channel.rs
@@ -0,0 +1,485 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+use num_complex::Complex;
+use tokio::sync::broadcast;
+use trx_core::rig::state::{RdsData, RigMode};
+
+use crate::demod::{DcBlocker, Demodulator, SoftAgc, WfmStereoDecoder};
+
+use super::{BlockFirFilterPair, IQ_BLOCK_SIZE};
+
+fn agc_for_mode(mode: &RigMode, audio_sample_rate: u32) -> SoftAgc {
+    let sr = audio_sample_rate.max(1) as f32;
+    match mode {
+        RigMode::CW | RigMode::CWR => SoftAgc::new(sr, 1.0, 50.0, 0.5, 30.0),
+        RigMode::AM => SoftAgc::new(sr, 500.0, 5_000.0, 0.5, 30.0),
+        _ => SoftAgc::new(sr, 5.0, 500.0, 0.5, 30.0),
+    }
+}
+
+fn iq_agc_for_mode(mode: &RigMode, sample_rate: u32) -> Option<SoftAgc> {
+    let sr = sample_rate.max(1) as f32;
+    match mode {
+        RigMode::FM | RigMode::PKT => Some(SoftAgc::new(sr, 0.5, 150.0, 0.8, 12.0)),
+        RigMode::WFM => None,
+        _ => None,
+    }
+}
+
+fn dc_for_mode(mode: &RigMode) -> Option<DcBlocker> {
+    match mode {
+        RigMode::WFM => None,
+        _ => Some(DcBlocker::new(0.9999)),
+    }
+}
+
+fn default_bandwidth_for_mode(mode: &RigMode) -> u32 {
+    match mode {
+        RigMode::LSB | RigMode::USB | RigMode::PKT | RigMode::DIG => 3_000,
+        RigMode::CW | RigMode::CWR => 500,
+        RigMode::AM => 12_000,
+        RigMode::FM => 12_500,
+        RigMode::WFM => 180_000,
+        RigMode::Other(_) => 3_000,
+    }
+}
+
+/// Per-channel DSP state: mixer, FFT-FIR, decimator, demodulator, frame accumulator.
+pub struct ChannelDsp {
+    pub channel_if_hz: f64,
+    pub demodulator: Demodulator,
+    mode: RigMode,
+    lpf_iq: BlockFirFilterPair,
+    sdr_sample_rate: u32,
+    audio_sample_rate: u32,
+    audio_bandwidth_hz: u32,
+    fir_taps: usize,
+    wfm_deemphasis_us: u32,
+    wfm_stereo: bool,
+    wfm_denoise: bool,
+    pub decim_factor: usize,
+    output_channels: usize,
+    pub frame_buf: Vec<f32>,
+    frame_buf_offset: usize,
+    pub frame_size: usize,
+    pub pcm_tx: broadcast::Sender<Vec<f32>>,
+    scratch_mixed_i: Vec<f32>,
+    scratch_mixed_q: Vec<f32>,
+    scratch_filtered_i: Vec<f32>,
+    scratch_filtered_q: Vec<f32>,
+    scratch_decimated: Vec<Complex<f32>>,
+    pub mixer_phase: f64,
+    pub mixer_phase_inc: f64,
+    decim_counter: usize,
+    resample_phase: f64,
+    resample_phase_inc: f64,
+    wfm_decoder: Option<WfmStereoDecoder>,
+    iq_agc: Option<SoftAgc>,
+    audio_agc: SoftAgc,
+    audio_dc: Option<DcBlocker>,
+}
+
+impl ChannelDsp {
+    fn clamp_bandwidth_for_mode(mode: &RigMode, bandwidth_hz: u32) -> u32 {
+        let _ = mode;
+        bandwidth_hz
+    }
+
+    pub fn set_channel_if_hz(&mut self, channel_if_hz: f64) {
+        self.channel_if_hz = channel_if_hz;
+        self.mixer_phase_inc = if self.sdr_sample_rate == 0 {
+            0.0
+        } else {
+            2.0 * std::f64::consts::PI * channel_if_hz / self.sdr_sample_rate as f64
+        };
+    }
+
+    fn pipeline_rates(
+        mode: &RigMode,
+        sdr_sample_rate: u32,
+        audio_sample_rate: u32,
+        audio_bandwidth_hz: u32,
+    ) -> (usize, u32) {
+        if sdr_sample_rate == 0 {
+            return (1, audio_sample_rate.max(1));
+        }
+
+        let target_rate = if *mode == RigMode::WFM {
+            audio_bandwidth_hz.max(audio_sample_rate.saturating_mul(4))
+        } else {
+            audio_sample_rate.max(1)
+        };
+        let decim_factor = (sdr_sample_rate / target_rate.max(1)).max(1) as usize;
+        let channel_sample_rate = (sdr_sample_rate / decim_factor as u32).max(1);
+        (decim_factor, channel_sample_rate)
+    }
+
+    fn rebuild_filters(&mut self, reset_wfm_decoder: bool) {
+        self.audio_bandwidth_hz =
+            Self::clamp_bandwidth_for_mode(&self.mode, self.audio_bandwidth_hz);
+        let (next_decim_factor, channel_sample_rate) = Self::pipeline_rates(
+            &self.mode,
+            self.sdr_sample_rate,
+            self.audio_sample_rate,
+            self.audio_bandwidth_hz,
+        );
+        let cutoff_hz = self
+            .audio_bandwidth_hz
+            .min(channel_sample_rate.saturating_sub(1)) as f32
+            / 2.0;
+        let cutoff_norm = if self.sdr_sample_rate == 0 {
+            0.1
+        } else {
+            (cutoff_hz / self.sdr_sample_rate as f32).min(0.499)
+        };
+        self.lpf_iq = BlockFirFilterPair::new(cutoff_norm, self.fir_taps, IQ_BLOCK_SIZE);
+        let rate_changed = self.decim_factor != next_decim_factor;
+        self.decim_factor = next_decim_factor;
+        self.decim_counter = 0;
+        self.resample_phase = 0.0;
+        self.resample_phase_inc = if self.sdr_sample_rate == 0 {
+            1.0
+        } else {
+            self.audio_sample_rate as f64 / self.sdr_sample_rate as f64
+        };
+        if self.mode == RigMode::WFM {
+            if reset_wfm_decoder || rate_changed || self.wfm_decoder.is_none() {
+                self.wfm_decoder = Some(WfmStereoDecoder::new(
+                    channel_sample_rate,
+                    self.audio_sample_rate,
+                    self.output_channels,
+                    self.wfm_stereo,
+                    self.wfm_deemphasis_us,
+                ));
+            }
+        } else {
+            self.wfm_decoder = None;
+        }
+        self.iq_agc = iq_agc_for_mode(&self.mode, channel_sample_rate);
+        self.audio_agc = agc_for_mode(&self.mode, self.audio_sample_rate);
+        self.audio_dc = dc_for_mode(&self.mode);
+        self.frame_buf.clear();
+        self.frame_buf_offset = 0;
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        channel_if_hz: f64,
+        mode: &RigMode,
+        sdr_sample_rate: u32,
+        audio_sample_rate: u32,
+        output_channels: usize,
+        frame_duration_ms: u16,
+        audio_bandwidth_hz: u32,
+        wfm_deemphasis_us: u32,
+        wfm_stereo: bool,
+        fir_taps: usize,
+        pcm_tx: broadcast::Sender<Vec<f32>>,
+    ) -> Self {
+        let output_channels = output_channels.max(1);
+        let audio_bandwidth_hz = Self::clamp_bandwidth_for_mode(mode, audio_bandwidth_hz);
+        let frame_size = if audio_sample_rate == 0 || frame_duration_ms == 0 {
+            960 * output_channels
+        } else {
+            (audio_sample_rate as usize * frame_duration_ms as usize * output_channels) / 1000
+        };
+
+        let taps = fir_taps.max(1);
+        let (decim_factor, channel_sample_rate) =
+            Self::pipeline_rates(mode, sdr_sample_rate, audio_sample_rate, audio_bandwidth_hz);
+        let cutoff_hz = audio_bandwidth_hz.min(channel_sample_rate.saturating_sub(1)) as f32 / 2.0;
+        let cutoff_norm = if sdr_sample_rate == 0 {
+            0.1
+        } else {
+            (cutoff_hz / sdr_sample_rate as f32).min(0.499)
+        };
+
+        let mixer_phase_inc = if sdr_sample_rate == 0 {
+            0.0
+        } else {
+            2.0 * std::f64::consts::PI * channel_if_hz / sdr_sample_rate as f64
+        };
+
+        Self {
+            channel_if_hz,
+            demodulator: Demodulator::for_mode(mode),
+            mode: mode.clone(),
+            lpf_iq: BlockFirFilterPair::new(cutoff_norm, taps, IQ_BLOCK_SIZE),
+            sdr_sample_rate,
+            audio_sample_rate,
+            audio_bandwidth_hz,
+            fir_taps: taps,
+            wfm_deemphasis_us,
+            wfm_stereo,
+            wfm_denoise: true,
+            decim_factor,
+            output_channels,
+            frame_buf: Vec::with_capacity(frame_size + output_channels),
+            frame_buf_offset: 0,
+            frame_size,
+            pcm_tx,
+            scratch_mixed_i: Vec::with_capacity(IQ_BLOCK_SIZE),
+            scratch_mixed_q: Vec::with_capacity(IQ_BLOCK_SIZE),
+            scratch_filtered_i: Vec::with_capacity(IQ_BLOCK_SIZE),
+            scratch_filtered_q: Vec::with_capacity(IQ_BLOCK_SIZE),
+            scratch_decimated: Vec::with_capacity(IQ_BLOCK_SIZE / decim_factor.max(1) + 1),
+            mixer_phase: 0.0,
+            mixer_phase_inc,
+            decim_counter: 0,
+            resample_phase: 0.0,
+            resample_phase_inc: if sdr_sample_rate == 0 {
+                1.0
+            } else {
+                audio_sample_rate as f64 / sdr_sample_rate as f64
+            },
+            wfm_decoder: if *mode == RigMode::WFM {
+                Some(WfmStereoDecoder::new(
+                    channel_sample_rate,
+                    audio_sample_rate,
+                    output_channels,
+                    wfm_stereo,
+                    wfm_deemphasis_us,
+                ))
+            } else {
+                None
+            },
+            iq_agc: iq_agc_for_mode(mode, channel_sample_rate),
+            audio_agc: agc_for_mode(mode, audio_sample_rate),
+            audio_dc: dc_for_mode(mode),
+        }
+    }
+
+    pub fn set_mode(&mut self, mode: &RigMode) {
+        self.mode = mode.clone();
+        if *mode != RigMode::WFM {
+            self.audio_bandwidth_hz = default_bandwidth_for_mode(mode);
+        }
+        self.demodulator = Demodulator::for_mode(mode);
+        self.rebuild_filters(true);
+    }
+
+    pub fn set_filter(&mut self, bandwidth_hz: u32, taps: usize) {
+        self.audio_bandwidth_hz = Self::clamp_bandwidth_for_mode(&self.mode, bandwidth_hz);
+        self.fir_taps = taps.max(1);
+        self.rebuild_filters(false);
+    }
+
+    pub fn set_wfm_deemphasis(&mut self, deemphasis_us: u32) {
+        self.wfm_deemphasis_us = deemphasis_us;
+        self.rebuild_filters(true);
+    }
+
+    pub fn set_wfm_stereo(&mut self, enabled: bool) {
+        self.wfm_stereo = enabled;
+        if let Some(decoder) = &mut self.wfm_decoder {
+            decoder.set_stereo_enabled(enabled);
+        }
+    }
+
+    pub fn set_wfm_denoise(&mut self, enabled: bool) {
+        self.wfm_denoise = enabled;
+        if let Some(decoder) = &mut self.wfm_decoder {
+            decoder.set_denoise_enabled(enabled);
+        }
+    }
+
+    pub fn rds_data(&self) -> Option<RdsData> {
+        self.wfm_decoder
+            .as_ref()
+            .and_then(WfmStereoDecoder::rds_data)
+    }
+
+    pub fn wfm_stereo_detected(&self) -> bool {
+        self.wfm_decoder
+            .as_ref()
+            .map(WfmStereoDecoder::stereo_detected)
+            .unwrap_or(false)
+    }
+
+    pub fn reset_rds(&mut self) {
+        if let Some(decoder) = &mut self.wfm_decoder {
+            decoder.reset_rds();
+        }
+    }
+
+    pub fn reset_wfm_state(&mut self) {
+        if let Some(decoder) = &mut self.wfm_decoder {
+            decoder.reset_state();
+        }
+    }
+
+    pub fn process_block(&mut self, block: &[Complex<f32>]) {
+        let n = block.len();
+        if n == 0 {
+            return;
+        }
+
+        self.scratch_mixed_i.resize(n, 0.0);
+        self.scratch_mixed_q.resize(n, 0.0);
+        let mixed_i = &mut self.scratch_mixed_i;
+        let mixed_q = &mut self.scratch_mixed_q;
+
+        let phase_start = self.mixer_phase;
+        let phase_inc = self.mixer_phase_inc;
+        let (mut sin_phase, mut cos_phase) = phase_start.sin_cos();
+        let (sin_inc, cos_inc) = phase_inc.sin_cos();
+        for (idx, &sample) in block.iter().enumerate() {
+            let lo_re = cos_phase as f32;
+            let lo_im = -(sin_phase as f32);
+            mixed_i[idx] = sample.re * lo_re - sample.im * lo_im;
+            mixed_q[idx] = sample.re * lo_im + sample.im * lo_re;
+            let next_sin = sin_phase * cos_inc + cos_phase * sin_inc;
+            let next_cos = cos_phase * cos_inc - sin_phase * sin_inc;
+            sin_phase = next_sin;
+            cos_phase = next_cos;
+        }
+        self.mixer_phase = (phase_start + n as f64 * phase_inc).rem_euclid(std::f64::consts::TAU);
+
+        self.lpf_iq.filter_block_into(
+            mixed_i,
+            mixed_q,
+            &mut self.scratch_filtered_i,
+            &mut self.scratch_filtered_q,
+        );
+        let filtered_i = &self.scratch_filtered_i;
+        let filtered_q = &self.scratch_filtered_q;
+
+        let capacity = n / self.decim_factor + 1;
+        self.scratch_decimated.clear();
+        if self.scratch_decimated.capacity() < capacity {
+            self.scratch_decimated
+                .reserve(capacity - self.scratch_decimated.capacity());
+        }
+        let decimated = &mut self.scratch_decimated;
+        if self.wfm_decoder.is_some() {
+            for idx in 0..n {
+                self.decim_counter += 1;
+                if self.decim_counter >= self.decim_factor {
+                    self.decim_counter = 0;
+                    let fi = filtered_i.get(idx).copied().unwrap_or(0.0);
+                    let fq = filtered_q.get(idx).copied().unwrap_or(0.0);
+                    decimated.push(Complex::new(fi, fq));
+                }
+            }
+        } else {
+            for idx in 0..n {
+                self.resample_phase += self.resample_phase_inc;
+                if self.resample_phase >= 1.0 {
+                    self.resample_phase -= 1.0;
+                    let fi = filtered_i.get(idx).copied().unwrap_or(0.0);
+                    let fq = filtered_q.get(idx).copied().unwrap_or(0.0);
+                    decimated.push(Complex::new(fi, fq));
+                }
+            }
+        }
+
+        if decimated.is_empty() {
+            return;
+        }
+
+        if let Some(iq_agc) = &mut self.iq_agc {
+            for sample in decimated.iter_mut() {
+                *sample = iq_agc.process_complex(*sample);
+            }
+        }
+
+        if self.wfm_decoder.is_some() {
+            for sample in decimated.iter_mut() {
+                let mag = (sample.re * sample.re + sample.im * sample.im).sqrt();
+                if mag > 1.0 {
+                    *sample /= mag;
+                }
+            }
+        }
+
+        const WFM_OUTPUT_GAIN: f32 = 0.32;
+        let audio = if let Some(decoder) = self.wfm_decoder.as_mut() {
+            let mut out = decoder.process_iq(decimated);
+            for sample in &mut out {
+                *sample = (*sample * WFM_OUTPUT_GAIN).clamp(-1.0, 1.0);
+            }
+            out
+        } else {
+            let mut raw = self.demodulator.demodulate(decimated);
+            for sample in &mut raw {
+                if let Some(dc) = &mut self.audio_dc {
+                    *sample = dc.process(*sample);
+                }
+                *sample = self.audio_agc.process(*sample);
+            }
+            if self.output_channels >= 2 {
+                let mut stereo = Vec::with_capacity(raw.len() * self.output_channels);
+                for sample in raw {
+                    stereo.push(sample);
+                    stereo.push(sample);
+                }
+                stereo
+            } else {
+                raw
+            }
+        };
+
+        self.frame_buf.extend_from_slice(&audio);
+        while self.frame_buf.len().saturating_sub(self.frame_buf_offset) >= self.frame_size {
+            let start = self.frame_buf_offset;
+            let end = start + self.frame_size;
+            let frame = self.frame_buf[start..end].to_vec();
+            self.frame_buf_offset = end;
+            let _ = self.pcm_tx.send(frame);
+        }
+        if self.frame_buf_offset > 0 && self.frame_buf_offset * 2 >= self.frame_buf.len() {
+            self.frame_buf.copy_within(self.frame_buf_offset.., 0);
+            self.frame_buf
+                .truncate(self.frame_buf.len() - self.frame_buf_offset);
+            self.frame_buf_offset = 0;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn channel_dsp_processes_silence() {
+        let (pcm_tx, _pcm_rx) = broadcast::channel::<Vec<f32>>(8);
+        let mut dsp = ChannelDsp::new(
+            0.0,
+            &RigMode::USB,
+            48_000,
+            8_000,
+            1,
+            20,
+            3000,
+            75,
+            true,
+            31,
+            pcm_tx,
+        );
+        let block = vec![Complex::new(0.0_f32, 0.0_f32); 4096];
+        dsp.process_block(&block);
+    }
+
+    #[test]
+    fn channel_dsp_set_mode() {
+        let (pcm_tx, _) = broadcast::channel::<Vec<f32>>(8);
+        let mut dsp = ChannelDsp::new(
+            0.0,
+            &RigMode::USB,
+            48_000,
+            8_000,
+            1,
+            20,
+            3000,
+            75,
+            true,
+            31,
+            pcm_tx,
+        );
+        assert_eq!(dsp.demodulator, Demodulator::Usb);
+        dsp.set_mode(&RigMode::FM);
+        assert_eq!(dsp.demodulator, Demodulator::Fm);
+    }
+}
diff --git a/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/filter.rs b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/filter.rs
new file mode 100644
index 0000000..02292a8
--- /dev/null
+++ b/src/trx-server/trx-backend/trx-backend-soapysdr/src/dsp/filter.rs
@@ -0,0 +1,300 @@
+// SPDX-FileCopyrightText: 2025 Stanislaw Grams <stanislawgrams@gmail.com>
+//
+// SPDX-License-Identifier: BSD-2-Clause
+
+use std::f32::consts::PI;
+use std::sync::Arc;
+
+use rustfft::num_complex::Complex as FftComplex;
+use rustfft::{Fft, FftPlanner};
+
+fn windowed_sinc_coeffs(cutoff_norm: f32, taps: usize) -> Vec<f32> {
+    assert!(taps >= 1, "FIR filter must have at least 1 tap");
+    let m = (taps - 1) as f32;
+    let mut coeffs = Vec::with_capacity(taps);
+    for i in 0..taps {
+        let x = i as f32 - m / 2.0;
+        let sinc = if x == 0.0 {
+            2.0 * cutoff_norm
+        } else {
+            (2.0 * PI * cutoff_norm * x).sin() / (PI * x)
+        };
+        let window = if taps == 1 {
+            1.0
+        } else {
+            0.5 * (1.0 - (2.0 * PI * i as f32 / m).cos())
+        };
+        coeffs.push(sinc * window);
+    }
+    let sum: f32 = coeffs.iter().sum();
+    if sum.abs() > 1e-12 {
+        let inv = 1.0 / sum;
+        for coeff in &mut coeffs {
+            *coeff *= inv;
+        }
+    }
+    coeffs
+}
+
+/// A simple windowed-sinc FIR low-pass filter (sample-by-sample interface).
+///
+/// Used only in unit tests. The DSP pipeline uses [`BlockFirFilter`] instead.
+pub struct FirFilter {
+    coeffs: Vec<f32>,
+    state: Vec<f32>,
+    pos: usize,
+}
+
+impl FirFilter {
+    pub fn new(cutoff_norm: f32, taps: usize) -> Self {
+        let coeffs = windowed_sinc_coeffs(cutoff_norm, taps);
+        let state_len = taps.saturating_sub(1);
+        Self {
+            coeffs,
+            state: vec![0.0; state_len],
+            pos: 0,
+        }
+    }
+
+    pub fn process(&mut self, sample: f32) -> f32 {
+        let n = self.state.len();
+        if n == 0 {
+            return sample * self.coeffs[0];
+        }
+        self.state[self.pos] = sample;
+        self.pos = (self.pos + 1) % n;
+        let mut acc = self.coeffs[0] * sample;
+        for k in 1..self.coeffs.len() {
+            let idx = (self.pos + n - k) % n;
+            acc += self.coeffs[k] * self.state[idx];
+        }
+        acc
+    }
+}
+
+/// FFT-based overlap-save FIR low-pass filter (block interface).
+pub struct BlockFirFilter {
+    h_freq: Vec<FftComplex<f32>>,
+    overlap: Vec<f32>,
+    n_taps: usize,
+    fft_size: usize,
+    fft: Arc<dyn Fft<f32>>,
+    ifft: Arc<dyn Fft<f32>>,
+    scratch_freq: Vec<FftComplex<f32>>,
+}
+
+pub struct BlockFirFilterPair {
+    h_freq: Vec<FftComplex<f32>>,
+    overlap: Vec<FftComplex<f32>>,
+    n_taps: usize,
+    fft_size: usize,
+    fft: Arc<dyn Fft<f32>>,
+    ifft: Arc<dyn Fft<f32>>,
+    scratch_freq: Vec<FftComplex<f32>>,
+}
+
+type FirKernel = (
+    Vec<FftComplex<f32>>,
+    usize,
+    Arc<dyn Fft<f32>>,
+    Arc<dyn Fft<f32>>,
+);
+
+fn build_fir_kernel(cutoff_norm: f32, taps: usize, block_size: usize) -> FirKernel {
+    let coeffs = windowed_sinc_coeffs(cutoff_norm, taps);
+    let fft_size = (block_size + taps - 1).next_power_of_two();
+
+    let mut planner = FftPlanner::<f32>::new();
+    let fft = planner.plan_fft_forward(fft_size);
+    let ifft = planner.plan_fft_inverse(fft_size);
+
+    let mut h_buf: Vec<FftComplex<f32>> = coeffs
+        .iter()
+        .map(|&coeff| FftComplex::new(coeff, 0.0))
+        .collect();
+    fft.process({
+        h_buf.resize(fft_size, FftComplex::new(0.0, 0.0));
+        &mut h_buf
+    });
+
+    (h_buf, fft_size, fft, ifft)
+}
+
+fn mul_freq_domain_scalar(buf: &mut [FftComplex<f32>], h_freq: &[FftComplex<f32>], scale: f32) {
+    for (x, &h) in buf.iter_mut().zip(h_freq.iter()) {
+        *x = FftComplex::new(
+            (x.re * h.re - x.im * h.im) * scale,
+            (x.re * h.im + x.im * h.re) * scale,
+        );
+    }
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[target_feature(enable = "avx2")]
+unsafe fn mul_freq_domain_avx2(
+    buf: &mut [FftComplex<f32>],
+    h_freq: &[FftComplex<f32>],
+    scale: f32,
+) {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86::*;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64::*;
+
+    let len = buf.len().min(h_freq.len());
+    let mut i = 0usize;
+    let scale_v = _mm256_set1_ps(scale);
+    while i + 4 <= len {
+        let x_ptr = buf.as_mut_ptr().add(i) as *mut f32;
+        let h_ptr = h_freq.as_ptr().add(i) as *const f32;
+        let x_v = _mm256_loadu_ps(x_ptr);
+        let h_v = _mm256_loadu_ps(h_ptr);
+        let h_re = _mm256_moveldup_ps(h_v);
+        let h_im = _mm256_movehdup_ps(h_v);
+        let x_swapped = _mm256_permute_ps(x_v, 0xB1);
+        let prod = _mm256_addsub_ps(_mm256_mul_ps(x_v, h_re), _mm256_mul_ps(x_swapped, h_im));
+        let out = _mm256_mul_ps(prod, scale_v);
+        _mm256_storeu_ps(x_ptr, out);
+        i += 4;
+    }
+
+    mul_freq_domain_scalar(&mut buf[i..len], &h_freq[i..len], scale);
+}
+
+fn mul_freq_domain(buf: &mut [FftComplex<f32>], h_freq: &[FftComplex<f32>], scale: f32) {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if std::arch::is_x86_feature_detected!("avx2") {
+            unsafe {
+                mul_freq_domain_avx2(buf, h_freq, scale);
+            }
+            return;
+        }
+    }
+
+    mul_freq_domain_scalar(buf, h_freq, scale);
+}
+
+impl BlockFirFilter {
+    pub fn new(cutoff_norm: f32, taps: usize, block_size: usize) -> Self {
+        let taps = taps.max(1);
+        let (h_buf, fft_size, fft, ifft) = build_fir_kernel(cutoff_norm, taps, block_size);
+
+        Self {
+            h_freq: h_buf,
+            overlap: vec![0.0; taps.saturating_sub(1)],
+            n_taps: taps,
+            fft_size,
+            fft,
+            ifft,
+            scratch_freq: vec![FftComplex::new(0.0, 0.0); fft_size],
+        }
+    }
+
+    pub fn filter_block_into(&mut self, input: &[f32], output: &mut Vec<f32>) {
+        let n_new = input.len();
+        let n_overlap = self.n_taps.saturating_sub(1);
+
+        let buf = &mut self.scratch_freq;
+        buf.clear();
+        buf.reserve(self.fft_size.saturating_sub(buf.capacity()));
+        for &sample in &self.overlap {
+            buf.push(FftComplex::new(sample, 0.0));
+        }
+        for &sample in input {
+            buf.push(FftComplex::new(sample, 0.0));
+        }
+        buf.resize(self.fft_size, FftComplex::new(0.0, 0.0));
+
+        self.fft.process(buf);
+        mul_freq_domain(buf, &self.h_freq, 1.0 / self.fft_size as f32);
+        self.ifft.process(buf);
+
+        let end = (n_overlap + n_new).min(buf.len());
+        output.clear();
+        output.reserve(n_new.saturating_sub(output.capacity()));
+        output.extend(buf[n_overlap..end].iter().map(|sample| sample.re));
+
+        if n_overlap > 0 {
+            if n_new >= n_overlap {
+                let new_start = n_new - n_overlap;
+                self.overlap.copy_from_slice(&input[new_start..]);
+            } else {
+                let keep_old = n_overlap - n_new;
+                self.overlap.copy_within(n_new..n_overlap, 0);
+                self.overlap[keep_old..].copy_from_slice(input);
+            }
+        }
+    }
+
+    pub fn filter_block(&mut self, input: &[f32]) -> Vec<f32> {
+        let mut output = Vec::with_capacity(input.len());
+        self.filter_block_into(input, &mut output);
+        output
+    }
+}
+
+impl BlockFirFilterPair {
+    pub fn new(cutoff_norm: f32, taps: usize, block_size: usize) -> Self {
+        let taps = taps.max(1);
+        let (h_buf, fft_size, fft, ifft) = build_fir_kernel(cutoff_norm, taps, block_size);
+        Self {
+            h_freq: h_buf,
+            overlap: vec![FftComplex::new(0.0, 0.0); taps.saturating_sub(1)],
+            n_taps: taps,
+            fft_size,
+            fft,
+            ifft,
+            scratch_freq: vec![FftComplex::new(0.0, 0.0); fft_size],
+        }
+    }
+
+    pub fn filter_block_into(
+        &mut self,
+        input_i: &[f32],
+        input_q: &[f32],
+        output_i: &mut Vec<f32>,
+        output_q: &mut Vec<f32>,
+    ) {
+        let n_new = input_i.len().min(input_q.len());
+        let n_overlap = self.n_taps.saturating_sub(1);
+
+        let buf = &mut self.scratch_freq;
+        buf.clear();
+        buf.reserve(self.fft_size.saturating_sub(buf.capacity()));
+        buf.extend(self.overlap.iter().copied());
+        for idx in 0..n_new {
+            buf.push(FftComplex::new(input_i[idx], input_q[idx]));
+        }
+        buf.resize(self.fft_size, FftComplex::new(0.0, 0.0));
+
+        self.fft.process(buf);
+        mul_freq_domain(buf, &self.h_freq, 1.0 / self.fft_size as f32);
+        self.ifft.process(buf);
+
+        let end = (n_overlap + n_new).min(buf.len());
+        output_i.clear();
+        output_q.clear();
+        output_i.reserve(n_new.saturating_sub(output_i.capacity()));
+        output_q.reserve(n_new.saturating_sub(output_q.capacity()));
+        for sample in &buf[n_overlap..end] {
+            output_i.push(sample.re);
+            output_q.push(sample.im);
+        }
+
+        if n_overlap > 0 {
+            if n_new >= n_overlap {
+                let new_start = n_new - n_overlap;
+                for (dst, idx) in self.overlap.iter_mut().zip(new_start..n_new) {
+                    *dst = FftComplex::new(input_i[idx], input_q[idx]);
+                }
+            } else {
+                let keep_old = n_overlap - n_new;
+                self.overlap.copy_within(n_new..n_overlap, 0);
+                for (dst, idx) in self.overlap[keep_old..].iter_mut().zip(0..n_new) {
+                    *dst = FftComplex::new(input_i[idx], input_q[idx]);
+                }
+            }
+        }
+    }
+}