[refactor](trx-ftx): optimize ft2 decode hot paths

Reuse FT2 downsample and bitmetric work buffers, speed up
sync2d_score with precomputed references, and cache peak-search
FFT state on the pipeline.

Co-authored-by: OpenAI Codex <codex@openai.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
2026-03-18 23:08:42 +01:00
parent 7d20058c03
commit 0b28900082
5 changed files with 563 additions and 342 deletions
@@ -176,8 +176,7 @@ impl Ft8Decoder {
    /// Waterfall-based decode for FT8/FT4.
    fn decode_waterfall(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
-        let candidates =
+        let candidates = ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
            ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
        let mut results = Vec::new();
        let mut seen: Vec<u16> = Vec::new();
@@ -209,13 +208,14 @@ impl Ft8Decoder {
            // Compute time offset
            let symbol_period = self.protocol.symbol_period();
-            let dt_s =
+            let dt_s = (cand.time_offset as f32
-                (cand.time_offset as f32 + cand.time_sub as f32 / self.monitor.wf.time_osr as f32)
+                + cand.time_sub as f32 / self.monitor.wf.time_osr as f32)
                * symbol_period
                - 0.5;
            // Compute frequency
-            let freq_hz = (self.monitor.min_bin as f32 + cand.freq_offset as f32
+            let freq_hz = (self.monitor.min_bin as f32
                + cand.freq_offset as f32
                + cand.freq_sub as f32 / self.monitor.wf.freq_osr as f32)
                / symbol_period;
@@ -232,7 +232,8 @@ impl Ft8Decoder {
    /// FT2-specific decode pipeline.
    fn decode_ft2(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
-        let pipe = match self.ft2_pipeline.as_ref() {
+        let ft2_results = {
            let pipe = match self.ft2_pipeline.as_mut() {
                Some(p) => p,
                None => return Vec::new(),
            };
@@ -241,7 +242,8 @@ impl Ft8Decoder {
                return Vec::new();
            }
-        let ft2_results = pipe.decode(max_results);
+            pipe.decode(max_results)
        };
        let mut results = Vec::new();
        for r in ft2_results {
@@ -267,8 +269,7 @@ impl Ft8Decoder {
            payload: msg.payload,
            hash: msg.hash as u32,
        };
-        let (text, _offsets, _rc) =
+        let (text, _offsets, _rc) = message::ftx_message_decode(&m, &mut self.callsign_hash);
            message::ftx_message_decode(&m, &mut self.callsign_hash);
        if text.is_empty() {
            return None;
        }
@@ -15,51 +15,64 @@ use crate::constants::{FT4_COSTAS_PATTERN, FT4_GRAY_MAP};
 use super::{FT2_FRAME_SYMBOLS, FT2_NSS};
-/// Extract bit metrics from the downsampled signal region.
+const N_METRICS: usize = 2 * FT2_FRAME_SYMBOLS;
 ///
 /// Returns a 2D array of shape `[2 * FT2_FRAME_SYMBOLS][3]` where:
 /// - Index 0: 1-symbol scale metric
 /// - Index 1: 2-symbol scale metric
 /// - Index 2: 4-symbol scale metric
 ///
 /// Returns `None` if the sync quality is too poor (fewer than 4 of 16
 /// Costas sync tones decoded correctly).
 pub fn extract_bitmetrics_raw(
    signal: &[Complex32],
 ) -> Option<Vec<[f32; 3]>> {
    let n_metrics = 2 * FT2_FRAME_SYMBOLS;
    let mut bitmetrics = vec![[0.0f32; 3]; n_metrics];
-    // Per-symbol FFT to extract complex tone amplitudes
+/// Reusable FFT plans and scratch buffers for bit-metric extraction.
 pub struct BitMetricsWorkspace {
    /// Forward FFT plan of length `FT2_NSS`, applied to one symbol at a time.
    fft: std::sync::Arc<dyn rustfft::Fft<f32>>,
    /// In-place scratch for `fft`, sized from `get_inplace_scratch_len()`.
    scratch: Vec<Complex32>,
    /// Complex FFT output of the first four bins for every frame symbol.
    symbols: [[Complex32; 4]; FT2_FRAME_SYMBOLS],
    /// Magnitudes (`norm()`) of the entries stored in `symbols`.
    s4: [[f32; 4]; FT2_FRAME_SYMBOLS],
    /// Bit metrics from the 1-symbol integration pass.
    metric1: [f32; N_METRICS],
    /// Bit metrics from the 2-symbol integration pass.
    metric2: [f32; N_METRICS],
    /// Bit metrics from the 4-symbol integration pass.
    metric4: [f32; N_METRICS],
    /// Packed result: `[metric1, metric2, metric4]` per bit; `extract`
    /// returns a borrow of this array.
    bitmetrics: [[f32; 3]; N_METRICS],
    /// Per-symbol sample buffer that the FFT transforms in place.
    csymb: [Complex32; FT2_NSS],
 }
 impl BitMetricsWorkspace {
    pub fn new() -> Self {
        let mut planner = FftPlanner::<f32>::new();
        let fft = planner.plan_fft_forward(FT2_NSS);
-    let fft_scratch_len = fft.get_inplace_scratch_len();
+        let scratch = vec![Complex32::new(0.0, 0.0); fft.get_inplace_scratch_len()];
    let mut scratch = vec![Complex32::new(0.0, 0.0); fft_scratch_len];
-    // Complex symbols for each of the 4 tones at each frame symbol
+        Self {
-    let mut symbols = vec![[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS];
+            fft,
-    // Magnitude for each tone at each symbol
+            scratch,
-    let mut s4 = vec![[0.0f32; 4]; FT2_FRAME_SYMBOLS];
+            symbols: [[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS],
            s4: [[0.0; 4]; FT2_FRAME_SYMBOLS],
            metric1: [0.0; N_METRICS],
            metric2: [0.0; N_METRICS],
            metric4: [0.0; N_METRICS],
            bitmetrics: [[0.0; 3]; N_METRICS],
            csymb: [Complex32::new(0.0, 0.0); FT2_NSS],
        }
    }
    /// Extract bit metrics into a reusable internal buffer.
    pub fn extract<'a>(&'a mut self, signal: &[Complex32]) -> Option<&'a [[f32; 3]]> {
        self.metric1.fill(0.0);
        self.metric2.fill(0.0);
        self.metric4.fill(0.0);
        for sym in 0..FT2_FRAME_SYMBOLS {
            let offset = sym * FT2_NSS;
-        let mut csymb: Vec<Complex32> = (0..FT2_NSS)
+            if offset + FT2_NSS <= signal.len() {
-            .map(|i| {
+                self.csymb
-                if offset + i < signal.len() {
+                    .copy_from_slice(&signal[offset..(offset + FT2_NSS)]);
                    signal[offset + i]
            } else {
-                    Complex32::new(0.0, 0.0)
+                self.csymb.fill(Complex32::new(0.0, 0.0));
                let remaining = signal.len().saturating_sub(offset);
                self.csymb[..remaining].copy_from_slice(&signal[offset..(offset + remaining)]);
            }
            })
            .collect();
-        fft.process_with_scratch(&mut csymb, &mut scratch);
+            self.fft
                .process_with_scratch(&mut self.csymb, &mut self.scratch);
            for tone in 0..4 {
-            if tone < csymb.len() {
+                let symbol = self.csymb[tone];
-                symbols[sym][tone] = csymb[tone];
+                self.symbols[sym][tone] = symbol;
-                s4[sym][tone] = csymb[tone].norm();
+                self.s4[sym][tone] = symbol.norm();
            }
            }
        }
@@ -73,7 +86,7 @@ pub fn extract_bitmetrics_raw(
                }
                let mut best = 0;
                for tone in 1..4 {
-                if s4[base + i][tone] > s4[base + i][best] {
+                    if self.s4[base + i][tone] > self.s4[base + i][best] {
                        best = tone;
                    }
                }
@@ -87,93 +100,56 @@ pub fn extract_bitmetrics_raw(
            return None;
        }
    // Precompute one_mask: for each integer 0..255 and bit position 0..7,
    // whether that bit is set.
    let one_mask: Vec<[u8; 8]> = (0..256u16)
        .map(|i| {
            let mut m = [0u8; 8];
            for j in 0..8 {
                m[j] = if (i & (1 << j)) != 0 { 1 } else { 0 };
            }
            m
        })
        .collect();
    // Compute metrics at three scales
    let mut metric1 = vec![0.0f32; n_metrics];
    let mut metric2 = vec![0.0f32; n_metrics];
    let mut metric4 = vec![0.0f32; n_metrics];
        for nseq in 0..3 {
-        let nsym = match nseq {
+            let (nsym, metric): (usize, &mut [f32; N_METRICS]) = match nseq {
-            0 => 1,
+                0 => (1, &mut self.metric1),
-            1 => 2,
+                1 => (2, &mut self.metric2),
-            _ => 4,
+                _ => (4, &mut self.metric4),
            };
-        let nt = 1 << (2 * nsym); // number of tone sequences to enumerate
+            let nt = 1usize << (2 * nsym);
-
+            let ibmax = match nsym {
        let mut ks = 0;
        while ks + nsym <= FT2_FRAME_SYMBOLS {
            // Compute coherent magnitude for each possible tone sequence
            let mut s2 = vec![0.0f32; nt];
            for i in 0..nt {
                let i1 = i / 64;
                let i2 = (i & 63) / 16;
                let i3 = (i & 15) / 4;
                let i4 = i & 3;
                let sum = match nsym {
                    1 => symbols[ks][FT4_GRAY_MAP[i4] as usize],
                    2 => {
                        symbols[ks][FT4_GRAY_MAP[i3] as usize]
                            + symbols[ks + 1][FT4_GRAY_MAP[i4] as usize]
                    }
                    4 => {
                        symbols[ks][FT4_GRAY_MAP[i1] as usize]
                            + symbols[ks + 1][FT4_GRAY_MAP[i2] as usize]
                            + symbols[ks + 2][FT4_GRAY_MAP[i3] as usize]
                            + symbols[ks + 3][FT4_GRAY_MAP[i4] as usize]
                    }
                    _ => Complex32::new(0.0, 0.0),
                };
                s2[i] = sum.norm();
            }
            // Extract bit metrics: for each bit position, find max coherent
            // magnitude with that bit set vs unset
            let ipt = 2 * ks;
            let ibmax: usize = match nsym {
                1 => 1,
                2 => 3,
                4 => 7,
                _ => 0,
            };
-            for ib in 0..=ibmax {
+            let mut ks = 0;
-                let mut max_one = f32::NEG_INFINITY;
+            while ks + nsym <= FT2_FRAME_SYMBOLS {
-                let mut max_zero = f32::NEG_INFINITY;
+                let mut max_one = [f32::NEG_INFINITY; 8];
                let mut max_zero = [f32::NEG_INFINITY; 8];
                for i in 0..nt {
-                    if i < 256 {
+                    let sum = match nsym {
-                        if one_mask[i][ibmax - ib] != 0 {
+                        1 => self.symbols[ks][FT4_GRAY_MAP[i & 0x03] as usize],
-                            if s2[i] > max_one {
+                        2 => {
-                                max_one = s2[i];
+                            self.symbols[ks][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
                                + self.symbols[ks + 1][FT4_GRAY_MAP[i & 0x03] as usize]
                        }
-                        } else if s2[i] > max_zero {
+                        4 => {
-                            max_zero = s2[i];
+                            self.symbols[ks][FT4_GRAY_MAP[(i >> 6) & 0x03] as usize]
                                + self.symbols[ks + 1][FT4_GRAY_MAP[(i >> 4) & 0x03] as usize]
                                + self.symbols[ks + 2][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
                                + self.symbols[ks + 3][FT4_GRAY_MAP[i & 0x03] as usize]
                        }
                        _ => Complex32::new(0.0, 0.0),
                    };
                    let coherent = sum.norm();
                    for ib in 0..=ibmax {
                        if ((i >> (ibmax - ib)) & 1) != 0 {
                            max_one[ib] = max_one[ib].max(coherent);
                        } else {
                            max_zero[ib] = max_zero[ib].max(coherent);
                        }
                    }
                }
                let ipt = 2 * ks;
                for ib in 0..=ibmax {
                    let metric_idx = ipt + ib;
-                if metric_idx >= n_metrics {
+                    if metric_idx < N_METRICS {
-                    continue;
+                        metric[metric_idx] = max_one[ib] - max_zero[ib];
                }
                match nseq {
                    0 => metric1[metric_idx] = max_one - max_zero,
                    1 => metric2[metric_idx] = max_one - max_zero,
                    _ => metric4[metric_idx] = max_one - max_zero,
                    }
                }
@@ -182,30 +158,49 @@ pub fn extract_bitmetrics_raw(
        }
        // Patch boundary metrics where multi-symbol integration overruns
-    if n_metrics >= 206 {
+        self.metric2[204] = self.metric1[204];
-        metric2[204] = metric1[204];
+        self.metric2[205] = self.metric1[205];
-        metric2[205] = metric1[205];
+        self.metric4[200] = self.metric2[200];
-        metric4[200] = metric2[200];
+        self.metric4[201] = self.metric2[201];
-        metric4[201] = metric2[201];
+        self.metric4[202] = self.metric2[202];
-        metric4[202] = metric2[202];
+        self.metric4[203] = self.metric2[203];
-        metric4[203] = metric2[203];
+        self.metric4[204] = self.metric1[204];
-        metric4[204] = metric1[204];
+        self.metric4[205] = self.metric1[205];
-        metric4[205] = metric1[205];
+
        normalize_metric(&mut self.metric1);
        normalize_metric(&mut self.metric2);
        normalize_metric(&mut self.metric4);
        for i in 0..N_METRICS {
            self.bitmetrics[i][0] = self.metric1[i];
            self.bitmetrics[i][1] = self.metric2[i];
            self.bitmetrics[i][2] = self.metric4[i];
        }
-    // Normalize each metric scale independently
+        Some(&self.bitmetrics)
    normalize_metric(&mut metric1);
    normalize_metric(&mut metric2);
    normalize_metric(&mut metric4);
    // Pack into output
    for i in 0..n_metrics {
        bitmetrics[i][0] = metric1[i];
        bitmetrics[i][1] = metric2[i];
        bitmetrics[i][2] = metric4[i];
    }
 }
-    Some(bitmetrics)
+impl Default for BitMetricsWorkspace {
    fn default() -> Self {
        Self::new()
    }
 }
 /// One-shot bit-metric extraction from the downsampled signal region.
 ///
 /// Builds a throwaway [`BitMetricsWorkspace`], runs it once over `signal`
 /// and copies the borrowed result into an owned vector.
 ///
 /// The returned vector has `2 * FT2_FRAME_SYMBOLS` rows of three metrics:
 /// - Index 0: 1-symbol scale metric
 /// - Index 1: 2-symbol scale metric
 /// - Index 2: 4-symbol scale metric
 ///
 /// Returns `None` whenever the workspace rejects the signal (sync quality
 /// too poor: fewer than 4 of 16 Costas sync tones decoded correctly).
 pub fn extract_bitmetrics_raw(signal: &[Complex32]) -> Option<Vec<[f32; 3]>> {
    let mut scratch = BitMetricsWorkspace::new();
    let borrowed = scratch.extract(signal)?;
    Some(borrowed.to_vec())
 }
 /// Normalize a metric array by dividing by its standard deviation.
@@ -14,6 +14,34 @@ use rustfft::FftPlanner;
 use super::{FT2_NDOWN, FT2_SYMBOL_PERIOD_F};
 /// Reusable scratch buffers for frequency-domain downsampling.
 pub struct DownsampleWorkspace {
    /// Working band of `nfft2` bins; zeroed by `prepare` and then
    /// inverse-transformed in place during downsampling.
    band: Vec<Complex32>,
    /// Scratch space handed to the in-place IFFT.
    ifft_scratch: Vec<Complex32>,
 }
 impl DownsampleWorkspace {
    /// Allocate zero-filled buffers: `nfft2` band bins and
    /// `ifft_scratch_len` scratch elements.
    fn new(nfft2: usize, ifft_scratch_len: usize) -> Self {
        Self {
            band: vec![Complex32::new(0.0, 0.0); nfft2],
            ifft_scratch: vec![Complex32::new(0.0, 0.0); ifft_scratch_len],
        }
    }
    /// Make the workspace ready for one downsample pass.
    ///
    /// After this call `band` holds exactly `nfft2` zeros and
    /// `ifft_scratch` holds exactly `ifft_scratch_len` elements. The
    /// scratch contents are left as they are.
    fn prepare(&mut self, nfft2: usize, ifft_scratch_len: usize) {
        let zero = Complex32::new(0.0, 0.0);
        // `resize` alone only initialises newly appended elements, so a
        // workspace reused with a different `nfft2` would keep stale bins
        // in the retained prefix. Clearing first zeroes every bin while
        // still reusing the existing allocation.
        self.band.clear();
        self.band.resize(nfft2, zero);
        if self.ifft_scratch.len() != ifft_scratch_len {
            self.ifft_scratch.resize(ifft_scratch_len, zero);
        }
    }
 }
 /// Downsample context holding precomputed FFT data and spectral window.
 pub struct DownsampleContext {
    /// Number of raw samples.
@@ -28,8 +56,8 @@ pub struct DownsampleContext {
    spectrum: Vec<Complex32>,
    /// IFFT plan for the downsampled length.
    ifft: std::sync::Arc<dyn rustfft::Fft<f32>>,
-    /// Scratch buffer for IFFT.
+    /// Scratch length required by the IFFT plan.
-    ifft_scratch: Vec<Complex32>,
+    ifft_scratch_len: usize,
 }
 impl DownsampleContext {
@@ -50,7 +78,11 @@ impl DownsampleContext {
        let df = sample_rate / nraw as f32;
        // Build spectral extraction window
-        let window = build_spectral_window(nfft2, df);
+        let mut window = build_spectral_window(nfft2, df);
        let inv_nfft2 = 1.0 / nfft2 as f32;
        for coeff in &mut window {
            *coeff *= inv_nfft2;
        }
        // Forward real FFT of raw audio
        let mut real_planner = realfft::RealFftPlanner::<f32>::new();
@@ -59,11 +91,7 @@ impl DownsampleContext {
        let mut output = fft.make_output_vec();
        let mut scratch = fft.make_scratch_vec();
-        for (i, s) in raw_audio.iter().enumerate() {
+        input.copy_from_slice(raw_audio);
            if i < input.len() {
                input[i] = *s;
            }
        }
        fft.process_with_scratch(&mut input, &mut output, &mut scratch)
            .ok()?;
@@ -72,7 +100,7 @@ impl DownsampleContext {
        // IFFT plan for downsampled length
        let mut planner = FftPlanner::<f32>::new();
        let ifft = planner.plan_fft_inverse(nfft2);
-        let ifft_scratch = vec![Complex32::new(0.0, 0.0); ifft.get_inplace_scratch_len()];
+        let ifft_scratch_len = ifft.get_inplace_scratch_len();
        Some(Self {
            nraw,
@@ -81,7 +109,7 @@ impl DownsampleContext {
            window,
            spectrum,
            ifft,
-            ifft_scratch,
+            ifft_scratch_len,
        })
    }
@@ -90,15 +118,31 @@ impl DownsampleContext {
        self.nfft2
    }
    /// Build a [`DownsampleWorkspace`] whose buffers match this context's
    /// downsampled length and IFFT scratch requirement.
    pub fn workspace(&self) -> DownsampleWorkspace {
        let (band_len, scratch_len) = (self.nfft2, self.ifft_scratch_len);
        DownsampleWorkspace::new(band_len, scratch_len)
    }
    /// Downsample the raw audio around `freq_hz`, writing complex baseband
    /// samples into `out`. Returns the number of samples produced.
    pub fn downsample(&self, freq_hz: f32, out: &mut [Complex32]) -> usize {
        let mut workspace = self.workspace();
        self.downsample_with_workspace(freq_hz, out, &mut workspace)
    }
    /// Downsample the raw audio using reusable scratch buffers.
    pub fn downsample_with_workspace(
        &self,
        freq_hz: f32,
        out: &mut [Complex32],
        workspace: &mut DownsampleWorkspace,
    ) -> usize {
        if out.len() < self.nfft2 {
            return 0;
        }
-        // Working band buffer
+        workspace.prepare(self.nfft2, self.ifft_scratch_len);
-        let mut band = vec![Complex32::new(0.0, 0.0); self.nfft2];
+        let band = &mut workspace.band;
        let i0 = (freq_hz / self.df).round() as i32;
        let half_nraw = (self.nraw / 2) as i32;
@@ -119,21 +163,16 @@ impl DownsampleContext {
            }
        }
-        // Apply spectral window and scale
+        // Apply spectral window
        let inv_nfft2 = 1.0 / self.nfft2 as f32;
        for i in 0..self.nfft2 {
-            band[i] = Complex32::new(
+            band[i] *= self.window[i];
                band[i].re * self.window[i] * inv_nfft2,
                band[i].im * self.window[i] * inv_nfft2,
            );
        }
        // Inverse FFT (in-place)
        let mut scratch = self.ifft_scratch.clone();
        self.ifft
-            .process_with_scratch(&mut band, &mut scratch);
+            .process_with_scratch(band, &mut workspace.ifft_scratch);
-        out[..self.nfft2].copy_from_slice(&band);
+        out[..self.nfft2].copy_from_slice(band);
        self.nfft2
    }
 }
@@ -167,13 +206,13 @@ fn build_spectral_window(nfft2: usize, df: f32) -> Vec<f32> {
    // Raised-cosine trailing edge
    for i in (iwt + iwf)..(2 * iwt + iwf).min(nfft2) {
-        window[i] = 0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos());
+        window[i] =
            0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos());
    }
    // Circular shift by iws bins
    if iws > 0 && iws < nfft2 {
-        let shifted: Vec<f32> = (0..nfft2).map(|i| window[(i + iws) % nfft2]).collect();
+        window.rotate_left(iws);
        window.copy_from_slice(&shifted);
    }
    window
@@ -14,6 +14,7 @@ pub mod osd;
 pub mod sync;
 use num_complex::Complex32;
 use realfft::RealFftPlanner;
 use crate::constants::FT4_XOR_SEQUENCE;
 use crate::crc::{ftx_compute_crc, ftx_extract_crc};
@@ -21,8 +22,9 @@ use crate::decode::{pack_bits, FtxMessage};
 use crate::ldpc;
 use crate::protocol::*;
-use downsample::DownsampleContext;
+use bitmetrics::BitMetricsWorkspace;
-use sync::{prepare_sync_waveforms, sync2d_score};
+use downsample::{DownsampleContext, DownsampleWorkspace};
 use sync::{prepare_sync_waveforms, sync2d_score, SyncWaveforms};
 // FT2 DSP constants
 pub const FT2_NDOWN: usize = 9;
@@ -119,6 +121,62 @@ pub struct Ft2Pipeline {
    sample_rate: f32,
    raw_audio: Vec<f32>,
    raw_capacity: usize,
    waveforms: SyncWaveforms,
    peak_search: PeakSearchWorkspace,
 }
 /// Scratch buffers allocated once per `decode` call and passed by mutable
 /// reference to the scan and per-hit decode stages.
 struct Ft2DecodeWorkspace {
    /// Band and IFFT scratch used by `downsample_with_workspace`.
    downsample: DownsampleWorkspace,
    /// Output of the first downsample (peak scan and sync refinement).
    downsample_a: Vec<Complex32>,
    /// Output of the final downsample at the corrected frequency.
    downsample_b: Vec<Complex32>,
    /// Extracted signal region, `FT2_FRAME_SAMPLES` samples long.
    signal: Vec<Complex32>,
    /// Reusable bit-metric extraction state.
    bitmetrics: BitMetricsWorkspace,
 }
 impl Ft2DecodeWorkspace {
    /// Allocate every decode buffer up front, sized from `ctx`.
    ///
    /// Both downsample outputs hold `ctx.nfft2()` samples and the signal
    /// buffer holds `FT2_FRAME_SAMPLES`; all start zero-filled.
    fn new(ctx: &DownsampleContext) -> Self {
        let zero = Complex32::new(0.0, 0.0);
        let band_len = ctx.nfft2();
        Self {
            downsample: ctx.workspace(),
            downsample_a: vec![zero; band_len],
            downsample_b: vec![zero; band_len],
            signal: vec![zero; FT2_FRAME_SAMPLES],
            bitmetrics: BitMetricsWorkspace::new(),
        }
    }
 }
 /// Cached FFT plan and buffers for the frequency-peak search, owned by the
 /// pipeline so they are not rebuilt on every decode.
 struct PeakSearchWorkspace {
    /// Nuttall window of length `FT2_NFFT1`, applied to each frame.
    window: Vec<f32>,
    /// Forward real-to-complex FFT plan of length `FT2_NFFT1`.
    fft: std::sync::Arc<dyn realfft::RealToComplex<f32>>,
    /// Windowed time-domain frame fed to `fft`.
    fft_input: Vec<f32>,
    /// Complex spectrum produced by `fft`.
    fft_output: Vec<Complex32>,
    /// Scratch space required by `fft`.
    fft_scratch: Vec<Complex32>,
    /// Power spectrum averaged across frames (`FT2_NH1` bins).
    avg: Vec<f32>,
    /// 15-point moving average of `avg`.
    smooth: Vec<f32>,
    /// 63-point moving average of `smooth`, offset by `1e-9`.
    baseline: Vec<f32>,
 }
 impl PeakSearchWorkspace {
    /// Plan the `FT2_NFFT1`-point forward real FFT, allocate its buffers
    /// from the plan, and create zeroed `FT2_NH1`-bin spectrum
    /// accumulators.
    fn new() -> Self {
        let fft = RealFftPlanner::<f32>::new().plan_fft_forward(FT2_NFFT1);
        let zeroed_bins = || vec![0.0; FT2_NH1];
        Self {
            window: nuttall_window(FT2_NFFT1),
            // Buffers are sized from the plan before it is moved into the struct.
            fft_input: fft.make_input_vec(),
            fft_output: fft.make_output_vec(),
            fft_scratch: fft.make_scratch_vec(),
            fft,
            avg: zeroed_bins(),
            smooth: zeroed_bins(),
            baseline: zeroed_bins(),
        }
    }
 }
 impl Ft2Pipeline {
@@ -128,6 +186,8 @@ impl Ft2Pipeline {
            sample_rate: sample_rate as f32,
            raw_audio: Vec::with_capacity(FT2_NMAX),
            raw_capacity: FT2_NMAX,
            waveforms: prepare_sync_waveforms(),
            peak_search: PeakSearchWorkspace::new(),
        }
    }
@@ -157,7 +217,7 @@ impl Ft2Pipeline {
    }
    /// Run the full FT2 decode pipeline. Returns decoded messages.
-    pub fn decode(&self, max_results: usize) -> Vec<Ft2DecodeResult> {
+    pub fn decode(&mut self, max_results: usize) -> Vec<Ft2DecodeResult> {
        if self.raw_audio.len() < FT2_NFFT1 {
            return Vec::new();
        }
@@ -167,7 +227,8 @@ impl Ft2Pipeline {
            None => return Vec::new(),
        };
-        let hits = self.find_scan_hits(&ctx);
+        let mut workspace = Ft2DecodeWorkspace::new(&ctx);
        let hits = self.find_scan_hits(&ctx, &mut workspace);
        if hits.is_empty() {
            return Vec::new();
        }
@@ -179,11 +240,11 @@ impl Ft2Pipeline {
            if results.len() >= max_results {
                break;
            }
-            if let Some(result) = self.decode_hit(&ctx, hit) {
+            if let Some(result) = self.decode_hit(&ctx, hit, &mut workspace) {
                // Dedup
-                let dominated = seen_hashes.iter().any(|(h, p)| {
+                let dominated = seen_hashes
-                    *h == result.message.hash && *p == result.message.payload
+                    .iter()
-                });
+                    .any(|(h, p)| *h == result.message.hash && *p == result.message.payload);
                if dominated {
                    continue;
                }
@@ -196,7 +257,7 @@ impl Ft2Pipeline {
    }
    /// Find frequency peaks from averaged power spectrum.
-    fn find_frequency_peaks(&self) -> Vec<RawCandidate> {
+    fn find_frequency_peaks(&mut self) -> Vec<RawCandidate> {
        if self.raw_audio.len() < FT2_NFFT1 {
            return Vec::new();
        }
@@ -204,65 +265,68 @@ impl Ft2Pipeline {
        let fs = self.sample_rate;
        let df = fs / FT2_NFFT1 as f32;
        let n_frames = 1 + (self.raw_audio.len() - FT2_NFFT1) / FT2_NSTEP;
        let PeakSearchWorkspace {
            window,
            fft,
            fft_input,
            fft_output,
            fft_scratch,
            avg,
            smooth,
            baseline,
        } = &mut self.peak_search;
-        // Compute Nuttall window
+        avg.fill(0.0);
-        let window = nuttall_window(FT2_NFFT1);
+        smooth.fill(0.0);
-
+        baseline.fill(0.0);
        // Forward real FFT setup
        let mut real_planner = realfft::RealFftPlanner::<f32>::new();
        let fft = real_planner.plan_fft_forward(FT2_NFFT1);
        let mut fft_input = fft.make_input_vec();
        let mut fft_output = fft.make_output_vec();
        let mut fft_scratch = fft.make_scratch_vec();
        // Average power spectrum across frames
        let mut avg = vec![0.0f32; FT2_NH1];
        for frame in 0..n_frames {
            let start = frame * FT2_NSTEP;
-            for i in 0..FT2_NFFT1 {
+            let input = &self.raw_audio[start..(start + FT2_NFFT1)];
-                fft_input[i] = self.raw_audio[start + i] * window[i];
+            for (dst, (&sample, &coeff)) in
                fft_input.iter_mut().zip(input.iter().zip(window.iter()))
            {
                *dst = sample * coeff;
            }
-            fft.process_with_scratch(&mut fft_input, &mut fft_output, &mut fft_scratch)
+            fft.process_with_scratch(fft_input, fft_output, fft_scratch)
                .expect("FFT failed");
-            for bin in 1..FT2_NH1 {
+            for (bin, c) in fft_output.iter().enumerate().take(FT2_NH1).skip(1) {
-                if bin < fft_output.len() {
+                avg[bin] += c.norm_sqr();
                    let c = fft_output[bin];
                    let power = c.re * c.re + c.im * c.im;
                    avg[bin] += power;
                }
            }
        }
        let inv_n_frames = 1.0 / n_frames as f32;
        for bin in 1..FT2_NH1 {
-            avg[bin] /= n_frames as f32;
+            avg[bin] *= inv_n_frames;
        }
        // Smooth with 15-point moving average
-        let mut smooth = vec![0.0f32; FT2_NH1];
+        if FT2_NH1 > 16 {
            let mut sum: f32 = avg[1..16].iter().sum();
            for bin in 8..FT2_NH1.saturating_sub(8) {
            let mut sum = 0.0f32;
            for i in (bin.saturating_sub(7))..=(bin + 7).min(FT2_NH1 - 1) {
                sum += avg[i];
            }
                smooth[bin] = sum / 15.0;
                if bin + 8 < FT2_NH1 {
                    sum += avg[bin + 8] - avg[bin - 7];
                }
            }
        }
        // Baseline with 63-point moving average
-        let mut baseline = vec![0.0f32; FT2_NH1];
+        if FT2_NH1 > 64 {
            let mut sum: f32 = smooth[1..64].iter().sum();
            for bin in 32..FT2_NH1.saturating_sub(32) {
            let mut sum = 0.0f32;
            for i in (bin.saturating_sub(31))..=(bin + 31).min(FT2_NH1 - 1) {
                sum += smooth[i];
            }
                baseline[bin] = sum / 63.0 + 1e-9;
                if bin + 32 < FT2_NH1 {
                    sum += smooth[bin + 32] - smooth[bin - 31];
                }
            }
        }
        // Find peaks
        let min_bin = (200.0 / df).round() as usize;
        let max_bin = (4910.0 / df).round() as usize;
-        let mut candidates = Vec::new();
+        let mut candidates = Vec::with_capacity(FT2_MAX_RAW_CANDIDATES);
        let mut bin = min_bin + 1;
        while bin < max_bin.saturating_sub(1) && candidates.len() < FT2_MAX_RAW_CANDIDATES {
@@ -309,20 +373,25 @@ impl Ft2Pipeline {
        }
        // Sort by score descending
-        candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+        candidates.sort_by(|a, b| {
            b.score
                .partial_cmp(&a.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        candidates
    }
    /// Find scan hits by downsampling each frequency peak and computing sync scores.
-    fn find_scan_hits(&self, ctx: &DownsampleContext) -> Vec<ScanHit> {
+    fn find_scan_hits(
        &mut self,
        ctx: &DownsampleContext,
        workspace: &mut Ft2DecodeWorkspace,
    ) -> Vec<ScanHit> {
        let peaks = self.find_frequency_peaks();
        if peaks.is_empty() {
            return Vec::new();
        }
        let nfft2 = ctx.nfft2();
        let waveforms = prepare_sync_waveforms();
        let mut hits = Vec::new();
        for peak in &peaks {
@@ -330,12 +399,15 @@ impl Ft2Pipeline {
                break;
            }
-            let mut down = vec![Complex32::new(0.0, 0.0); nfft2];
+            let produced = ctx.downsample_with_workspace(
-            let produced = ctx.downsample(peak.freq_hz, &mut down);
+                peak.freq_hz,
                &mut workspace.downsample_a,
                &mut workspace.downsample,
            );
            if produced == 0 {
                continue;
            }
-            normalize_downsampled(&mut down[..produced], produced);
+            normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
            // Coarse search
            let mut best_score: f32 = -1.0;
@@ -347,10 +419,10 @@ impl Ft2Pipeline {
                let mut start = -688i32;
                while start <= 2024 {
                    let score = sync2d_score(
-                        &down[..produced],
+                        &workspace.downsample_a[..produced],
                        start,
                        idf,
-                        &waveforms,
+                        &self.waveforms,
                    );
                    if score > best_score {
                        best_score = score;
@@ -373,10 +445,10 @@ impl Ft2Pipeline {
                }
                for start in (best_start - 5)..=(best_start + 5) {
                    let score = sync2d_score(
-                        &down[..produced],
+                        &workspace.downsample_a[..produced],
                        start,
                        idf,
-                        &waveforms,
+                        &self.waveforms,
                    );
                    if score > best_score {
                        best_score = score;
@@ -409,17 +481,22 @@ impl Ft2Pipeline {
    }
    /// Attempt to decode a single scan hit through the full pipeline.
-    fn decode_hit(&self, ctx: &DownsampleContext, hit: &ScanHit) -> Option<Ft2DecodeResult> {
+    fn decode_hit(
-        let nfft2 = ctx.nfft2();
+        &self,
-        let waveforms = prepare_sync_waveforms();
+        ctx: &DownsampleContext,
-
+        hit: &ScanHit,
        workspace: &mut Ft2DecodeWorkspace,
    ) -> Option<Ft2DecodeResult> {
        // Initial downsample for sync refinement
-        let mut cd2 = vec![Complex32::new(0.0, 0.0); nfft2];
+        let produced = ctx.downsample_with_workspace(
-        let produced = ctx.downsample(hit.freq_hz, &mut cd2);
+            hit.freq_hz,
            &mut workspace.downsample_a,
            &mut workspace.downsample,
        );
        if produced == 0 {
            return None;
        }
-        normalize_downsampled(&mut cd2[..produced], produced);
+        normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
        // Refine sync
        let mut best_score: f32 = -1.0;
@@ -431,7 +508,12 @@ impl Ft2Pipeline {
                continue;
            }
            for start in (hit.start - 5)..=(hit.start + 5) {
-                let score = sync2d_score(&cd2[..produced], start, idf, &waveforms);
+                let score = sync2d_score(
                    &workspace.downsample_a[..produced],
                    start,
                    idf,
                    &self.waveforms,
                );
                if score > best_score {
                    best_score = score;
                    best_start = start;
@@ -451,19 +533,25 @@ impl Ft2Pipeline {
        }
        // Final downsample at corrected frequency
-        let mut cb = vec![Complex32::new(0.0, 0.0); nfft2];
+        let produced2 = ctx.downsample_with_workspace(
-        let produced2 = ctx.downsample(corrected_freq_hz, &mut cb);
+            corrected_freq_hz,
            &mut workspace.downsample_b,
            &mut workspace.downsample,
        );
        if produced2 == 0 {
            return None;
        }
-        normalize_downsampled(&mut cb[..produced2], FT2_FRAME_SAMPLES);
+        normalize_downsampled(&mut workspace.downsample_b[..produced2], FT2_FRAME_SAMPLES);
        // Extract signal region
-        let mut signal = vec![Complex32::new(0.0, 0.0); FT2_FRAME_SAMPLES];
+        extract_signal_region(
-        extract_signal_region(&cb[..produced2], best_start, &mut signal);
+            &workspace.downsample_b[..produced2],
            best_start,
            &mut workspace.signal,
        );
        // Extract bit metrics
-        let bitmetrics = bitmetrics::extract_bitmetrics_raw(&signal)?;
+        let bitmetrics = workspace.bitmetrics.extract(&workspace.signal)?;
        // Sync quality check using known Costas bit patterns
        let sync_bits_a: [u8; 8] = [0, 0, 0, 1, 1, 0, 1, 1];
@@ -472,10 +560,26 @@ impl Ft2Pipeline {
        let sync_bits_d: [u8; 8] = [1, 0, 1, 1, 0, 0, 0, 1];
        let mut sync_qual = 0;
        for i in 0..8 {
-            sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] { 1 } else { 0 };
+            sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] {
-            sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] { 1 } else { 0 };
+                1
-            sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] { 1 } else { 0 };
+            } else {
-            sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] { 1 } else { 0 };
+                0
            };
            sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] {
                1
            } else {
                0
            };
            sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] {
                1
            } else {
                0
            };
            sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] {
                1
            } else {
                0
            };
        }
        if sync_qual < 10 {
            return None;
@@ -591,8 +695,18 @@ impl Ft2Pipeline {
        }
        // Compute refined timing via parabolic interpolation
-        let sm1 = sync2d_score(&cd2[..produced], best_start - 1, best_idf, &waveforms);
+        let sm1 = sync2d_score(
-        let sp1 = sync2d_score(&cd2[..produced], best_start + 1, best_idf, &waveforms);
+            &workspace.downsample_a[..produced],
            best_start - 1,
            best_idf,
            &self.waveforms,
        );
        let sp1 = sync2d_score(
            &workspace.downsample_a[..produced],
            best_start + 1,
            best_idf,
            &self.waveforms,
        );
        let mut xstart = best_start as f32;
        let den = sm1 - 2.0 * best_score + sp1;
        if den.abs() > 1e-6 {
@@ -635,7 +749,11 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
    if power <= 0.0 {
        return;
    }
-    let rc = if ref_count == 0 { samples.len() } else { ref_count };
+    let rc = if ref_count == 0 {
        samples.len()
    } else {
        ref_count
    };
    let scale = (rc as f32 / power).sqrt();
    for s in samples.iter_mut() {
        *s *= scale;
@@ -644,14 +762,17 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
 /// Extract a signal region starting at `start` into `out_signal`.
 fn extract_signal_region(input: &[Complex32], start: i32, out_signal: &mut [Complex32]) {
-    for i in 0..out_signal.len() {
+    out_signal.fill(Complex32::new(0.0, 0.0));
-        let src = start + i as i32;
+
-        out_signal[i] = if src >= 0 && (src as usize) < input.len() {
+    let src_start = start.max(0) as usize;
-            input[src as usize]
+    let dst_start = (-start).max(0) as usize;
-        } else {
+    if dst_start >= out_signal.len() || src_start >= input.len() {
-            Complex32::new(0.0, 0.0)
+        return;
        };
    }
    let copy_len = (input.len() - src_start).min(out_signal.len() - dst_start);
    out_signal[dst_start..(dst_start + copy_len)]
        .copy_from_slice(&input[src_start..(src_start + copy_len)]);
 }
 /// Normalize LLR array (divide by standard deviation).
@@ -9,6 +9,7 @@
 //! reference across time and frequency offsets.
 use num_complex::Complex32;
 use std::sync::OnceLock;
 use crate::constants::FT4_COSTAS_PATTERN;
@@ -16,6 +17,12 @@ use super::{FT2_NDOWN, FT2_NSS, FT2_SYMBOL_PERIOD_F, FT2_SYNC_TWEAK_MAX, FT2_SYN
 /// Number of frequency tweak entries.
 const NUM_TWEAKS: usize = (FT2_SYNC_TWEAK_MAX - FT2_SYNC_TWEAK_MIN) as usize + 1;
 /// Number of Costas sync groups correlated per frame.
 const SYNC_GROUP_COUNT: usize = 4;
 /// Number of reference samples correlated in each sync group.
 const SYNC_SAMPLES: usize = 64;
 /// Spacing, in input samples, between consecutive samples read for one group.
 const SAMPLE_STRIDE: usize = 2;
 /// Distance, in input samples, between the starts of consecutive sync groups
 /// (33 symbol positions scaled by `FT2_NSS`).
 const GROUP_STRIDE: i32 = 33 * FT2_NSS as i32;
 /// Offset from a group's first sample to the last sample read for that group.
 const GROUP_LAST_SAMPLE_OFFSET: i32 = SAMPLE_STRIDE as i32 * (SYNC_SAMPLES as i32 - 1);
 /// Offset from the first group's first sample to the last sample read for the
 /// fourth group; `sync2d_score` uses it to decide whether every access of a
 /// frame is in bounds.
 const FRAME_LAST_SAMPLE_OFFSET: i32 = 3 * GROUP_STRIDE + GROUP_LAST_SAMPLE_OFFSET;
 /// Precomputed sync and frequency-tweak waveforms.
 pub struct SyncWaveforms {
@@ -73,6 +80,74 @@ pub fn prepare_sync_waveforms() -> SyncWaveforms {
    }
 }
 /// Conjugated sync references, indexed as `[tweak][group][sample]`.
 type SyncReferenceBank = [[[Complex32; SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];
 /// Returns the shared table of conjugated `sync_wave * tweak_wave` products.
 ///
 /// The table is computed from `prepare_sync_waveforms()` the first time this
 /// function is called and the same reference is handed out afterwards.
 fn sync_reference_bank() -> &'static SyncReferenceBank {
    static BANK: OnceLock<SyncReferenceBank> = OnceLock::new();
    BANK.get_or_init(|| {
        let wf = prepare_sync_waveforms();
        let mut bank = [[[Complex32::new(0.0, 0.0); SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];
        for (tw_idx, per_tweak) in bank.iter_mut().enumerate() {
            for (group, per_group) in per_tweak.iter_mut().enumerate() {
                for (i, slot) in per_group.iter_mut().enumerate() {
                    // Store the conjugate so the correlators only need a plain
                    // complex multiply per sample.
                    *slot = (wf.sync_wave[group][i] * wf.tweak_wave[tw_idx][i]).conj();
                }
            }
        }
        bank
    })
 }
 /// Correlates one sync group against its conjugated reference without any
 /// per-sample range test.
 ///
 /// Reads `SYNC_SAMPLES` entries of `samples`, starting at `pos` and spaced
 /// `SAMPLE_STRIDE` apart, multiplies each by the matching entry of `refs`,
 /// and returns the magnitude of the accumulated complex sum. Indexing panics
 /// if any of those positions lies outside `samples`.
 #[inline(always)]
 fn correlate_group_fast(
    samples: &[Complex32],
    pos: usize,
    refs: &[Complex32; SYNC_SAMPLES],
 ) -> f32 {
    let mut acc_re = 0.0f32;
    let mut acc_im = 0.0f32;
    for (k, r) in refs.iter().enumerate() {
        let s = samples[pos + k * SAMPLE_STRIDE];
        // Complex product s * r, expanded into real and imaginary parts.
        acc_re += s.re * r.re - s.im * r.im;
        acc_im += s.re * r.im + s.im * r.re;
    }
    (acc_re * acc_re + acc_im * acc_im).sqrt()
 }
 /// Correlates one sync group against its conjugated reference, skipping any
 /// sample position that falls outside `samples`.
 ///
 /// `pos` may be negative or extend past the end of the buffer. Returns the
 /// magnitude of the accumulated complex sum together with the number of
 /// samples that were actually in range and contributed to it.
 #[inline(always)]
 fn correlate_group_clipped(
    samples: &[Complex32],
    pos: i32,
    refs: &[Complex32; SYNC_SAMPLES],
 ) -> (f32, usize) {
    let len = samples.len() as i32;
    let mut acc_re = 0.0f32;
    let mut acc_im = 0.0f32;
    let mut in_range = 0usize;
    for (k, r) in refs.iter().enumerate() {
        let idx = pos + k as i32 * SAMPLE_STRIDE as i32;
        if (0..len).contains(&idx) {
            let s = samples[idx as usize];
            // Complex product s * r, expanded into real and imaginary parts.
            acc_re += s.re * r.re - s.im * r.im;
            acc_im += s.re * r.im + s.im * r.re;
            in_range += 1;
        }
    }
    let magnitude = (acc_re * acc_re + acc_im * acc_im).sqrt();
    (magnitude, in_range)
 }
 /// Compute the 2D sync score for a given time offset and frequency tweak.
 ///
 /// Correlates the downsampled complex samples against the four Costas sync
@@ -88,46 +163,36 @@ pub fn sync2d_score(
    samples: &[Complex32],
    start: i32,
    idf: i32,
-    waveforms: &SyncWaveforms,
+    _waveforms: &SyncWaveforms,
 ) -> f32 {
    let nss = FT2_NSS as i32;
    let n_samples = samples.len() as i32;
    // The four sync groups are at symbol positions 0, 33, 66, 99 within the frame
    let positions = [
        start,
        start + 33 * nss,
        start + 66 * nss,
        start + 99 * nss,
    ];
    let tw_idx = (idf - FT2_SYNC_TWEAK_MIN) as usize;
-    if tw_idx >= waveforms.tweak_wave.len() {
+    if tw_idx >= NUM_TWEAKS {
        return 0.0;
    }
-    let tweak = &waveforms.tweak_wave[tw_idx];
+
    let refs = &sync_reference_bank()[tw_idx];
    let scale = 1.0 / (2.0 * FT2_NSS as f32);
    let mut score = 0.0f32;
-    for group in 0..4 {
+    if start >= 0 && start + FRAME_LAST_SAMPLE_OFFSET < n_samples {
-        let pos = positions[group];
+        for (group, refs_group) in refs.iter().enumerate() {
-        let mut sum = Complex32::new(0.0, 0.0);
+            let pos = (start + group as i32 * GROUP_STRIDE) as usize;
-        let mut usable = 0;
+            score += correlate_group_fast(samples, pos, refs_group) * scale;
        }
        return score;
    }
-        for i in 0..64 {
+    for (group, refs_group) in refs.iter().enumerate() {
-            let sample_idx = pos + 2 * i as i32;
+        let pos = start + group as i32 * GROUP_STRIDE;
-            if sample_idx < 0 || sample_idx >= n_samples {
+        if pos >= n_samples || pos + GROUP_LAST_SAMPLE_OFFSET < 0 {
            continue;
        }
            // Correlate: multiply received sample by conjugate of
            // (sync_reference * tweak_phasor)
            let reference = waveforms.sync_wave[group][i] * tweak[i];
            sum += samples[sample_idx as usize] * reference.conj();
            usable += 1;
        }
        let (corr, usable) = correlate_group_clipped(samples, pos, refs_group);
        if usable > 16 {
-            score += sum.norm() / (2.0 * FT2_NSS as f32);
+            score += corr * scale;
        }
    }