From 0b289000829d812bf58f4e95586a832c17096d9c Mon Sep 17 00:00:00 2001 From: Stan Grams Date: Wed, 18 Mar 2026 23:08:42 +0100 Subject: [PATCH] [refactor](trx-ftx): optimize ft2 decode hot paths Reuse FT2 downsample and bitmetric work buffers, speed up sync2d_score with precomputed references, and cache peak-search FFT state on the pipeline. Co-authored-by: OpenAI Codex Signed-off-by: Stan Grams --- src/decoders/trx-ftx/src/decoder.rs | 37 +-- src/decoders/trx-ftx/src/ft2/bitmetrics.rs | 359 ++++++++++----------- src/decoders/trx-ftx/src/ft2/downsample.rs | 87 +++-- src/decoders/trx-ftx/src/ft2/mod.rs | 301 +++++++++++------ src/decoders/trx-ftx/src/ft2/sync.rs | 121 +++++-- 5 files changed, 563 insertions(+), 342 deletions(-) diff --git a/src/decoders/trx-ftx/src/decoder.rs b/src/decoders/trx-ftx/src/decoder.rs index 3d5d832..3738345 100644 --- a/src/decoders/trx-ftx/src/decoder.rs +++ b/src/decoders/trx-ftx/src/decoder.rs @@ -176,8 +176,7 @@ impl Ft8Decoder { /// Waterfall-based decode for FT8/FT4. 
fn decode_waterfall(&mut self, max_results: usize) -> Vec { - let candidates = - ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE); + let candidates = ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE); let mut results = Vec::new(); let mut seen: Vec = Vec::new(); @@ -209,13 +208,14 @@ impl Ft8Decoder { // Compute time offset let symbol_period = self.protocol.symbol_period(); - let dt_s = - (cand.time_offset as f32 + cand.time_sub as f32 / self.monitor.wf.time_osr as f32) - * symbol_period - - 0.5; + let dt_s = (cand.time_offset as f32 + + cand.time_sub as f32 / self.monitor.wf.time_osr as f32) + * symbol_period + - 0.5; // Compute frequency - let freq_hz = (self.monitor.min_bin as f32 + cand.freq_offset as f32 + let freq_hz = (self.monitor.min_bin as f32 + + cand.freq_offset as f32 + cand.freq_sub as f32 / self.monitor.wf.freq_osr as f32) / symbol_period; @@ -232,16 +232,18 @@ impl Ft8Decoder { /// FT2-specific decode pipeline. fn decode_ft2(&mut self, max_results: usize) -> Vec { - let pipe = match self.ft2_pipeline.as_ref() { - Some(p) => p, - None => return Vec::new(), + let ft2_results = { + let pipe = match self.ft2_pipeline.as_mut() { + Some(p) => p, + None => return Vec::new(), + }; + + if !pipe.is_ready() { + return Vec::new(); + } + + pipe.decode(max_results) }; - - if !pipe.is_ready() { - return Vec::new(); - } - - let ft2_results = pipe.decode(max_results); let mut results = Vec::new(); for r in ft2_results { @@ -267,8 +269,7 @@ impl Ft8Decoder { payload: msg.payload, hash: msg.hash as u32, }; - let (text, _offsets, _rc) = - message::ftx_message_decode(&m, &mut self.callsign_hash); + let (text, _offsets, _rc) = message::ftx_message_decode(&m, &mut self.callsign_hash); if text.is_empty() { return None; } diff --git a/src/decoders/trx-ftx/src/ft2/bitmetrics.rs b/src/decoders/trx-ftx/src/ft2/bitmetrics.rs index bbfc8f4..f363e5f 100644 --- a/src/decoders/trx-ftx/src/ft2/bitmetrics.rs +++ 
b/src/decoders/trx-ftx/src/ft2/bitmetrics.rs @@ -15,6 +15,178 @@ use crate::constants::{FT4_COSTAS_PATTERN, FT4_GRAY_MAP}; use super::{FT2_FRAME_SYMBOLS, FT2_NSS}; +const N_METRICS: usize = 2 * FT2_FRAME_SYMBOLS; + +/// Reusable FFT plans and scratch buffers for bit-metric extraction. +pub struct BitMetricsWorkspace { + fft: std::sync::Arc>, + scratch: Vec, + symbols: [[Complex32; 4]; FT2_FRAME_SYMBOLS], + s4: [[f32; 4]; FT2_FRAME_SYMBOLS], + metric1: [f32; N_METRICS], + metric2: [f32; N_METRICS], + metric4: [f32; N_METRICS], + bitmetrics: [[f32; 3]; N_METRICS], + csymb: [Complex32; FT2_NSS], +} + +impl BitMetricsWorkspace { + pub fn new() -> Self { + let mut planner = FftPlanner::::new(); + let fft = planner.plan_fft_forward(FT2_NSS); + let scratch = vec![Complex32::new(0.0, 0.0); fft.get_inplace_scratch_len()]; + + Self { + fft, + scratch, + symbols: [[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS], + s4: [[0.0; 4]; FT2_FRAME_SYMBOLS], + metric1: [0.0; N_METRICS], + metric2: [0.0; N_METRICS], + metric4: [0.0; N_METRICS], + bitmetrics: [[0.0; 3]; N_METRICS], + csymb: [Complex32::new(0.0, 0.0); FT2_NSS], + } + } + + /// Extract bit metrics into a reusable internal buffer. 
+ pub fn extract<'a>(&'a mut self, signal: &[Complex32]) -> Option<&'a [[f32; 3]]> { + self.metric1.fill(0.0); + self.metric2.fill(0.0); + self.metric4.fill(0.0); + + for sym in 0..FT2_FRAME_SYMBOLS { + let offset = sym * FT2_NSS; + if offset + FT2_NSS <= signal.len() { + self.csymb + .copy_from_slice(&signal[offset..(offset + FT2_NSS)]); + } else { + self.csymb.fill(Complex32::new(0.0, 0.0)); + let remaining = signal.len().saturating_sub(offset); + self.csymb[..remaining].copy_from_slice(&signal[offset..(offset + remaining)]); + } + + self.fft + .process_with_scratch(&mut self.csymb, &mut self.scratch); + + for tone in 0..4 { + let symbol = self.csymb[tone]; + self.symbols[sym][tone] = symbol; + self.s4[sym][tone] = symbol.norm(); + } + } + + // Sync quality check: verify Costas patterns are detectable + let mut sync_ok = 0; + for group in 0..4 { + let base = group * 33; + for i in 0..4 { + if base + i >= FT2_FRAME_SYMBOLS { + continue; + } + let mut best = 0; + for tone in 1..4 { + if self.s4[base + i][tone] > self.s4[base + i][best] { + best = tone; + } + } + if best == FT4_COSTAS_PATTERN[group][i] as usize { + sync_ok += 1; + } + } + } + + if sync_ok < 4 { + return None; + } + + for nseq in 0..3 { + let (nsym, metric): (usize, &mut [f32; N_METRICS]) = match nseq { + 0 => (1, &mut self.metric1), + 1 => (2, &mut self.metric2), + _ => (4, &mut self.metric4), + }; + let nt = 1usize << (2 * nsym); + let ibmax = match nsym { + 1 => 1, + 2 => 3, + 4 => 7, + _ => 0, + }; + + let mut ks = 0; + while ks + nsym <= FT2_FRAME_SYMBOLS { + let mut max_one = [f32::NEG_INFINITY; 8]; + let mut max_zero = [f32::NEG_INFINITY; 8]; + + for i in 0..nt { + let sum = match nsym { + 1 => self.symbols[ks][FT4_GRAY_MAP[i & 0x03] as usize], + 2 => { + self.symbols[ks][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize] + + self.symbols[ks + 1][FT4_GRAY_MAP[i & 0x03] as usize] + } + 4 => { + self.symbols[ks][FT4_GRAY_MAP[(i >> 6) & 0x03] as usize] + + self.symbols[ks + 1][FT4_GRAY_MAP[(i >> 4) & 0x03] 
as usize] + + self.symbols[ks + 2][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize] + + self.symbols[ks + 3][FT4_GRAY_MAP[i & 0x03] as usize] + } + _ => Complex32::new(0.0, 0.0), + }; + let coherent = sum.norm(); + + for ib in 0..=ibmax { + if ((i >> (ibmax - ib)) & 1) != 0 { + max_one[ib] = max_one[ib].max(coherent); + } else { + max_zero[ib] = max_zero[ib].max(coherent); + } + } + } + + let ipt = 2 * ks; + for ib in 0..=ibmax { + let metric_idx = ipt + ib; + if metric_idx < N_METRICS { + metric[metric_idx] = max_one[ib] - max_zero[ib]; + } + } + + ks += nsym; + } + } + + // Patch boundary metrics where multi-symbol integration overruns + self.metric2[204] = self.metric1[204]; + self.metric2[205] = self.metric1[205]; + self.metric4[200] = self.metric2[200]; + self.metric4[201] = self.metric2[201]; + self.metric4[202] = self.metric2[202]; + self.metric4[203] = self.metric2[203]; + self.metric4[204] = self.metric1[204]; + self.metric4[205] = self.metric1[205]; + + normalize_metric(&mut self.metric1); + normalize_metric(&mut self.metric2); + normalize_metric(&mut self.metric4); + + for i in 0..N_METRICS { + self.bitmetrics[i][0] = self.metric1[i]; + self.bitmetrics[i][1] = self.metric2[i]; + self.bitmetrics[i][2] = self.metric4[i]; + } + + Some(&self.bitmetrics) + } +} + +impl Default for BitMetricsWorkspace { + fn default() -> Self { + Self::new() + } +} + /// Extract bit metrics from the downsampled signal region. /// /// Returns a 2D array of shape `[2 * FT2_FRAME_SYMBOLS][3]` where: @@ -24,188 +196,11 @@ use super::{FT2_FRAME_SYMBOLS, FT2_NSS}; /// /// Returns `None` if the sync quality is too poor (fewer than 4 of 16 /// Costas sync tones decoded correctly). 
-pub fn extract_bitmetrics_raw( - signal: &[Complex32], -) -> Option> { - let n_metrics = 2 * FT2_FRAME_SYMBOLS; - let mut bitmetrics = vec![[0.0f32; 3]; n_metrics]; - - // Per-symbol FFT to extract complex tone amplitudes - let mut planner = FftPlanner::::new(); - let fft = planner.plan_fft_forward(FT2_NSS); - let fft_scratch_len = fft.get_inplace_scratch_len(); - let mut scratch = vec![Complex32::new(0.0, 0.0); fft_scratch_len]; - - // Complex symbols for each of the 4 tones at each frame symbol - let mut symbols = vec![[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS]; - // Magnitude for each tone at each symbol - let mut s4 = vec![[0.0f32; 4]; FT2_FRAME_SYMBOLS]; - - for sym in 0..FT2_FRAME_SYMBOLS { - let offset = sym * FT2_NSS; - let mut csymb: Vec = (0..FT2_NSS) - .map(|i| { - if offset + i < signal.len() { - signal[offset + i] - } else { - Complex32::new(0.0, 0.0) - } - }) - .collect(); - - fft.process_with_scratch(&mut csymb, &mut scratch); - - for tone in 0..4 { - if tone < csymb.len() { - symbols[sym][tone] = csymb[tone]; - s4[sym][tone] = csymb[tone].norm(); - } - } - } - - // Sync quality check: verify Costas patterns are detectable - let mut sync_ok = 0; - for group in 0..4 { - let base = group * 33; - for i in 0..4 { - if base + i >= FT2_FRAME_SYMBOLS { - continue; - } - let mut best = 0; - for tone in 1..4 { - if s4[base + i][tone] > s4[base + i][best] { - best = tone; - } - } - if best == FT4_COSTAS_PATTERN[group][i] as usize { - sync_ok += 1; - } - } - } - - if sync_ok < 4 { - return None; - } - - // Precompute one_mask: for each integer 0..255 and bit position 0..7, - // whether that bit is set. 
- let one_mask: Vec<[u8; 8]> = (0..256u16) - .map(|i| { - let mut m = [0u8; 8]; - for j in 0..8 { - m[j] = if (i & (1 << j)) != 0 { 1 } else { 0 }; - } - m - }) - .collect(); - - // Compute metrics at three scales - let mut metric1 = vec![0.0f32; n_metrics]; - let mut metric2 = vec![0.0f32; n_metrics]; - let mut metric4 = vec![0.0f32; n_metrics]; - - for nseq in 0..3 { - let nsym = match nseq { - 0 => 1, - 1 => 2, - _ => 4, - }; - let nt = 1 << (2 * nsym); // number of tone sequences to enumerate - - let mut ks = 0; - while ks + nsym <= FT2_FRAME_SYMBOLS { - // Compute coherent magnitude for each possible tone sequence - let mut s2 = vec![0.0f32; nt]; - for i in 0..nt { - let i1 = i / 64; - let i2 = (i & 63) / 16; - let i3 = (i & 15) / 4; - let i4 = i & 3; - - let sum = match nsym { - 1 => symbols[ks][FT4_GRAY_MAP[i4] as usize], - 2 => { - symbols[ks][FT4_GRAY_MAP[i3] as usize] - + symbols[ks + 1][FT4_GRAY_MAP[i4] as usize] - } - 4 => { - symbols[ks][FT4_GRAY_MAP[i1] as usize] - + symbols[ks + 1][FT4_GRAY_MAP[i2] as usize] - + symbols[ks + 2][FT4_GRAY_MAP[i3] as usize] - + symbols[ks + 3][FT4_GRAY_MAP[i4] as usize] - } - _ => Complex32::new(0.0, 0.0), - }; - s2[i] = sum.norm(); - } - - // Extract bit metrics: for each bit position, find max coherent - // magnitude with that bit set vs unset - let ipt = 2 * ks; - let ibmax: usize = match nsym { - 1 => 1, - 2 => 3, - 4 => 7, - _ => 0, - }; - - for ib in 0..=ibmax { - let mut max_one = f32::NEG_INFINITY; - let mut max_zero = f32::NEG_INFINITY; - - for i in 0..nt { - if i < 256 { - if one_mask[i][ibmax - ib] != 0 { - if s2[i] > max_one { - max_one = s2[i]; - } - } else if s2[i] > max_zero { - max_zero = s2[i]; - } - } - } - - let metric_idx = ipt + ib; - if metric_idx >= n_metrics { - continue; - } - - match nseq { - 0 => metric1[metric_idx] = max_one - max_zero, - 1 => metric2[metric_idx] = max_one - max_zero, - _ => metric4[metric_idx] = max_one - max_zero, - } - } - - ks += nsym; - } - } - - // Patch boundary 
metrics where multi-symbol integration overruns - if n_metrics >= 206 { - metric2[204] = metric1[204]; - metric2[205] = metric1[205]; - metric4[200] = metric2[200]; - metric4[201] = metric2[201]; - metric4[202] = metric2[202]; - metric4[203] = metric2[203]; - metric4[204] = metric1[204]; - metric4[205] = metric1[205]; - } - - // Normalize each metric scale independently - normalize_metric(&mut metric1); - normalize_metric(&mut metric2); - normalize_metric(&mut metric4); - - // Pack into output - for i in 0..n_metrics { - bitmetrics[i][0] = metric1[i]; - bitmetrics[i][1] = metric2[i]; - bitmetrics[i][2] = metric4[i]; - } - - Some(bitmetrics) +pub fn extract_bitmetrics_raw(signal: &[Complex32]) -> Option> { + let mut workspace = BitMetricsWorkspace::new(); + workspace + .extract(signal) + .map(|bitmetrics| bitmetrics.to_vec()) } /// Normalize a metric array by dividing by its standard deviation. diff --git a/src/decoders/trx-ftx/src/ft2/downsample.rs b/src/decoders/trx-ftx/src/ft2/downsample.rs index 3768dc4..7e028a7 100644 --- a/src/decoders/trx-ftx/src/ft2/downsample.rs +++ b/src/decoders/trx-ftx/src/ft2/downsample.rs @@ -14,6 +14,34 @@ use rustfft::FftPlanner; use super::{FT2_NDOWN, FT2_SYMBOL_PERIOD_F}; +/// Reusable scratch buffers for frequency-domain downsampling. 
+pub struct DownsampleWorkspace { + band: Vec, + ifft_scratch: Vec, +} + +impl DownsampleWorkspace { + fn new(nfft2: usize, ifft_scratch_len: usize) -> Self { + Self { + band: vec![Complex32::new(0.0, 0.0); nfft2], + ifft_scratch: vec![Complex32::new(0.0, 0.0); ifft_scratch_len], + } + } + + fn prepare(&mut self, nfft2: usize, ifft_scratch_len: usize) { + if self.band.len() != nfft2 { + self.band.resize(nfft2, Complex32::new(0.0, 0.0)); + } else { + self.band.fill(Complex32::new(0.0, 0.0)); + } + + if self.ifft_scratch.len() != ifft_scratch_len { + self.ifft_scratch + .resize(ifft_scratch_len, Complex32::new(0.0, 0.0)); + } + } +} + /// Downsample context holding precomputed FFT data and spectral window. pub struct DownsampleContext { /// Number of raw samples. @@ -28,8 +56,8 @@ pub struct DownsampleContext { spectrum: Vec, /// IFFT plan for the downsampled length. ifft: std::sync::Arc>, - /// Scratch buffer for IFFT. - ifft_scratch: Vec, + /// Scratch length required by the IFFT plan. + ifft_scratch_len: usize, } impl DownsampleContext { @@ -50,7 +78,11 @@ impl DownsampleContext { let df = sample_rate / nraw as f32; // Build spectral extraction window - let window = build_spectral_window(nfft2, df); + let mut window = build_spectral_window(nfft2, df); + let inv_nfft2 = 1.0 / nfft2 as f32; + for coeff in &mut window { + *coeff *= inv_nfft2; + } // Forward real FFT of raw audio let mut real_planner = realfft::RealFftPlanner::::new(); @@ -59,11 +91,7 @@ impl DownsampleContext { let mut output = fft.make_output_vec(); let mut scratch = fft.make_scratch_vec(); - for (i, s) in raw_audio.iter().enumerate() { - if i < input.len() { - input[i] = *s; - } - } + input.copy_from_slice(raw_audio); fft.process_with_scratch(&mut input, &mut output, &mut scratch) .ok()?; @@ -72,7 +100,7 @@ impl DownsampleContext { // IFFT plan for downsampled length let mut planner = FftPlanner::::new(); let ifft = planner.plan_fft_inverse(nfft2); - let ifft_scratch = vec![Complex32::new(0.0, 
0.0); ifft.get_inplace_scratch_len()]; + let ifft_scratch_len = ifft.get_inplace_scratch_len(); Some(Self { nraw, @@ -81,7 +109,7 @@ impl DownsampleContext { window, spectrum, ifft, - ifft_scratch, + ifft_scratch_len, }) } @@ -90,15 +118,31 @@ impl DownsampleContext { self.nfft2 } + /// Create reusable buffers for repeated downsampling with this context. + pub fn workspace(&self) -> DownsampleWorkspace { + DownsampleWorkspace::new(self.nfft2, self.ifft_scratch_len) + } + /// Downsample the raw audio around `freq_hz`, writing complex baseband /// samples into `out`. Returns the number of samples produced. pub fn downsample(&self, freq_hz: f32, out: &mut [Complex32]) -> usize { + let mut workspace = self.workspace(); + self.downsample_with_workspace(freq_hz, out, &mut workspace) + } + + /// Downsample the raw audio using reusable scratch buffers. + pub fn downsample_with_workspace( + &self, + freq_hz: f32, + out: &mut [Complex32], + workspace: &mut DownsampleWorkspace, + ) -> usize { if out.len() < self.nfft2 { return 0; } - // Working band buffer - let mut band = vec![Complex32::new(0.0, 0.0); self.nfft2]; + workspace.prepare(self.nfft2, self.ifft_scratch_len); + let band = &mut workspace.band; let i0 = (freq_hz / self.df).round() as i32; let half_nraw = (self.nraw / 2) as i32; @@ -119,21 +163,16 @@ impl DownsampleContext { } } - // Apply spectral window and scale - let inv_nfft2 = 1.0 / self.nfft2 as f32; + // Apply spectral window for i in 0..self.nfft2 { - band[i] = Complex32::new( - band[i].re * self.window[i] * inv_nfft2, - band[i].im * self.window[i] * inv_nfft2, - ); + band[i] *= self.window[i]; } // Inverse FFT (in-place) - let mut scratch = self.ifft_scratch.clone(); self.ifft - .process_with_scratch(&mut band, &mut scratch); + .process_with_scratch(band, &mut workspace.ifft_scratch); - out[..self.nfft2].copy_from_slice(&band); + out[..self.nfft2].copy_from_slice(band); self.nfft2 } } @@ -167,13 +206,13 @@ fn build_spectral_window(nfft2: usize, df: f32) -> 
Vec { // Raised-cosine trailing edge for i in (iwt + iwf)..(2 * iwt + iwf).min(nfft2) { - window[i] = 0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos()); + window[i] = + 0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos()); } // Circular shift by iws bins if iws > 0 && iws < nfft2 { - let shifted: Vec = (0..nfft2).map(|i| window[(i + iws) % nfft2]).collect(); - window.copy_from_slice(&shifted); + window.rotate_left(iws); } window diff --git a/src/decoders/trx-ftx/src/ft2/mod.rs b/src/decoders/trx-ftx/src/ft2/mod.rs index 71e1b42..e87f509 100644 --- a/src/decoders/trx-ftx/src/ft2/mod.rs +++ b/src/decoders/trx-ftx/src/ft2/mod.rs @@ -14,6 +14,7 @@ pub mod osd; pub mod sync; use num_complex::Complex32; +use realfft::RealFftPlanner; use crate::constants::FT4_XOR_SEQUENCE; use crate::crc::{ftx_compute_crc, ftx_extract_crc}; @@ -21,8 +22,9 @@ use crate::decode::{pack_bits, FtxMessage}; use crate::ldpc; use crate::protocol::*; -use downsample::DownsampleContext; -use sync::{prepare_sync_waveforms, sync2d_score}; +use bitmetrics::BitMetricsWorkspace; +use downsample::{DownsampleContext, DownsampleWorkspace}; +use sync::{prepare_sync_waveforms, sync2d_score, SyncWaveforms}; // FT2 DSP constants pub const FT2_NDOWN: usize = 9; @@ -119,6 +121,62 @@ pub struct Ft2Pipeline { sample_rate: f32, raw_audio: Vec, raw_capacity: usize, + waveforms: SyncWaveforms, + peak_search: PeakSearchWorkspace, +} + +struct Ft2DecodeWorkspace { + downsample: DownsampleWorkspace, + downsample_a: Vec, + downsample_b: Vec, + signal: Vec, + bitmetrics: BitMetricsWorkspace, +} + +impl Ft2DecodeWorkspace { + fn new(ctx: &DownsampleContext) -> Self { + let nfft2 = ctx.nfft2(); + Self { + downsample: ctx.workspace(), + downsample_a: vec![Complex32::new(0.0, 0.0); nfft2], + downsample_b: vec![Complex32::new(0.0, 0.0); nfft2], + signal: vec![Complex32::new(0.0, 0.0); FT2_FRAME_SAMPLES], + bitmetrics: BitMetricsWorkspace::new(), + } + } +} + 
+struct PeakSearchWorkspace { + window: Vec, + fft: std::sync::Arc>, + fft_input: Vec, + fft_output: Vec, + fft_scratch: Vec, + avg: Vec, + smooth: Vec, + baseline: Vec, +} + +impl PeakSearchWorkspace { + fn new() -> Self { + let window = nuttall_window(FT2_NFFT1); + let mut planner = RealFftPlanner::::new(); + let fft = planner.plan_fft_forward(FT2_NFFT1); + let fft_input = fft.make_input_vec(); + let fft_output = fft.make_output_vec(); + let fft_scratch = fft.make_scratch_vec(); + + Self { + window, + fft, + fft_input, + fft_output, + fft_scratch, + avg: vec![0.0; FT2_NH1], + smooth: vec![0.0; FT2_NH1], + baseline: vec![0.0; FT2_NH1], + } + } } impl Ft2Pipeline { @@ -128,6 +186,8 @@ impl Ft2Pipeline { sample_rate: sample_rate as f32, raw_audio: Vec::with_capacity(FT2_NMAX), raw_capacity: FT2_NMAX, + waveforms: prepare_sync_waveforms(), + peak_search: PeakSearchWorkspace::new(), } } @@ -157,7 +217,7 @@ impl Ft2Pipeline { } /// Run the full FT2 decode pipeline. Returns decoded messages. 
- pub fn decode(&self, max_results: usize) -> Vec { + pub fn decode(&mut self, max_results: usize) -> Vec { if self.raw_audio.len() < FT2_NFFT1 { return Vec::new(); } @@ -167,7 +227,8 @@ impl Ft2Pipeline { None => return Vec::new(), }; - let hits = self.find_scan_hits(&ctx); + let mut workspace = Ft2DecodeWorkspace::new(&ctx); + let hits = self.find_scan_hits(&ctx, &mut workspace); if hits.is_empty() { return Vec::new(); } @@ -179,11 +240,11 @@ impl Ft2Pipeline { if results.len() >= max_results { break; } - if let Some(result) = self.decode_hit(&ctx, hit) { + if let Some(result) = self.decode_hit(&ctx, hit, &mut workspace) { // Dedup - let dominated = seen_hashes.iter().any(|(h, p)| { - *h == result.message.hash && *p == result.message.payload - }); + let dominated = seen_hashes + .iter() + .any(|(h, p)| *h == result.message.hash && *p == result.message.payload); if dominated { continue; } @@ -196,7 +257,7 @@ impl Ft2Pipeline { } /// Find frequency peaks from averaged power spectrum. - fn find_frequency_peaks(&self) -> Vec { + fn find_frequency_peaks(&mut self) -> Vec { if self.raw_audio.len() < FT2_NFFT1 { return Vec::new(); } @@ -204,65 +265,68 @@ impl Ft2Pipeline { let fs = self.sample_rate; let df = fs / FT2_NFFT1 as f32; let n_frames = 1 + (self.raw_audio.len() - FT2_NFFT1) / FT2_NSTEP; + let PeakSearchWorkspace { + window, + fft, + fft_input, + fft_output, + fft_scratch, + avg, + smooth, + baseline, + } = &mut self.peak_search; - // Compute Nuttall window - let window = nuttall_window(FT2_NFFT1); - - // Forward real FFT setup - let mut real_planner = realfft::RealFftPlanner::::new(); - let fft = real_planner.plan_fft_forward(FT2_NFFT1); - let mut fft_input = fft.make_input_vec(); - let mut fft_output = fft.make_output_vec(); - let mut fft_scratch = fft.make_scratch_vec(); - - // Average power spectrum across frames - let mut avg = vec![0.0f32; FT2_NH1]; + avg.fill(0.0); + smooth.fill(0.0); + baseline.fill(0.0); for frame in 0..n_frames { let start = frame * 
FT2_NSTEP; - for i in 0..FT2_NFFT1 { - fft_input[i] = self.raw_audio[start + i] * window[i]; + let input = &self.raw_audio[start..(start + FT2_NFFT1)]; + for (dst, (&sample, &coeff)) in + fft_input.iter_mut().zip(input.iter().zip(window.iter())) + { + *dst = sample * coeff; } - fft.process_with_scratch(&mut fft_input, &mut fft_output, &mut fft_scratch) + fft.process_with_scratch(fft_input, fft_output, fft_scratch) .expect("FFT failed"); - for bin in 1..FT2_NH1 { - if bin < fft_output.len() { - let c = fft_output[bin]; - let power = c.re * c.re + c.im * c.im; - avg[bin] += power; + for (bin, c) in fft_output.iter().enumerate().take(FT2_NH1).skip(1) { + avg[bin] += c.norm_sqr(); + } + } + + let inv_n_frames = 1.0 / n_frames as f32; + for bin in 1..FT2_NH1 { + avg[bin] *= inv_n_frames; + } + + // Smooth with 15-point moving average + if FT2_NH1 > 16 { + let mut sum: f32 = avg[1..16].iter().sum(); + for bin in 8..FT2_NH1.saturating_sub(8) { + smooth[bin] = sum / 15.0; + if bin + 8 < FT2_NH1 { + sum += avg[bin + 8] - avg[bin - 7]; } } } - for bin in 1..FT2_NH1 { - avg[bin] /= n_frames as f32; - } - - // Smooth with 15-point moving average - let mut smooth = vec![0.0f32; FT2_NH1]; - for bin in 8..FT2_NH1.saturating_sub(8) { - let mut sum = 0.0f32; - for i in (bin.saturating_sub(7))..=(bin + 7).min(FT2_NH1 - 1) { - sum += avg[i]; - } - smooth[bin] = sum / 15.0; - } - // Baseline with 63-point moving average - let mut baseline = vec![0.0f32; FT2_NH1]; - for bin in 32..FT2_NH1.saturating_sub(32) { - let mut sum = 0.0f32; - for i in (bin.saturating_sub(31))..=(bin + 31).min(FT2_NH1 - 1) { - sum += smooth[i]; + if FT2_NH1 > 64 { + let mut sum: f32 = smooth[1..64].iter().sum(); + for bin in 32..FT2_NH1.saturating_sub(32) { + baseline[bin] = sum / 63.0 + 1e-9; + if bin + 32 < FT2_NH1 { + sum += smooth[bin + 32] - smooth[bin - 31]; + } } - baseline[bin] = sum / 63.0 + 1e-9; } // Find peaks let min_bin = (200.0 / df).round() as usize; let max_bin = (4910.0 / df).round() as usize; 
- let mut candidates = Vec::new(); + let mut candidates = Vec::with_capacity(FT2_MAX_RAW_CANDIDATES); let mut bin = min_bin + 1; while bin < max_bin.saturating_sub(1) && candidates.len() < FT2_MAX_RAW_CANDIDATES { @@ -309,20 +373,25 @@ impl Ft2Pipeline { } // Sort by score descending - candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal)); + candidates.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); candidates } /// Find scan hits by downsampling each frequency peak and computing sync scores. - fn find_scan_hits(&self, ctx: &DownsampleContext) -> Vec { + fn find_scan_hits( + &mut self, + ctx: &DownsampleContext, + workspace: &mut Ft2DecodeWorkspace, + ) -> Vec { let peaks = self.find_frequency_peaks(); if peaks.is_empty() { return Vec::new(); } - let nfft2 = ctx.nfft2(); - let waveforms = prepare_sync_waveforms(); - let mut hits = Vec::new(); for peak in &peaks { @@ -330,12 +399,15 @@ impl Ft2Pipeline { break; } - let mut down = vec![Complex32::new(0.0, 0.0); nfft2]; - let produced = ctx.downsample(peak.freq_hz, &mut down); + let produced = ctx.downsample_with_workspace( + peak.freq_hz, + &mut workspace.downsample_a, + &mut workspace.downsample, + ); if produced == 0 { continue; } - normalize_downsampled(&mut down[..produced], produced); + normalize_downsampled(&mut workspace.downsample_a[..produced], produced); // Coarse search let mut best_score: f32 = -1.0; @@ -347,10 +419,10 @@ impl Ft2Pipeline { let mut start = -688i32; while start <= 2024 { let score = sync2d_score( - &down[..produced], + &workspace.downsample_a[..produced], start, idf, - &waveforms, + &self.waveforms, ); if score > best_score { best_score = score; @@ -373,10 +445,10 @@ impl Ft2Pipeline { } for start in (best_start - 5)..=(best_start + 5) { let score = sync2d_score( - &down[..produced], + &workspace.downsample_a[..produced], start, idf, - &waveforms, + &self.waveforms, ); if score > best_score { 
best_score = score; @@ -409,17 +481,22 @@ impl Ft2Pipeline { } /// Attempt to decode a single scan hit through the full pipeline. - fn decode_hit(&self, ctx: &DownsampleContext, hit: &ScanHit) -> Option { - let nfft2 = ctx.nfft2(); - let waveforms = prepare_sync_waveforms(); - + fn decode_hit( + &self, + ctx: &DownsampleContext, + hit: &ScanHit, + workspace: &mut Ft2DecodeWorkspace, + ) -> Option { // Initial downsample for sync refinement - let mut cd2 = vec![Complex32::new(0.0, 0.0); nfft2]; - let produced = ctx.downsample(hit.freq_hz, &mut cd2); + let produced = ctx.downsample_with_workspace( + hit.freq_hz, + &mut workspace.downsample_a, + &mut workspace.downsample, + ); if produced == 0 { return None; } - normalize_downsampled(&mut cd2[..produced], produced); + normalize_downsampled(&mut workspace.downsample_a[..produced], produced); // Refine sync let mut best_score: f32 = -1.0; @@ -431,7 +508,12 @@ impl Ft2Pipeline { continue; } for start in (hit.start - 5)..=(hit.start + 5) { - let score = sync2d_score(&cd2[..produced], start, idf, &waveforms); + let score = sync2d_score( + &workspace.downsample_a[..produced], + start, + idf, + &self.waveforms, + ); if score > best_score { best_score = score; best_start = start; @@ -451,19 +533,25 @@ impl Ft2Pipeline { } // Final downsample at corrected frequency - let mut cb = vec![Complex32::new(0.0, 0.0); nfft2]; - let produced2 = ctx.downsample(corrected_freq_hz, &mut cb); + let produced2 = ctx.downsample_with_workspace( + corrected_freq_hz, + &mut workspace.downsample_b, + &mut workspace.downsample, + ); if produced2 == 0 { return None; } - normalize_downsampled(&mut cb[..produced2], FT2_FRAME_SAMPLES); + normalize_downsampled(&mut workspace.downsample_b[..produced2], FT2_FRAME_SAMPLES); // Extract signal region - let mut signal = vec![Complex32::new(0.0, 0.0); FT2_FRAME_SAMPLES]; - extract_signal_region(&cb[..produced2], best_start, &mut signal); + extract_signal_region( + &workspace.downsample_b[..produced2], + 
best_start, + &mut workspace.signal, + ); // Extract bit metrics - let bitmetrics = bitmetrics::extract_bitmetrics_raw(&signal)?; + let bitmetrics = workspace.bitmetrics.extract(&workspace.signal)?; // Sync quality check using known Costas bit patterns let sync_bits_a: [u8; 8] = [0, 0, 0, 1, 1, 0, 1, 1]; @@ -472,10 +560,26 @@ impl Ft2Pipeline { let sync_bits_d: [u8; 8] = [1, 0, 1, 1, 0, 0, 0, 1]; let mut sync_qual = 0; for i in 0..8 { - sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] { 1 } else { 0 }; - sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] { 1 } else { 0 }; - sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] { 1 } else { 0 }; - sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] { 1 } else { 0 }; + sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] { + 1 + } else { + 0 + }; + sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] { + 1 + } else { + 0 + }; + sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] { + 1 + } else { + 0 + }; + sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] { + 1 + } else { + 0 + }; } if sync_qual < 10 { return None; @@ -591,8 +695,18 @@ impl Ft2Pipeline { } // Compute refined timing via parabolic interpolation - let sm1 = sync2d_score(&cd2[..produced], best_start - 1, best_idf, &waveforms); - let sp1 = sync2d_score(&cd2[..produced], best_start + 1, best_idf, &waveforms); + let sm1 = sync2d_score( + &workspace.downsample_a[..produced], + best_start - 1, + best_idf, + &self.waveforms, + ); + let sp1 = sync2d_score( + &workspace.downsample_a[..produced], + best_start + 1, + best_idf, + &self.waveforms, + ); let mut xstart = best_start as f32; let den = sm1 - 2.0 * best_score + sp1; if den.abs() > 1e-6 { @@ -635,7 +749,11 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) { if power <= 0.0 { return; } - let rc = if ref_count == 0 { samples.len() } else { 
ref_count }; + let rc = if ref_count == 0 { + samples.len() + } else { + ref_count + }; let scale = (rc as f32 / power).sqrt(); for s in samples.iter_mut() { *s *= scale; @@ -644,14 +762,17 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) { /// Extract a signal region starting at `start` into `out_signal`. fn extract_signal_region(input: &[Complex32], start: i32, out_signal: &mut [Complex32]) { - for i in 0..out_signal.len() { - let src = start + i as i32; - out_signal[i] = if src >= 0 && (src as usize) < input.len() { - input[src as usize] - } else { - Complex32::new(0.0, 0.0) - }; + out_signal.fill(Complex32::new(0.0, 0.0)); + + let src_start = start.max(0) as usize; + let dst_start = (-start).max(0) as usize; + if dst_start >= out_signal.len() || src_start >= input.len() { + return; } + + let copy_len = (input.len() - src_start).min(out_signal.len() - dst_start); + out_signal[dst_start..(dst_start + copy_len)] + .copy_from_slice(&input[src_start..(src_start + copy_len)]); } /// Normalize LLR array (divide by standard deviation). diff --git a/src/decoders/trx-ftx/src/ft2/sync.rs b/src/decoders/trx-ftx/src/ft2/sync.rs index b7bbaa1..a385ade 100644 --- a/src/decoders/trx-ftx/src/ft2/sync.rs +++ b/src/decoders/trx-ftx/src/ft2/sync.rs @@ -9,6 +9,7 @@ //! reference across time and frequency offsets. use num_complex::Complex32; +use std::sync::OnceLock; use crate::constants::FT4_COSTAS_PATTERN; @@ -16,6 +17,12 @@ use super::{FT2_NDOWN, FT2_NSS, FT2_SYMBOL_PERIOD_F, FT2_SYNC_TWEAK_MAX, FT2_SYN /// Number of frequency tweak entries. 
const NUM_TWEAKS: usize = (FT2_SYNC_TWEAK_MAX - FT2_SYNC_TWEAK_MIN) as usize + 1; +const SYNC_GROUP_COUNT: usize = 4; +const SYNC_SAMPLES: usize = 64; +const SAMPLE_STRIDE: usize = 2; +const GROUP_STRIDE: i32 = 33 * FT2_NSS as i32; +const GROUP_LAST_SAMPLE_OFFSET: i32 = SAMPLE_STRIDE as i32 * (SYNC_SAMPLES as i32 - 1); +const FRAME_LAST_SAMPLE_OFFSET: i32 = 3 * GROUP_STRIDE + GROUP_LAST_SAMPLE_OFFSET; /// Precomputed sync and frequency-tweak waveforms. pub struct SyncWaveforms { @@ -73,6 +80,74 @@ pub fn prepare_sync_waveforms() -> SyncWaveforms { } } +type SyncReferenceBank = [[[Complex32; SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS]; + +fn sync_reference_bank() -> &'static SyncReferenceBank { + static REFS: OnceLock = OnceLock::new(); + + REFS.get_or_init(|| { + let waveforms = prepare_sync_waveforms(); + let mut refs = [[[Complex32::new(0.0, 0.0); SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS]; + + for tw_idx in 0..NUM_TWEAKS { + for group in 0..SYNC_GROUP_COUNT { + for i in 0..SYNC_SAMPLES { + refs[tw_idx][group][i] = + (waveforms.sync_wave[group][i] * waveforms.tweak_wave[tw_idx][i]).conj(); + } + } + } + + refs + }) +} + +#[inline(always)] +fn correlate_group_fast( + samples: &[Complex32], + pos: usize, + refs: &[Complex32; SYNC_SAMPLES], +) -> f32 { + let mut sum_re = 0.0f32; + let mut sum_im = 0.0f32; + + for i in 0..SYNC_SAMPLES { + let sample = samples[pos + i * SAMPLE_STRIDE]; + let reference = refs[i]; + sum_re += sample.re * reference.re - sample.im * reference.im; + sum_im += sample.re * reference.im + sample.im * reference.re; + } + + (sum_re * sum_re + sum_im * sum_im).sqrt() +} + +#[inline(always)] +fn correlate_group_clipped( + samples: &[Complex32], + pos: i32, + refs: &[Complex32; SYNC_SAMPLES], +) -> (f32, usize) { + let mut sum_re = 0.0f32; + let mut sum_im = 0.0f32; + let mut usable = 0usize; + let n_samples = samples.len() as i32; + + for i in 0..SYNC_SAMPLES { + let sample_idx = pos + i as i32 * SAMPLE_STRIDE as i32; + if sample_idx < 0 || 
sample_idx >= n_samples { + continue; + } + + let sample = samples[sample_idx as usize]; + let reference = refs[i]; + sum_re += sample.re * reference.re - sample.im * reference.im; + sum_im += sample.re * reference.im + sample.im * reference.re; + usable += 1; + } + + ((sum_re * sum_re + sum_im * sum_im).sqrt(), usable) +} + /// Compute the 2D sync score for a given time offset and frequency tweak. /// /// Correlates the downsampled complex samples against the four Costas sync @@ -88,46 +163,36 @@ pub fn sync2d_score( samples: &[Complex32], start: i32, idf: i32, - waveforms: &SyncWaveforms, + _waveforms: &SyncWaveforms, ) -> f32 { - let nss = FT2_NSS as i32; let n_samples = samples.len() as i32; - - // The four sync groups are at symbol positions 0, 33, 66, 99 within the frame - let positions = [ - start, - start + 33 * nss, - start + 66 * nss, - start + 99 * nss, - ]; - let tw_idx = (idf - FT2_SYNC_TWEAK_MIN) as usize; - if tw_idx >= waveforms.tweak_wave.len() { + if tw_idx >= NUM_TWEAKS { return 0.0; } - let tweak = &waveforms.tweak_wave[tw_idx]; + + let refs = &sync_reference_bank()[tw_idx]; + let scale = 1.0 / (2.0 * FT2_NSS as f32); let mut score = 0.0f32; - for group in 0..4 { - let pos = positions[group]; - let mut sum = Complex32::new(0.0, 0.0); - let mut usable = 0; + if start >= 0 && start + FRAME_LAST_SAMPLE_OFFSET < n_samples { + for (group, refs_group) in refs.iter().enumerate() { + let pos = (start + group as i32 * GROUP_STRIDE) as usize; + score += correlate_group_fast(samples, pos, refs_group) * scale; + } + return score; + } - for i in 0..64 { - let sample_idx = pos + 2 * i as i32; - if sample_idx < 0 || sample_idx >= n_samples { - continue; - } - // Correlate: multiply received sample by conjugate of - // (sync_reference * tweak_phasor) - let reference = waveforms.sync_wave[group][i] * tweak[i]; - sum += samples[sample_idx as usize] * reference.conj(); - usable += 1; + for (group, refs_group) in refs.iter().enumerate() { + let pos = start + group 
as i32 * GROUP_STRIDE; + if pos >= n_samples || pos + GROUP_LAST_SAMPLE_OFFSET < 0 { + continue; } + let (corr, usable) = correlate_group_clipped(samples, pos, refs_group); if usable > 16 { - score += sum.norm() / (2.0 * FT2_NSS as f32); + score += corr * scale; } }