From 2da749b9782250e989ca0d0882776e29a0c55f26 Mon Sep 17 00:00:00 2001 From: Stan Grams Date: Thu, 19 Mar 2026 23:22:58 +0100 Subject: [PATCH] [refactor](trx-ftx): optimize hot paths and deduplicate decoder internals - Cache generator matrix with OnceLock (P0.1) - Store raw complex in WfElem, eliminate powf round-trip (P0.2) - Reuse FFT planners across decode cycles in Ft2Pipeline (P0.3) - Deduplicate fast_atanh/ldpc_check into ldpc.rs (P1.1) - Gate unused sum-product ldpc_decode behind #[cfg(test)] (P1.2) - Eliminate double pack_bits in verify_crc_and_build_message (P1.3) - Remove unnecessary unsafe impl Send for Ft8Decoder (P1.4) - Convert key loops to iterator/zip patterns (P2.1) - Remove resolved clippy::manual_memcpy suppressions Co-Authored-By: Claude Opus 4.6 Signed-off-by: Stan Grams --- src/decoders/trx-ftx/src/decode.rs | 14 ++-- src/decoders/trx-ftx/src/decoder.rs | 3 - src/decoders/trx-ftx/src/ft2/downsample.rs | 34 +++++++- src/decoders/trx-ftx/src/ft2/mod.rs | 22 +++++- src/decoders/trx-ftx/src/ft2/osd.rs | 91 +++++++++------------- src/decoders/trx-ftx/src/ldpc.rs | 7 +- src/decoders/trx-ftx/src/lib.rs | 9 +-- src/decoders/trx-ftx/src/monitor.rs | 20 +++-- 8 files changed, 113 insertions(+), 87 deletions(-) diff --git a/src/decoders/trx-ftx/src/decode.rs b/src/decoders/trx-ftx/src/decode.rs index 8599087..d771b2b 100644 --- a/src/decoders/trx-ftx/src/decode.rs +++ b/src/decoders/trx-ftx/src/decode.rs @@ -38,8 +38,7 @@ pub struct FtxMessage { } fn wf_elem_to_complex(elem: WfElem) -> Complex32 { - let amplitude = 10.0_f32.powf(elem.mag / 20.0); - Complex32::from_polar(amplitude, elem.phase) + Complex32::new(elem.re, elem.im) } fn get_cand_offset(wf: &Waterfall, cand: &Candidate) -> usize { @@ -55,10 +54,7 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) -> &WfElem { if i < wf.mag.len() { &wf.mag[i] } else { - &WfElem { - mag: -120.0, - phase: 0.0, - } + &DEFAULT_WF_ELEM } } @@ -66,6 +62,8 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) 
-> &WfElem { static DEFAULT_WF_ELEM: WfElem = WfElem { mag: -120.0, phase: 0.0, + re: 0.0, + im: 0.0, }; fn wf_mag_safe(wf: &Waterfall, idx: usize) -> &WfElem { @@ -534,6 +532,7 @@ pub(crate) fn verify_crc_and_build_message( let mut a91 = [0u8; crate::protocol::FTX_LDPC_K_BYTES]; pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91); + let a91_orig = a91; let crc_extracted = crate::crc::ftx_extract_crc(&a91); a91[9] &= 0xF8; a91[10] = 0x00; @@ -543,8 +542,7 @@ pub(crate) fn verify_crc_and_build_message( return None; } - // Re-read a91 since we modified it for CRC check - pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91); + let a91 = a91_orig; let mut message = FtxMessage { hash: crc_calculated, diff --git a/src/decoders/trx-ftx/src/decoder.rs b/src/decoders/trx-ftx/src/decoder.rs index b64b6ce..eef1578 100644 --- a/src/decoders/trx-ftx/src/decoder.rs +++ b/src/decoders/trx-ftx/src/decoder.rs @@ -45,9 +45,6 @@ pub struct Ft8Decoder { ft2_pipeline: Option, } -// Ft8Decoder is not shared across threads, but may be moved between tasks. -unsafe impl Send for Ft8Decoder {} - impl Ft8Decoder { /// Create a new FT8 decoder. pub fn new(sample_rate: u32) -> Result { diff --git a/src/decoders/trx-ftx/src/ft2/downsample.rs b/src/decoders/trx-ftx/src/ft2/downsample.rs index 7e028a7..e92a528 100644 --- a/src/decoders/trx-ftx/src/ft2/downsample.rs +++ b/src/decoders/trx-ftx/src/ft2/downsample.rs @@ -9,6 +9,8 @@ //! around that frequency, applies a spectral window, and inverse-FFTs to produce //! a complex baseband signal at a reduced sample rate (12000/NDOWN = 1333.3 Hz). +use std::sync::Arc; + use num_complex::Complex32; use rustfft::FftPlanner; @@ -64,8 +66,22 @@ impl DownsampleContext { /// Initialize the downsample context by computing the forward FFT of /// the raw audio and preparing the spectral window. /// + /// If `real_fft` and `ifft` are provided, they are reused instead of + /// creating fresh planners. 
The real FFT must be a forward plan of length + /// `nraw` and the IFFT must be an inverse plan of length `nraw / NDOWN`. + /// /// Returns `None` if the raw audio is too short or allocation fails. pub fn new(raw_audio: &[f32], sample_rate: f32) -> Option<Self> { + Self::new_with_plans(raw_audio, sample_rate, None, None) + } + + /// Initialize with optional pre-built FFT plans for reuse across decode cycles. + pub fn new_with_plans( + raw_audio: &[f32], + sample_rate: f32, + real_fft: Option<Arc<dyn realfft::RealToComplex<f32>>>, + ifft: Option<Arc<dyn rustfft::Fft<f32>>>, + ) -> Option<Self> { let nraw = raw_audio.len(); if nraw == 0 { return None; @@ -85,8 +101,13 @@ impl DownsampleContext { } // Forward real FFT of raw audio - let mut real_planner = realfft::RealFftPlanner::<f32>::new(); - let fft = real_planner.plan_fft_forward(nraw); + let fft = match real_fft { + Some(f) => f, + None => { + let mut real_planner = realfft::RealFftPlanner::<f32>::new(); + real_planner.plan_fft_forward(nraw) + } + }; let mut input = fft.make_input_vec(); let mut output = fft.make_output_vec(); let mut scratch = fft.make_scratch_vec(); @@ -98,8 +119,13 @@ impl DownsampleContext { let spectrum = output; // IFFT plan for downsampled length - let mut planner = FftPlanner::<f32>::new(); - let ifft = planner.plan_fft_inverse(nfft2); + let ifft = match ifft { + Some(f) => f, + None => { + let mut planner = FftPlanner::<f32>::new(); + planner.plan_fft_inverse(nfft2) + } + }; let ifft_scratch_len = ifft.get_inplace_scratch_len(); Some(Self { diff --git a/src/decoders/trx-ftx/src/ft2/mod.rs b/src/decoders/trx-ftx/src/ft2/mod.rs index 73890e9..615c09c 100644 --- a/src/decoders/trx-ftx/src/ft2/mod.rs +++ b/src/decoders/trx-ftx/src/ft2/mod.rs @@ -13,8 +13,11 @@ pub mod downsample; pub mod osd; pub mod sync; +use std::sync::Arc; + use num_complex::Complex32; use realfft::RealFftPlanner; +use rustfft::FftPlanner; use crate::decode::{verify_crc_and_build_message, FtxMessage}; use crate::protocol::*; @@ -117,6 +120,9 @@ pub struct Ft2Pipeline { raw_capacity: usize, waveforms: 
SyncWaveforms, peak_search: PeakSearchWorkspace, + // Cached FFT plans reused across decode cycles + ds_real_fft: Arc<dyn realfft::RealToComplex<f32>>, + ds_ifft: Arc<dyn rustfft::Fft<f32>>, } struct Ft2DecodeWorkspace { @@ -176,12 +182,21 @@ impl PeakSearchWorkspace { impl Ft2Pipeline { /// Create a new FT2 pipeline for the given sample rate. pub fn new(sample_rate: i32) -> Self { + // Pre-build FFT plans for the downsample context (reused every decode cycle) + let nfft2 = FT2_NMAX / FT2_NDOWN; + let mut real_planner = RealFftPlanner::<f32>::new(); + let ds_real_fft = real_planner.plan_fft_forward(FT2_NMAX); + let mut fft_planner = FftPlanner::<f32>::new(); + let ds_ifft = fft_planner.plan_fft_inverse(nfft2); + Self { sample_rate: sample_rate as f32, raw_audio: Vec::with_capacity(FT2_NMAX), raw_capacity: FT2_NMAX, waveforms: prepare_sync_waveforms(), peak_search: PeakSearchWorkspace::new(), + ds_real_fft, + ds_ifft, } } @@ -216,7 +231,12 @@ impl Ft2Pipeline { return Vec::new(); } - let ctx = match DownsampleContext::new(&self.raw_audio, self.sample_rate) { + let ctx = match DownsampleContext::new_with_plans( + &self.raw_audio, + self.sample_rate, + Some(Arc::clone(&self.ds_real_fft)), + Some(Arc::clone(&self.ds_ifft)), + ) { Some(ctx) => ctx, None => return Vec::new(), }; diff --git a/src/decoders/trx-ftx/src/ft2/osd.rs b/src/decoders/trx-ftx/src/ft2/osd.rs index e196ca9..71bf9ac 100644 --- a/src/decoders/trx-ftx/src/ft2/osd.rs +++ b/src/decoders/trx-ftx/src/ft2/osd.rs @@ -16,39 +16,15 @@ //! 3. Exhaustive search over bit-flip patterns of increasing weight //! 4. Pattern hashing (OSD-2) to efficiently search two-bit-flip corrections +use std::sync::OnceLock; + use crate::constants::{FTX_LDPC_GENERATOR, FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS}; use crate::crc::{ftx_compute_crc, ftx_extract_crc}; use crate::decode::pack_bits; use crate::encode::parity8; +use crate::ldpc::ldpc_check; use crate::protocol::{FTX_LDPC_K, FTX_LDPC_K_BYTES, FTX_LDPC_M, FTX_LDPC_N}; -/// Check LDPC parity of a 174-bit codeword. 
Returns number of parity errors. -pub fn ft2_ldpc_check(codeword: &[u8]) -> i32 { - let mut errors = 0i32; - for m in 0..FTX_LDPC_M { - let mut x: u8 = 0; - let num_rows = FTX_LDPC_NUM_ROWS[m] as usize; - for i in 0..num_rows { - let idx = FTX_LDPC_NM[m][i] as usize; - if idx > 0 && idx - 1 < codeword.len() { - x ^= codeword[idx - 1]; - } - } - if x != 0 { - errors += 1; - } - } - errors -} - -/// Fast rational approximation of `atanh(x)`. -fn fast_atanh(x: f32) -> f32 { - let x2 = x * x; - let a = x * (945.0 + x2 * (-735.0 + x2 * 64.0)); - let b = 945.0 + x2 * (-1050.0 + x2 * 225.0); - a / b -} - /// Piecewise linear approximation of `atanh(x)` used in BP message passing. fn platanh(x: f32) -> f32 { let isign: f32 = if x < 0.0 { -1.0 } else { 1.0 }; @@ -102,23 +78,23 @@ fn encode174_91_nocrc_bits(message91: &[u8], codeword: &mut [u8; FTX_LDPC_N]) { /// XOR two byte slices. fn xor_rows(dst: &mut [u8], src: &[u8], len: usize) { - for i in 0..len { - dst[i] ^= src[i]; - } + dst[..len] + .iter_mut() + .zip(&src[..len]) + .for_each(|(d, s)| *d ^= s); } /// Matrix-vector multiply for re-encoding in OSD. fn mrbencode91(me: &[u8], codeword: &mut [u8], g2: &[u8], n: usize, k: usize) { - for c in codeword[..n].iter_mut() { - *c = 0; - } + codeword[..n].fill(0); for i in 0..k { if me[i] == 0 { continue; } - for j in 0..n { - codeword[j] ^= g2[j * k + i]; - } + codeword[..n] + .iter_mut() + .enumerate() + .for_each(|(j, c)| *c ^= g2[j * k + i]); } } @@ -269,9 +245,9 @@ pub fn osd174_91( let n = FTX_LDPC_N; let ndeep = ndeep.min(6); - // Build per-bit generator matrix (each row i generates codeword from + // Cached per-bit generator matrix (each row i generates codeword from // unit vector e_i) - let gen = build_generator_matrix(); + let gen = generator_matrix(); // Stack-allocated working buffers (k=91, n=174, n-k=83). let mut genmrb = [0u8; FTX_LDPC_K * FTX_LDPC_N]; @@ -608,22 +584,24 @@ fn reorder_result( } } -/// Build the full per-bit generator matrix. 
-/// Each row `i` contains the 174-bit codeword produced by encoding -/// a unit vector with bit `i` set. -fn build_generator_matrix() -> Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]> { - let mut gen = Box::new([[0u8; FTX_LDPC_N]; FTX_LDPC_K]); - for i in 0..FTX_LDPC_K { - let mut msg = [0u8; FTX_LDPC_K]; - msg[i] = 1; - if i < 77 { - for j in 77..FTX_LDPC_K { - msg[j] = 0; +/// Get a reference to the cached generator matrix. +/// The matrix is computed once on first call and reused thereafter. +fn generator_matrix() -> &'static [[u8; FTX_LDPC_N]; FTX_LDPC_K] { + static GEN: OnceLock<Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]>> = OnceLock::new(); + GEN.get_or_init(|| { + let mut gen = Box::new([[0u8; FTX_LDPC_N]; FTX_LDPC_K]); + for i in 0..FTX_LDPC_K { + let mut msg = [0u8; FTX_LDPC_K]; + msg[i] = 1; + if i < 77 { + for j in 77..FTX_LDPC_K { + msg[j] = 0; + } } + encode174_91_nocrc_bits(&msg, &mut gen[i]); } - encode174_91_nocrc_bits(&msg, &mut gen[i]); - } - gen + gen + }) } /// Full iterative BP decoder with OSD refinement. @@ -698,7 +676,7 @@ pub fn ft2_decode174_91_osd( for i in 0..FTX_LDPC_N { best_cw[i] = if zn[i] > 0.0 { 1 } else { 0 }; } - let ncheck = ft2_ldpc_check(&best_cw); + let ncheck = ldpc_check(&best_cw); if ncheck == 0 && check_crc91(&best_cw) { message91.copy_from_slice(&best_cw[..FTX_LDPC_K]); @@ -815,18 +793,19 @@ pub fn ft2_decode174_91_osd( #[cfg(test)] mod tests { use super::*; + use crate::ldpc::fast_atanh; #[test] fn ldpc_check_all_zeros() { let cw = [0u8; FTX_LDPC_N]; - assert_eq!(ft2_ldpc_check(&cw), 0); + assert_eq!(ldpc_check(&cw), 0); } #[test] fn ldpc_check_single_bit_error() { let mut cw = [0u8; FTX_LDPC_N]; cw[0] = 1; - assert!(ft2_ldpc_check(&cw) > 0); + assert!(ldpc_check(&cw) > 0); } #[test] @@ -922,7 +901,7 @@ mod tests { #[test] fn generator_matrix_row_zero() { - let gen = build_generator_matrix(); + let gen = generator_matrix(); // Row 0 should encode unit vector e_0 assert_eq!(gen[0][0], 1); // Some parity bits should be non-zero diff --git a/src/decoders/trx-ftx/src/ldpc.rs 
b/src/decoders/trx-ftx/src/ldpc.rs index 469c646..e91b91f 100644 --- a/src/decoders/trx-ftx/src/ldpc.rs +++ b/src/decoders/trx-ftx/src/ldpc.rs @@ -13,7 +13,7 @@ use crate::constants::{FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS}; use crate::protocol::{FTX_LDPC_M, FTX_LDPC_N}; /// Fast rational approximation of `tanh(x)`, clamped at +/-4.97. -fn fast_tanh(x: f32) -> f32 { +pub(crate) fn fast_tanh(x: f32) -> f32 { if x < -4.97f32 { return -1.0f32; } @@ -27,7 +27,7 @@ fn fast_tanh(x: f32) -> f32 { } /// Fast rational approximation of `atanh(x)`. -fn fast_atanh(x: f32) -> f32 { +pub(crate) fn fast_atanh(x: f32) -> f32 { let x2 = x * x; let a = x * (945.0f32 + x2 * (-735.0f32 + x2 * 64.0f32)); let b = 945.0f32 + x2 * (-1050.0f32 + x2 * 225.0f32); @@ -37,7 +37,7 @@ fn fast_atanh(x: f32) -> f32 { /// Count the number of LDPC parity errors in a 174-bit codeword. /// /// Returns 0 if all parity checks pass (valid codeword). -pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 { +pub(crate) fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 { let mut errors = 0i32; for m in 0..FTX_LDPC_M { let mut x: u8 = 0; @@ -59,6 +59,7 @@ pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 { /// `max_iters` controls how many iterations to attempt. /// /// Returns the number of remaining parity errors (0 = success). 
+#[cfg(test)] pub fn ldpc_decode( codeword: &mut [f32; FTX_LDPC_N], max_iters: usize, diff --git a/src/decoders/trx-ftx/src/lib.rs b/src/decoders/trx-ftx/src/lib.rs index c6bf0d1..2b73119 100644 --- a/src/decoders/trx-ftx/src/lib.rs +++ b/src/decoders/trx-ftx/src/lib.rs @@ -10,14 +10,9 @@ pub mod decode; mod decoder; #[allow(clippy::needless_range_loop)] pub mod encode; -#[allow( - dead_code, - clippy::manual_memcpy, - clippy::needless_range_loop, - clippy::too_many_arguments -)] +#[allow(dead_code, clippy::needless_range_loop, clippy::too_many_arguments)] pub mod ft2; -#[allow(clippy::manual_memcpy, clippy::needless_range_loop)] +#[allow(clippy::needless_range_loop)] pub mod ldpc; #[allow(clippy::explicit_counter_loop, clippy::needless_range_loop)] pub mod message; diff --git a/src/decoders/trx-ftx/src/monitor.rs b/src/decoders/trx-ftx/src/monitor.rs index 24aa6d5..121c9cd 100644 --- a/src/decoders/trx-ftx/src/monitor.rs +++ b/src/decoders/trx-ftx/src/monitor.rs @@ -12,11 +12,13 @@ use rustfft::FftPlanner; use crate::protocol::FtxProtocol; -/// Waterfall element storing magnitude (dB) and phase (radians). +/// Waterfall element storing magnitude (dB), phase (radians), and raw complex components. 
#[derive(Clone, Copy, Default)] pub struct WfElem { pub mag: f32, pub phase: f32, + pub re: f32, + pub im: f32, } impl WfElem { @@ -192,9 +194,10 @@ impl Monitor { } // Windowed FFT - for pos in 0..self.nfft { - self.fft_input[pos] = self.window[pos] * self.last_frame[pos]; - } + self.fft_input + .iter_mut() + .zip(self.window.iter().zip(self.last_frame.iter())) + .for_each(|(dst, (w, f))| *dst = w * f); self.real_fft .process_with_scratch( &mut self.fft_input, @@ -214,7 +217,12 @@ impl Monitor { let phase = c.im.atan2(c.re); if offset < self.wf.mag.len() { - self.wf.mag[offset] = WfElem { mag: db, phase }; + self.wf.mag[offset] = WfElem { + mag: db, + phase, + re: c.re, + im: c.im, + }; } offset += 1; @@ -226,6 +234,8 @@ impl Monitor { self.wf.mag[offset] = WfElem { mag: -120.0, phase: 0.0, + re: 0.0, + im: 0.0, }; } offset += 1;