[refactor](trx-ftx): optimize hot paths and deduplicate decoder internals

- Cache generator matrix with OnceLock (P0.1)
- Store raw complex in WfElem, eliminate powf round-trip (P0.2)
- Reuse FFT planners across decode cycles in Ft2Pipeline (P0.3)
- Deduplicate fast_atanh/ldpc_check into ldpc.rs (P1.1)
- Gate unused sum-product ldpc_decode behind #[cfg(test)] (P1.2)
- Eliminate double pack_bits in verify_crc_and_build_message (P1.3)
- Remove unnecessary unsafe impl Send for Ft8Decoder (P1.4)
- Convert key loops to iterator/zip patterns (P2.1)
- Remove resolved clippy::manual_memcpy suppressions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
2026-03-19 23:22:58 +01:00
parent 3dc6918082
commit 2da749b978
8 changed files with 113 additions and 87 deletions
+6 -8
View File
@@ -38,8 +38,7 @@ pub struct FtxMessage {
} }
fn wf_elem_to_complex(elem: WfElem) -> Complex32 { fn wf_elem_to_complex(elem: WfElem) -> Complex32 {
let amplitude = 10.0_f32.powf(elem.mag / 20.0); Complex32::new(elem.re, elem.im)
Complex32::from_polar(amplitude, elem.phase)
} }
fn get_cand_offset(wf: &Waterfall, cand: &Candidate) -> usize { fn get_cand_offset(wf: &Waterfall, cand: &Candidate) -> usize {
@@ -55,10 +54,7 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) -> &WfElem {
if i < wf.mag.len() { if i < wf.mag.len() {
&wf.mag[i] &wf.mag[i]
} else { } else {
&WfElem { &DEFAULT_WF_ELEM
mag: -120.0,
phase: 0.0,
}
} }
} }
@@ -66,6 +62,8 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) -> &WfElem {
static DEFAULT_WF_ELEM: WfElem = WfElem { static DEFAULT_WF_ELEM: WfElem = WfElem {
mag: -120.0, mag: -120.0,
phase: 0.0, phase: 0.0,
re: 0.0,
im: 0.0,
}; };
fn wf_mag_safe(wf: &Waterfall, idx: usize) -> &WfElem { fn wf_mag_safe(wf: &Waterfall, idx: usize) -> &WfElem {
@@ -534,6 +532,7 @@ pub(crate) fn verify_crc_and_build_message(
let mut a91 = [0u8; crate::protocol::FTX_LDPC_K_BYTES]; let mut a91 = [0u8; crate::protocol::FTX_LDPC_K_BYTES];
pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91); pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91);
let a91_orig = a91;
let crc_extracted = crate::crc::ftx_extract_crc(&a91); let crc_extracted = crate::crc::ftx_extract_crc(&a91);
a91[9] &= 0xF8; a91[9] &= 0xF8;
a91[10] = 0x00; a91[10] = 0x00;
@@ -543,8 +542,7 @@ pub(crate) fn verify_crc_and_build_message(
return None; return None;
} }
// Re-read a91 since we modified it for CRC check let a91 = a91_orig;
pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91);
let mut message = FtxMessage { let mut message = FtxMessage {
hash: crc_calculated, hash: crc_calculated,
-3
View File
@@ -45,9 +45,6 @@ pub struct Ft8Decoder {
ft2_pipeline: Option<crate::ft2::Ft2Pipeline>, ft2_pipeline: Option<crate::ft2::Ft2Pipeline>,
} }
// Ft8Decoder is not shared across threads, but may be moved between tasks.
unsafe impl Send for Ft8Decoder {}
impl Ft8Decoder { impl Ft8Decoder {
/// Create a new FT8 decoder. /// Create a new FT8 decoder.
pub fn new(sample_rate: u32) -> Result<Self, String> { pub fn new(sample_rate: u32) -> Result<Self, String> {
+30 -4
View File
@@ -9,6 +9,8 @@
//! around that frequency, applies a spectral window, and inverse-FFTs to produce //! around that frequency, applies a spectral window, and inverse-FFTs to produce
//! a complex baseband signal at a reduced sample rate (12000/NDOWN = 1333.3 Hz). //! a complex baseband signal at a reduced sample rate (12000/NDOWN = 1333.3 Hz).
use std::sync::Arc;
use num_complex::Complex32; use num_complex::Complex32;
use rustfft::FftPlanner; use rustfft::FftPlanner;
@@ -64,8 +66,22 @@ impl DownsampleContext {
/// Initialize the downsample context by computing the forward FFT of /// Initialize the downsample context by computing the forward FFT of
/// the raw audio and preparing the spectral window. /// the raw audio and preparing the spectral window.
/// ///
/// This convenience constructor plans fresh FFTs on every call. To reuse
/// plans across calls, use `new_with_plans`: its `real_fft` must be a forward
/// plan of length `nraw` and its `ifft` an inverse plan of length `nraw / NDOWN`.
///
/// Returns `None` if the raw audio is too short or allocation fails. /// Returns `None` if the raw audio is too short or allocation fails.
pub fn new(raw_audio: &[f32], sample_rate: f32) -> Option<Self> { pub fn new(raw_audio: &[f32], sample_rate: f32) -> Option<Self> {
Self::new_with_plans(raw_audio, sample_rate, None, None)
}
/// Initialize with optional pre-built FFT plans for reuse across decode cycles.
pub fn new_with_plans(
raw_audio: &[f32],
sample_rate: f32,
real_fft: Option<Arc<dyn realfft::RealToComplex<f32>>>,
ifft: Option<Arc<dyn rustfft::Fft<f32>>>,
) -> Option<Self> {
let nraw = raw_audio.len(); let nraw = raw_audio.len();
if nraw == 0 { if nraw == 0 {
return None; return None;
@@ -85,8 +101,13 @@ impl DownsampleContext {
} }
// Forward real FFT of raw audio // Forward real FFT of raw audio
let mut real_planner = realfft::RealFftPlanner::<f32>::new(); let fft = match real_fft {
let fft = real_planner.plan_fft_forward(nraw); Some(f) => f,
None => {
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
real_planner.plan_fft_forward(nraw)
}
};
let mut input = fft.make_input_vec(); let mut input = fft.make_input_vec();
let mut output = fft.make_output_vec(); let mut output = fft.make_output_vec();
let mut scratch = fft.make_scratch_vec(); let mut scratch = fft.make_scratch_vec();
@@ -98,8 +119,13 @@ impl DownsampleContext {
let spectrum = output; let spectrum = output;
// IFFT plan for downsampled length // IFFT plan for downsampled length
let mut planner = FftPlanner::<f32>::new(); let ifft = match ifft {
let ifft = planner.plan_fft_inverse(nfft2); Some(f) => f,
None => {
let mut planner = FftPlanner::<f32>::new();
planner.plan_fft_inverse(nfft2)
}
};
let ifft_scratch_len = ifft.get_inplace_scratch_len(); let ifft_scratch_len = ifft.get_inplace_scratch_len();
Some(Self { Some(Self {
+21 -1
View File
@@ -13,8 +13,11 @@ pub mod downsample;
pub mod osd; pub mod osd;
pub mod sync; pub mod sync;
use std::sync::Arc;
use num_complex::Complex32; use num_complex::Complex32;
use realfft::RealFftPlanner; use realfft::RealFftPlanner;
use rustfft::FftPlanner;
use crate::decode::{verify_crc_and_build_message, FtxMessage}; use crate::decode::{verify_crc_and_build_message, FtxMessage};
use crate::protocol::*; use crate::protocol::*;
@@ -117,6 +120,9 @@ pub struct Ft2Pipeline {
raw_capacity: usize, raw_capacity: usize,
waveforms: SyncWaveforms, waveforms: SyncWaveforms,
peak_search: PeakSearchWorkspace, peak_search: PeakSearchWorkspace,
// Cached FFT plans reused across decode cycles
ds_real_fft: Arc<dyn realfft::RealToComplex<f32>>,
ds_ifft: Arc<dyn rustfft::Fft<f32>>,
} }
struct Ft2DecodeWorkspace { struct Ft2DecodeWorkspace {
@@ -176,12 +182,21 @@ impl PeakSearchWorkspace {
impl Ft2Pipeline { impl Ft2Pipeline {
/// Create a new FT2 pipeline for the given sample rate. /// Create a new FT2 pipeline for the given sample rate.
pub fn new(sample_rate: i32) -> Self { pub fn new(sample_rate: i32) -> Self {
// Pre-build FFT plans for the downsample context (reused every decode cycle)
let nfft2 = FT2_NMAX / FT2_NDOWN;
let mut real_planner = RealFftPlanner::<f32>::new();
let ds_real_fft = real_planner.plan_fft_forward(FT2_NMAX);
let mut fft_planner = FftPlanner::<f32>::new();
let ds_ifft = fft_planner.plan_fft_inverse(nfft2);
Self { Self {
sample_rate: sample_rate as f32, sample_rate: sample_rate as f32,
raw_audio: Vec::with_capacity(FT2_NMAX), raw_audio: Vec::with_capacity(FT2_NMAX),
raw_capacity: FT2_NMAX, raw_capacity: FT2_NMAX,
waveforms: prepare_sync_waveforms(), waveforms: prepare_sync_waveforms(),
peak_search: PeakSearchWorkspace::new(), peak_search: PeakSearchWorkspace::new(),
ds_real_fft,
ds_ifft,
} }
} }
@@ -216,7 +231,12 @@ impl Ft2Pipeline {
return Vec::new(); return Vec::new();
} }
let ctx = match DownsampleContext::new(&self.raw_audio, self.sample_rate) { let ctx = match DownsampleContext::new_with_plans(
&self.raw_audio,
self.sample_rate,
Some(Arc::clone(&self.ds_real_fft)),
Some(Arc::clone(&self.ds_ifft)),
) {
Some(ctx) => ctx, Some(ctx) => ctx,
None => return Vec::new(), None => return Vec::new(),
}; };
+35 -56
View File
@@ -16,39 +16,15 @@
//! 3. Exhaustive search over bit-flip patterns of increasing weight //! 3. Exhaustive search over bit-flip patterns of increasing weight
//! 4. Pattern hashing (OSD-2) to efficiently search two-bit-flip corrections //! 4. Pattern hashing (OSD-2) to efficiently search two-bit-flip corrections
use std::sync::OnceLock;
use crate::constants::{FTX_LDPC_GENERATOR, FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS}; use crate::constants::{FTX_LDPC_GENERATOR, FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS};
use crate::crc::{ftx_compute_crc, ftx_extract_crc}; use crate::crc::{ftx_compute_crc, ftx_extract_crc};
use crate::decode::pack_bits; use crate::decode::pack_bits;
use crate::encode::parity8; use crate::encode::parity8;
use crate::ldpc::ldpc_check;
use crate::protocol::{FTX_LDPC_K, FTX_LDPC_K_BYTES, FTX_LDPC_M, FTX_LDPC_N}; use crate::protocol::{FTX_LDPC_K, FTX_LDPC_K_BYTES, FTX_LDPC_M, FTX_LDPC_N};
/// Check LDPC parity of a 174-bit codeword. Returns number of parity errors.
pub fn ft2_ldpc_check(codeword: &[u8]) -> i32 {
let mut errors = 0i32;
for m in 0..FTX_LDPC_M {
let mut x: u8 = 0;
let num_rows = FTX_LDPC_NUM_ROWS[m] as usize;
for i in 0..num_rows {
let idx = FTX_LDPC_NM[m][i] as usize;
if idx > 0 && idx - 1 < codeword.len() {
x ^= codeword[idx - 1];
}
}
if x != 0 {
errors += 1;
}
}
errors
}
/// Fast rational approximation of `atanh(x)`.
fn fast_atanh(x: f32) -> f32 {
let x2 = x * x;
let a = x * (945.0 + x2 * (-735.0 + x2 * 64.0));
let b = 945.0 + x2 * (-1050.0 + x2 * 225.0);
a / b
}
/// Piecewise linear approximation of `atanh(x)` used in BP message passing. /// Piecewise linear approximation of `atanh(x)` used in BP message passing.
fn platanh(x: f32) -> f32 { fn platanh(x: f32) -> f32 {
let isign: f32 = if x < 0.0 { -1.0 } else { 1.0 }; let isign: f32 = if x < 0.0 { -1.0 } else { 1.0 };
@@ -102,23 +78,23 @@ fn encode174_91_nocrc_bits(message91: &[u8], codeword: &mut [u8; FTX_LDPC_N]) {
/// XOR two byte slices. /// XOR two byte slices.
fn xor_rows(dst: &mut [u8], src: &[u8], len: usize) { fn xor_rows(dst: &mut [u8], src: &[u8], len: usize) {
for i in 0..len { dst[..len]
dst[i] ^= src[i]; .iter_mut()
} .zip(&src[..len])
.for_each(|(d, s)| *d ^= s);
} }
/// Matrix-vector multiply for re-encoding in OSD. /// Matrix-vector multiply for re-encoding in OSD.
fn mrbencode91(me: &[u8], codeword: &mut [u8], g2: &[u8], n: usize, k: usize) { fn mrbencode91(me: &[u8], codeword: &mut [u8], g2: &[u8], n: usize, k: usize) {
for c in codeword[..n].iter_mut() { codeword[..n].fill(0);
*c = 0;
}
for i in 0..k { for i in 0..k {
if me[i] == 0 { if me[i] == 0 {
continue; continue;
} }
for j in 0..n { codeword[..n]
codeword[j] ^= g2[j * k + i]; .iter_mut()
} .enumerate()
.for_each(|(j, c)| *c ^= g2[j * k + i]);
} }
} }
@@ -269,9 +245,9 @@ pub fn osd174_91(
let n = FTX_LDPC_N; let n = FTX_LDPC_N;
let ndeep = ndeep.min(6); let ndeep = ndeep.min(6);
// Build per-bit generator matrix (each row i generates codeword from // Cached per-bit generator matrix (each row i generates codeword from
// unit vector e_i) // unit vector e_i)
let gen = build_generator_matrix(); let gen = generator_matrix();
// Stack-allocated working buffers (k=91, n=174, n-k=83). // Stack-allocated working buffers (k=91, n=174, n-k=83).
let mut genmrb = [0u8; FTX_LDPC_K * FTX_LDPC_N]; let mut genmrb = [0u8; FTX_LDPC_K * FTX_LDPC_N];
@@ -608,22 +584,24 @@ fn reorder_result(
} }
} }
/// Build the full per-bit generator matrix. /// Get a reference to the cached generator matrix.
/// Each row `i` contains the 174-bit codeword produced by encoding /// The matrix is computed once on first call and reused thereafter.
/// a unit vector with bit `i` set. fn generator_matrix() -> &'static [[u8; FTX_LDPC_N]; FTX_LDPC_K] {
fn build_generator_matrix() -> Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]> { static GEN: OnceLock<Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]>> = OnceLock::new();
let mut gen = Box::new([[0u8; FTX_LDPC_N]; FTX_LDPC_K]); GEN.get_or_init(|| {
for i in 0..FTX_LDPC_K { let mut gen = Box::new([[0u8; FTX_LDPC_N]; FTX_LDPC_K]);
let mut msg = [0u8; FTX_LDPC_K]; for i in 0..FTX_LDPC_K {
msg[i] = 1; let mut msg = [0u8; FTX_LDPC_K];
if i < 77 { msg[i] = 1;
for j in 77..FTX_LDPC_K { if i < 77 {
msg[j] = 0; for j in 77..FTX_LDPC_K {
msg[j] = 0;
}
} }
encode174_91_nocrc_bits(&msg, &mut gen[i]);
} }
encode174_91_nocrc_bits(&msg, &mut gen[i]); gen
} })
gen
} }
/// Full iterative BP decoder with OSD refinement. /// Full iterative BP decoder with OSD refinement.
@@ -698,7 +676,7 @@ pub fn ft2_decode174_91_osd(
for i in 0..FTX_LDPC_N { for i in 0..FTX_LDPC_N {
best_cw[i] = if zn[i] > 0.0 { 1 } else { 0 }; best_cw[i] = if zn[i] > 0.0 { 1 } else { 0 };
} }
let ncheck = ft2_ldpc_check(&best_cw); let ncheck = ldpc_check(&best_cw);
if ncheck == 0 && check_crc91(&best_cw) { if ncheck == 0 && check_crc91(&best_cw) {
message91.copy_from_slice(&best_cw[..FTX_LDPC_K]); message91.copy_from_slice(&best_cw[..FTX_LDPC_K]);
@@ -815,18 +793,19 @@ pub fn ft2_decode174_91_osd(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::ldpc::fast_atanh;
#[test] #[test]
fn ldpc_check_all_zeros() { fn ldpc_check_all_zeros() {
let cw = [0u8; FTX_LDPC_N]; let cw = [0u8; FTX_LDPC_N];
assert_eq!(ft2_ldpc_check(&cw), 0); assert_eq!(ldpc_check(&cw), 0);
} }
#[test] #[test]
fn ldpc_check_single_bit_error() { fn ldpc_check_single_bit_error() {
let mut cw = [0u8; FTX_LDPC_N]; let mut cw = [0u8; FTX_LDPC_N];
cw[0] = 1; cw[0] = 1;
assert!(ft2_ldpc_check(&cw) > 0); assert!(ldpc_check(&cw) > 0);
} }
#[test] #[test]
@@ -922,7 +901,7 @@ mod tests {
#[test] #[test]
fn generator_matrix_row_zero() { fn generator_matrix_row_zero() {
let gen = build_generator_matrix(); let gen = generator_matrix();
// Row 0 should encode unit vector e_0 // Row 0 should encode unit vector e_0
assert_eq!(gen[0][0], 1); assert_eq!(gen[0][0], 1);
// Some parity bits should be non-zero // Some parity bits should be non-zero
+4 -3
View File
@@ -13,7 +13,7 @@ use crate::constants::{FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS};
use crate::protocol::{FTX_LDPC_M, FTX_LDPC_N}; use crate::protocol::{FTX_LDPC_M, FTX_LDPC_N};
/// Fast rational approximation of `tanh(x)`, clamped at +/-4.97. /// Fast rational approximation of `tanh(x)`, clamped at +/-4.97.
fn fast_tanh(x: f32) -> f32 { pub(crate) fn fast_tanh(x: f32) -> f32 {
if x < -4.97f32 { if x < -4.97f32 {
return -1.0f32; return -1.0f32;
} }
@@ -27,7 +27,7 @@ fn fast_tanh(x: f32) -> f32 {
} }
/// Fast rational approximation of `atanh(x)`. /// Fast rational approximation of `atanh(x)`.
fn fast_atanh(x: f32) -> f32 { pub(crate) fn fast_atanh(x: f32) -> f32 {
let x2 = x * x; let x2 = x * x;
let a = x * (945.0f32 + x2 * (-735.0f32 + x2 * 64.0f32)); let a = x * (945.0f32 + x2 * (-735.0f32 + x2 * 64.0f32));
let b = 945.0f32 + x2 * (-1050.0f32 + x2 * 225.0f32); let b = 945.0f32 + x2 * (-1050.0f32 + x2 * 225.0f32);
@@ -37,7 +37,7 @@ fn fast_atanh(x: f32) -> f32 {
/// Count the number of LDPC parity errors in a 174-bit codeword. /// Count the number of LDPC parity errors in a 174-bit codeword.
/// ///
/// Returns 0 if all parity checks pass (valid codeword). /// Returns 0 if all parity checks pass (valid codeword).
pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 { pub(crate) fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
let mut errors = 0i32; let mut errors = 0i32;
for m in 0..FTX_LDPC_M { for m in 0..FTX_LDPC_M {
let mut x: u8 = 0; let mut x: u8 = 0;
@@ -59,6 +59,7 @@ pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
/// `max_iters` controls how many iterations to attempt. /// `max_iters` controls how many iterations to attempt.
/// ///
/// Returns the number of remaining parity errors (0 = success). /// Returns the number of remaining parity errors (0 = success).
#[cfg(test)]
pub fn ldpc_decode( pub fn ldpc_decode(
codeword: &mut [f32; FTX_LDPC_N], codeword: &mut [f32; FTX_LDPC_N],
max_iters: usize, max_iters: usize,
+2 -7
View File
@@ -10,14 +10,9 @@ pub mod decode;
mod decoder; mod decoder;
#[allow(clippy::needless_range_loop)] #[allow(clippy::needless_range_loop)]
pub mod encode; pub mod encode;
#[allow( #[allow(dead_code, clippy::needless_range_loop, clippy::too_many_arguments)]
dead_code,
clippy::manual_memcpy,
clippy::needless_range_loop,
clippy::too_many_arguments
)]
pub mod ft2; pub mod ft2;
#[allow(clippy::manual_memcpy, clippy::needless_range_loop)] #[allow(clippy::needless_range_loop)]
pub mod ldpc; pub mod ldpc;
#[allow(clippy::explicit_counter_loop, clippy::needless_range_loop)] #[allow(clippy::explicit_counter_loop, clippy::needless_range_loop)]
pub mod message; pub mod message;
+15 -5
View File
@@ -12,11 +12,13 @@ use rustfft::FftPlanner;
use crate::protocol::FtxProtocol; use crate::protocol::FtxProtocol;
/// Waterfall element storing magnitude (dB) and phase (radians). /// Waterfall element storing magnitude (dB), phase (radians), and raw complex components.
#[derive(Clone, Copy, Default)] #[derive(Clone, Copy, Default)]
pub struct WfElem { pub struct WfElem {
pub mag: f32, pub mag: f32,
pub phase: f32, pub phase: f32,
pub re: f32,
pub im: f32,
} }
impl WfElem { impl WfElem {
@@ -192,9 +194,10 @@ impl Monitor {
} }
// Windowed FFT // Windowed FFT
for pos in 0..self.nfft { self.fft_input
self.fft_input[pos] = self.window[pos] * self.last_frame[pos]; .iter_mut()
} .zip(self.window.iter().zip(self.last_frame.iter()))
.for_each(|(dst, (w, f))| *dst = w * f);
self.real_fft self.real_fft
.process_with_scratch( .process_with_scratch(
&mut self.fft_input, &mut self.fft_input,
@@ -214,7 +217,12 @@ impl Monitor {
let phase = c.im.atan2(c.re); let phase = c.im.atan2(c.re);
if offset < self.wf.mag.len() { if offset < self.wf.mag.len() {
self.wf.mag[offset] = WfElem { mag: db, phase }; self.wf.mag[offset] = WfElem {
mag: db,
phase,
re: c.re,
im: c.im,
};
} }
offset += 1; offset += 1;
@@ -226,6 +234,8 @@ impl Monitor {
self.wf.mag[offset] = WfElem { self.wf.mag[offset] = WfElem {
mag: -120.0, mag: -120.0,
phase: 0.0, phase: 0.0,
re: 0.0,
im: 0.0,
}; };
} }
offset += 1; offset += 1;