[refactor](trx-ftx): optimize hot paths and deduplicate decoder internals

- Cache generator matrix with OnceLock (P0.1)
- Store raw complex in WfElem, eliminate powf round-trip (P0.2)
- Reuse FFT planners across decode cycles in Ft2Pipeline (P0.3)
- Deduplicate fast_atanh/ldpc_check into ldpc.rs (P1.1)
- Gate unused sum-product ldpc_decode behind #[cfg(test)] (P1.2)
- Eliminate double pack_bits in verify_crc_and_build_message (P1.3)
- Remove unnecessary unsafe impl Send for Ft8Decoder (P1.4)
- Convert key loops to iterator/zip patterns (P2.1)
- Remove resolved clippy::manual_memcpy suppressions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
2026-03-19 23:22:58 +01:00
parent 3dc6918082
commit 2da749b978
8 changed files with 113 additions and 87 deletions
+6 -8
View File
@@ -38,8 +38,7 @@ pub struct FtxMessage {
}
fn wf_elem_to_complex(elem: WfElem) -> Complex32 {
let amplitude = 10.0_f32.powf(elem.mag / 20.0);
Complex32::from_polar(amplitude, elem.phase)
Complex32::new(elem.re, elem.im)
}
fn get_cand_offset(wf: &Waterfall, cand: &Candidate) -> usize {
@@ -55,10 +54,7 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) -> &WfElem {
if i < wf.mag.len() {
&wf.mag[i]
} else {
&WfElem {
mag: -120.0,
phase: 0.0,
}
&DEFAULT_WF_ELEM
}
}
@@ -66,6 +62,8 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) -> &WfElem {
/// Fallback element that `wf_mag_at` returns a reference to when the computed
/// index is not below `wf.mag.len()`: magnitude -120.0, zero phase, and zero
/// raw complex components.
static DEFAULT_WF_ELEM: WfElem = WfElem {
mag: -120.0,
phase: 0.0,
re: 0.0,
im: 0.0,
};
fn wf_mag_safe(wf: &Waterfall, idx: usize) -> &WfElem {
@@ -534,6 +532,7 @@ pub(crate) fn verify_crc_and_build_message(
let mut a91 = [0u8; crate::protocol::FTX_LDPC_K_BYTES];
pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91);
let a91_orig = a91;
let crc_extracted = crate::crc::ftx_extract_crc(&a91);
a91[9] &= 0xF8;
a91[10] = 0x00;
@@ -543,8 +542,7 @@ pub(crate) fn verify_crc_and_build_message(
return None;
}
// Re-read a91 since we modified it for CRC check
pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91);
let a91 = a91_orig;
let mut message = FtxMessage {
hash: crc_calculated,
-3
View File
@@ -45,9 +45,6 @@ pub struct Ft8Decoder {
ft2_pipeline: Option<crate::ft2::Ft2Pipeline>,
}
// Ft8Decoder is not shared across threads, but may be moved between tasks.
// NOTE(review): this impl asserts `Send` without any compiler check. The commit
// message lists it as unnecessary (P1.4) — presumably every field is already
// `Send`; confirm that before relying on the auto trait alone.
unsafe impl Send for Ft8Decoder {}
impl Ft8Decoder {
/// Create a new FT8 decoder.
pub fn new(sample_rate: u32) -> Result<Self, String> {
+30 -4
View File
@@ -9,6 +9,8 @@
//! around that frequency, applies a spectral window, and inverse-FFTs to produce
//! a complex baseband signal at a reduced sample rate (12000/NDOWN = 1333.3 Hz).
use std::sync::Arc;
use num_complex::Complex32;
use rustfft::FftPlanner;
@@ -64,8 +66,22 @@ impl DownsampleContext {
/// Initialize the downsample context by computing the forward FFT of
/// the raw audio and preparing the spectral window.
///
/// This constructor takes no FFT plans: it forwards to
/// [`Self::new_with_plans`] with `None` for both, so fresh planners are
/// created on each call. Callers that decode repeatedly can pass pre-built
/// plans to `new_with_plans` instead; there the real FFT must be a forward
/// plan of length `nraw` and the IFFT an inverse plan of length
/// `nraw / NDOWN`.
///
/// Returns `None` if the raw audio is too short or allocation fails.
pub fn new(raw_audio: &[f32], sample_rate: f32) -> Option<Self> {
Self::new_with_plans(raw_audio, sample_rate, None, None)
}
/// Initialize with optional pre-built FFT plans for reuse across decode cycles.
pub fn new_with_plans(
raw_audio: &[f32],
sample_rate: f32,
real_fft: Option<Arc<dyn realfft::RealToComplex<f32>>>,
ifft: Option<Arc<dyn rustfft::Fft<f32>>>,
) -> Option<Self> {
let nraw = raw_audio.len();
if nraw == 0 {
return None;
@@ -85,8 +101,13 @@ impl DownsampleContext {
}
// Forward real FFT of raw audio
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
let fft = real_planner.plan_fft_forward(nraw);
let fft = match real_fft {
Some(f) => f,
None => {
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
real_planner.plan_fft_forward(nraw)
}
};
let mut input = fft.make_input_vec();
let mut output = fft.make_output_vec();
let mut scratch = fft.make_scratch_vec();
@@ -98,8 +119,13 @@ impl DownsampleContext {
let spectrum = output;
// IFFT plan for downsampled length
let mut planner = FftPlanner::<f32>::new();
let ifft = planner.plan_fft_inverse(nfft2);
let ifft = match ifft {
Some(f) => f,
None => {
let mut planner = FftPlanner::<f32>::new();
planner.plan_fft_inverse(nfft2)
}
};
let ifft_scratch_len = ifft.get_inplace_scratch_len();
Some(Self {
+21 -1
View File
@@ -13,8 +13,11 @@ pub mod downsample;
pub mod osd;
pub mod sync;
use std::sync::Arc;
use num_complex::Complex32;
use realfft::RealFftPlanner;
use rustfft::FftPlanner;
use crate::decode::{verify_crc_and_build_message, FtxMessage};
use crate::protocol::*;
@@ -117,6 +120,9 @@ pub struct Ft2Pipeline {
raw_capacity: usize,
waveforms: SyncWaveforms,
peak_search: PeakSearchWorkspace,
// Cached FFT plans reused across decode cycles
ds_real_fft: Arc<dyn realfft::RealToComplex<f32>>,
ds_ifft: Arc<dyn rustfft::Fft<f32>>,
}
struct Ft2DecodeWorkspace {
@@ -176,12 +182,21 @@ impl PeakSearchWorkspace {
impl Ft2Pipeline {
/// Build an FT2 pipeline that will process audio sampled at `sample_rate`.
///
/// The forward real FFT (length `FT2_NMAX`) and the inverse complex FFT
/// (length `FT2_NMAX / FT2_NDOWN`) for the downsample context are planned
/// here once and stored on the pipeline.
pub fn new(sample_rate: i32) -> Self {
    // Plan both downsample FFTs up front; each planner is only needed for
    // the single planning call, so it lives as a temporary.
    let forward_plan = RealFftPlanner::<f32>::new().plan_fft_forward(FT2_NMAX);
    let inverse_len = FT2_NMAX / FT2_NDOWN;
    let inverse_plan = FftPlanner::<f32>::new().plan_fft_inverse(inverse_len);

    let raw_capacity = FT2_NMAX;
    Self {
        sample_rate: sample_rate as f32,
        raw_audio: Vec::with_capacity(raw_capacity),
        raw_capacity,
        waveforms: prepare_sync_waveforms(),
        peak_search: PeakSearchWorkspace::new(),
        ds_real_fft: forward_plan,
        ds_ifft: inverse_plan,
    }
}
@@ -216,7 +231,12 @@ impl Ft2Pipeline {
return Vec::new();
}
let ctx = match DownsampleContext::new(&self.raw_audio, self.sample_rate) {
let ctx = match DownsampleContext::new_with_plans(
&self.raw_audio,
self.sample_rate,
Some(Arc::clone(&self.ds_real_fft)),
Some(Arc::clone(&self.ds_ifft)),
) {
Some(ctx) => ctx,
None => return Vec::new(),
};
+35 -56
View File
@@ -16,39 +16,15 @@
//! 3. Exhaustive search over bit-flip patterns of increasing weight
//! 4. Pattern hashing (OSD-2) to efficiently search two-bit-flip corrections
use std::sync::OnceLock;
use crate::constants::{FTX_LDPC_GENERATOR, FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS};
use crate::crc::{ftx_compute_crc, ftx_extract_crc};
use crate::decode::pack_bits;
use crate::encode::parity8;
use crate::ldpc::ldpc_check;
use crate::protocol::{FTX_LDPC_K, FTX_LDPC_K_BYTES, FTX_LDPC_M, FTX_LDPC_N};
/// Count how many LDPC parity checks a codeword fails.
///
/// For each of the `FTX_LDPC_M` check rows, the first `FTX_LDPC_NUM_ROWS[row]`
/// entries of `FTX_LDPC_NM[row]` are read as 1-based bit positions and the
/// referenced bits are XOR-ed together. Entries equal to 0, or pointing past
/// the end of `codeword`, are ignored. A row whose XOR is non-zero counts as
/// one error; the return value is the number of such rows.
pub fn ft2_ldpc_check(codeword: &[u8]) -> i32 {
    (0..FTX_LDPC_M)
        .filter(|&row| {
            let active = FTX_LDPC_NUM_ROWS[row] as usize;
            let parity = (0..active)
                .map(|col| FTX_LDPC_NM[row][col] as usize)
                // 1-based -> 0-based; a zero entry has no bit to contribute.
                .filter_map(|one_based| one_based.checked_sub(1))
                // Positions beyond the supplied slice are skipped.
                .filter_map(|pos| codeword.get(pos))
                .fold(0u8, |acc, &bit| acc ^ bit);
            parity != 0
        })
        .count() as i32
}
/// Rational approximation of `atanh(x)`.
///
/// Evaluates `x * (945 - 735 x^2 + 64 x^4) / (945 - 1050 x^2 + 225 x^4)`,
/// with both polynomials in nested (Horner) form.
fn fast_atanh(x: f32) -> f32 {
    let sq = x * x;

    let numerator_tail = -735.0 + sq * 64.0;
    let numerator = x * (945.0 + sq * numerator_tail);

    let denominator_tail = -1050.0 + sq * 225.0;
    let denominator = 945.0 + sq * denominator_tail;

    numerator / denominator
}
/// Piecewise linear approximation of `atanh(x)` used in BP message passing.
fn platanh(x: f32) -> f32 {
let isign: f32 = if x < 0.0 { -1.0 } else { 1.0 };
@@ -102,23 +78,23 @@ fn encode174_91_nocrc_bits(message91: &[u8], codeword: &mut [u8; FTX_LDPC_N]) {
/// XOR two byte slices.
fn xor_rows(dst: &mut [u8], src: &[u8], len: usize) {
for i in 0..len {
dst[i] ^= src[i];
}
dst[..len]
.iter_mut()
.zip(&src[..len])
.for_each(|(d, s)| *d ^= s);
}
/// Matrix-vector multiply for re-encoding in OSD.
fn mrbencode91(me: &[u8], codeword: &mut [u8], g2: &[u8], n: usize, k: usize) {
for c in codeword[..n].iter_mut() {
*c = 0;
}
codeword[..n].fill(0);
for i in 0..k {
if me[i] == 0 {
continue;
}
for j in 0..n {
codeword[j] ^= g2[j * k + i];
}
codeword[..n]
.iter_mut()
.enumerate()
.for_each(|(j, c)| *c ^= g2[j * k + i]);
}
}
@@ -269,9 +245,9 @@ pub fn osd174_91(
let n = FTX_LDPC_N;
let ndeep = ndeep.min(6);
// Build per-bit generator matrix (each row i generates codeword from
// Cached per-bit generator matrix (each row i generates codeword from
// unit vector e_i)
let gen = build_generator_matrix();
let gen = generator_matrix();
// Stack-allocated working buffers (k=91, n=174, n-k=83).
let mut genmrb = [0u8; FTX_LDPC_K * FTX_LDPC_N];
@@ -608,22 +584,24 @@ fn reorder_result(
}
}
/// Build the full per-bit generator matrix.
/// Each row `i` contains the 174-bit codeword produced by encoding
/// a unit vector with bit `i` set.
fn build_generator_matrix() -> Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]> {
let mut gen = Box::new([[0u8; FTX_LDPC_N]; FTX_LDPC_K]);
for i in 0..FTX_LDPC_K {
let mut msg = [0u8; FTX_LDPC_K];
msg[i] = 1;
if i < 77 {
for j in 77..FTX_LDPC_K {
msg[j] = 0;
/// Get a reference to the cached generator matrix.
/// The matrix is computed once on first call and reused thereafter.
fn generator_matrix() -> &'static [[u8; FTX_LDPC_N]; FTX_LDPC_K] {
static GEN: OnceLock<Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]>> = OnceLock::new();
GEN.get_or_init(|| {
let mut gen = Box::new([[0u8; FTX_LDPC_N]; FTX_LDPC_K]);
for i in 0..FTX_LDPC_K {
let mut msg = [0u8; FTX_LDPC_K];
msg[i] = 1;
if i < 77 {
for j in 77..FTX_LDPC_K {
msg[j] = 0;
}
}
encode174_91_nocrc_bits(&msg, &mut gen[i]);
}
encode174_91_nocrc_bits(&msg, &mut gen[i]);
}
gen
gen
})
}
/// Full iterative BP decoder with OSD refinement.
@@ -698,7 +676,7 @@ pub fn ft2_decode174_91_osd(
for i in 0..FTX_LDPC_N {
best_cw[i] = if zn[i] > 0.0 { 1 } else { 0 };
}
let ncheck = ft2_ldpc_check(&best_cw);
let ncheck = ldpc_check(&best_cw);
if ncheck == 0 && check_crc91(&best_cw) {
message91.copy_from_slice(&best_cw[..FTX_LDPC_K]);
@@ -815,18 +793,19 @@ pub fn ft2_decode174_91_osd(
#[cfg(test)]
mod tests {
use super::*;
use crate::ldpc::fast_atanh;
#[test]
fn ldpc_check_all_zeros() {
let cw = [0u8; FTX_LDPC_N];
assert_eq!(ft2_ldpc_check(&cw), 0);
assert_eq!(ldpc_check(&cw), 0);
}
#[test]
fn ldpc_check_single_bit_error() {
let mut cw = [0u8; FTX_LDPC_N];
cw[0] = 1;
assert!(ft2_ldpc_check(&cw) > 0);
assert!(ldpc_check(&cw) > 0);
}
#[test]
@@ -922,7 +901,7 @@ mod tests {
#[test]
fn generator_matrix_row_zero() {
let gen = build_generator_matrix();
let gen = generator_matrix();
// Row 0 should encode unit vector e_0
assert_eq!(gen[0][0], 1);
// Some parity bits should be non-zero
+4 -3
View File
@@ -13,7 +13,7 @@ use crate::constants::{FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS};
use crate::protocol::{FTX_LDPC_M, FTX_LDPC_N};
/// Fast rational approximation of `tanh(x)`, clamped at +/-4.97.
fn fast_tanh(x: f32) -> f32 {
pub(crate) fn fast_tanh(x: f32) -> f32 {
if x < -4.97f32 {
return -1.0f32;
}
@@ -27,7 +27,7 @@ fn fast_tanh(x: f32) -> f32 {
}
/// Fast rational approximation of `atanh(x)`.
fn fast_atanh(x: f32) -> f32 {
pub(crate) fn fast_atanh(x: f32) -> f32 {
let x2 = x * x;
let a = x * (945.0f32 + x2 * (-735.0f32 + x2 * 64.0f32));
let b = 945.0f32 + x2 * (-1050.0f32 + x2 * 225.0f32);
@@ -37,7 +37,7 @@ fn fast_atanh(x: f32) -> f32 {
/// Count the number of LDPC parity errors in a 174-bit codeword.
///
/// Returns 0 if all parity checks pass (valid codeword).
pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
pub(crate) fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
let mut errors = 0i32;
for m in 0..FTX_LDPC_M {
let mut x: u8 = 0;
@@ -59,6 +59,7 @@ pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
/// `max_iters` controls how many iterations to attempt.
///
/// Returns the number of remaining parity errors (0 = success).
#[cfg(test)]
pub fn ldpc_decode(
codeword: &mut [f32; FTX_LDPC_N],
max_iters: usize,
+2 -7
View File
@@ -10,14 +10,9 @@ pub mod decode;
mod decoder;
#[allow(clippy::needless_range_loop)]
pub mod encode;
#[allow(
dead_code,
clippy::manual_memcpy,
clippy::needless_range_loop,
clippy::too_many_arguments
)]
#[allow(dead_code, clippy::needless_range_loop, clippy::too_many_arguments)]
pub mod ft2;
#[allow(clippy::manual_memcpy, clippy::needless_range_loop)]
#[allow(clippy::needless_range_loop)]
pub mod ldpc;
#[allow(clippy::explicit_counter_loop, clippy::needless_range_loop)]
pub mod message;
+15 -5
View File
@@ -12,11 +12,13 @@ use rustfft::FftPlanner;
use crate::protocol::FtxProtocol;
/// Waterfall element storing magnitude (dB) and phase (radians).
/// Waterfall element storing magnitude (dB), phase (radians), and raw complex components.
#[derive(Clone, Copy, Default)]
pub struct WfElem {
/// Magnitude in dB.
pub mag: f32,
/// Phase in radians.
pub phase: f32,
/// Real part of the raw complex value.
pub re: f32,
/// Imaginary part of the raw complex value.
pub im: f32,
}
impl WfElem {
@@ -192,9 +194,10 @@ impl Monitor {
}
// Windowed FFT
for pos in 0..self.nfft {
self.fft_input[pos] = self.window[pos] * self.last_frame[pos];
}
self.fft_input
.iter_mut()
.zip(self.window.iter().zip(self.last_frame.iter()))
.for_each(|(dst, (w, f))| *dst = w * f);
self.real_fft
.process_with_scratch(
&mut self.fft_input,
@@ -214,7 +217,12 @@ impl Monitor {
let phase = c.im.atan2(c.re);
if offset < self.wf.mag.len() {
self.wf.mag[offset] = WfElem { mag: db, phase };
self.wf.mag[offset] = WfElem {
mag: db,
phase,
re: c.re,
im: c.im,
};
}
offset += 1;
@@ -226,6 +234,8 @@ impl Monitor {
self.wf.mag[offset] = WfElem {
mag: -120.0,
phase: 0.0,
re: 0.0,
im: 0.0,
};
}
offset += 1;