[refactor](trx-ftx): optimize hot paths and deduplicate decoder internals
- Cache generator matrix with OnceLock (P0.1)
- Store raw complex in WfElem, eliminate powf round-trip (P0.2)
- Reuse FFT planners across decode cycles in Ft2Pipeline (P0.3)
- Deduplicate fast_atanh/ldpc_check into ldpc.rs (P1.1)
- Gate unused sum-product ldpc_decode behind #[cfg(test)] (P1.2)
- Eliminate double pack_bits in verify_crc_and_build_message (P1.3)
- Remove unnecessary unsafe impl Send for Ft8Decoder (P1.4)
- Convert key loops to iterator/zip patterns (P2.1)
- Remove resolved clippy::manual_memcpy suppressions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
@@ -38,8 +38,7 @@ pub struct FtxMessage {
|
||||
}
|
||||
|
||||
fn wf_elem_to_complex(elem: WfElem) -> Complex32 {
|
||||
let amplitude = 10.0_f32.powf(elem.mag / 20.0);
|
||||
Complex32::from_polar(amplitude, elem.phase)
|
||||
Complex32::new(elem.re, elem.im)
|
||||
}
|
||||
|
||||
fn get_cand_offset(wf: &Waterfall, cand: &Candidate) -> usize {
|
||||
@@ -55,10 +54,7 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) -> &WfElem {
|
||||
if i < wf.mag.len() {
|
||||
&wf.mag[i]
|
||||
} else {
|
||||
&WfElem {
|
||||
mag: -120.0,
|
||||
phase: 0.0,
|
||||
}
|
||||
&DEFAULT_WF_ELEM
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,6 +62,8 @@ fn wf_mag_at(wf: &Waterfall, base: usize, idx: isize) -> &WfElem {
|
||||
static DEFAULT_WF_ELEM: WfElem = WfElem {
|
||||
mag: -120.0,
|
||||
phase: 0.0,
|
||||
re: 0.0,
|
||||
im: 0.0,
|
||||
};
|
||||
|
||||
fn wf_mag_safe(wf: &Waterfall, idx: usize) -> &WfElem {
|
||||
@@ -534,6 +532,7 @@ pub(crate) fn verify_crc_and_build_message(
|
||||
let mut a91 = [0u8; crate::protocol::FTX_LDPC_K_BYTES];
|
||||
pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91);
|
||||
|
||||
let a91_orig = a91;
|
||||
let crc_extracted = crate::crc::ftx_extract_crc(&a91);
|
||||
a91[9] &= 0xF8;
|
||||
a91[10] = 0x00;
|
||||
@@ -543,8 +542,7 @@ pub(crate) fn verify_crc_and_build_message(
|
||||
return None;
|
||||
}
|
||||
|
||||
// Re-read a91 since we modified it for CRC check
|
||||
pack_bits(plain174, crate::protocol::FTX_LDPC_K, &mut a91);
|
||||
let a91 = a91_orig;
|
||||
|
||||
let mut message = FtxMessage {
|
||||
hash: crc_calculated,
|
||||
|
||||
@@ -45,9 +45,6 @@ pub struct Ft8Decoder {
|
||||
ft2_pipeline: Option<crate::ft2::Ft2Pipeline>,
|
||||
}
|
||||
|
||||
// Ft8Decoder is not shared across threads, but may be moved between tasks.
|
||||
unsafe impl Send for Ft8Decoder {}
|
||||
|
||||
impl Ft8Decoder {
|
||||
/// Create a new FT8 decoder.
|
||||
pub fn new(sample_rate: u32) -> Result<Self, String> {
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
//! around that frequency, applies a spectral window, and inverse-FFTs to produce
|
||||
//! a complex baseband signal at a reduced sample rate (12000/NDOWN = 1333.3 Hz).
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use num_complex::Complex32;
|
||||
use rustfft::FftPlanner;
|
||||
|
||||
@@ -64,8 +66,22 @@ impl DownsampleContext {
|
||||
/// Initialize the downsample context by computing the forward FFT of
|
||||
/// the raw audio and preparing the spectral window.
|
||||
///
|
||||
/// This constructor supplies no pre-built FFT plans, so fresh planners are
|
||||
/// created on every call. To reuse plans, call `new_with_plans` with a forward
|
||||
/// real FFT plan of length `nraw` and an inverse plan of length `nraw / NDOWN`.
|
||||
///
|
||||
/// Returns `None` if the raw audio is too short or allocation fails.
|
||||
pub fn new(raw_audio: &[f32], sample_rate: f32) -> Option<Self> {
|
||||
Self::new_with_plans(raw_audio, sample_rate, None, None)
|
||||
}
|
||||
|
||||
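/// Initialize with optional pre-built FFT plans for reuse across decode cycles.
///
/// A plan supplied as `Some` is used as-is; for `None`, a fresh planner
/// builds the corresponding plan during this call.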
|
||||
pub fn new_with_plans(
|
||||
raw_audio: &[f32],
|
||||
sample_rate: f32,
|
||||
real_fft: Option<Arc<dyn realfft::RealToComplex<f32>>>,
|
||||
ifft: Option<Arc<dyn rustfft::Fft<f32>>>,
|
||||
) -> Option<Self> {
|
||||
let nraw = raw_audio.len();
|
||||
if nraw == 0 {
|
||||
return None;
|
||||
@@ -85,8 +101,13 @@ impl DownsampleContext {
|
||||
}
|
||||
|
||||
// Forward real FFT of raw audio
|
||||
let fft = match real_fft {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
|
||||
let fft = real_planner.plan_fft_forward(nraw);
|
||||
real_planner.plan_fft_forward(nraw)
|
||||
}
|
||||
};
|
||||
let mut input = fft.make_input_vec();
|
||||
let mut output = fft.make_output_vec();
|
||||
let mut scratch = fft.make_scratch_vec();
|
||||
@@ -98,8 +119,13 @@ impl DownsampleContext {
|
||||
let spectrum = output;
|
||||
|
||||
// IFFT plan for downsampled length
|
||||
let ifft = match ifft {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
let mut planner = FftPlanner::<f32>::new();
|
||||
let ifft = planner.plan_fft_inverse(nfft2);
|
||||
planner.plan_fft_inverse(nfft2)
|
||||
}
|
||||
};
|
||||
let ifft_scratch_len = ifft.get_inplace_scratch_len();
|
||||
|
||||
Some(Self {
|
||||
|
||||
@@ -13,8 +13,11 @@ pub mod downsample;
|
||||
pub mod osd;
|
||||
pub mod sync;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use num_complex::Complex32;
|
||||
use realfft::RealFftPlanner;
|
||||
use rustfft::FftPlanner;
|
||||
|
||||
use crate::decode::{verify_crc_and_build_message, FtxMessage};
|
||||
use crate::protocol::*;
|
||||
@@ -117,6 +120,9 @@ pub struct Ft2Pipeline {
|
||||
raw_capacity: usize,
|
||||
waveforms: SyncWaveforms,
|
||||
peak_search: PeakSearchWorkspace,
|
||||
// Cached FFT plans reused across decode cycles
|
||||
ds_real_fft: Arc<dyn realfft::RealToComplex<f32>>,
|
||||
ds_ifft: Arc<dyn rustfft::Fft<f32>>,
|
||||
}
|
||||
|
||||
struct Ft2DecodeWorkspace {
|
||||
@@ -176,12 +182,21 @@ impl PeakSearchWorkspace {
|
||||
impl Ft2Pipeline {
|
||||
/// Create a new FT2 pipeline for the given sample rate.
|
||||
pub fn new(sample_rate: i32) -> Self {
|
||||
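// Pre-build FFT plans for the downsample context (reused every decode cycle).
// NOTE(review): both plans are sized from FT2_NMAX, and `new_with_plans` uses
// a supplied plan without comparing its length to `raw_audio.len()` — confirm
// that decode only runs on buffers of exactly FT2_NMAX samples.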
|
||||
let nfft2 = FT2_NMAX / FT2_NDOWN;
|
||||
let mut real_planner = RealFftPlanner::<f32>::new();
|
||||
let ds_real_fft = real_planner.plan_fft_forward(FT2_NMAX);
|
||||
let mut fft_planner = FftPlanner::<f32>::new();
|
||||
let ds_ifft = fft_planner.plan_fft_inverse(nfft2);
|
||||
|
||||
Self {
|
||||
sample_rate: sample_rate as f32,
|
||||
raw_audio: Vec::with_capacity(FT2_NMAX),
|
||||
raw_capacity: FT2_NMAX,
|
||||
waveforms: prepare_sync_waveforms(),
|
||||
peak_search: PeakSearchWorkspace::new(),
|
||||
ds_real_fft,
|
||||
ds_ifft,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,7 +231,12 @@ impl Ft2Pipeline {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let ctx = match DownsampleContext::new(&self.raw_audio, self.sample_rate) {
|
||||
let ctx = match DownsampleContext::new_with_plans(
|
||||
&self.raw_audio,
|
||||
self.sample_rate,
|
||||
Some(Arc::clone(&self.ds_real_fft)),
|
||||
Some(Arc::clone(&self.ds_ifft)),
|
||||
) {
|
||||
Some(ctx) => ctx,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
|
||||
@@ -16,39 +16,15 @@
|
||||
//! 3. Exhaustive search over bit-flip patterns of increasing weight
|
||||
//! 4. Pattern hashing (OSD-2) to efficiently search two-bit-flip corrections
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use crate::constants::{FTX_LDPC_GENERATOR, FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS};
|
||||
use crate::crc::{ftx_compute_crc, ftx_extract_crc};
|
||||
use crate::decode::pack_bits;
|
||||
use crate::encode::parity8;
|
||||
use crate::ldpc::ldpc_check;
|
||||
use crate::protocol::{FTX_LDPC_K, FTX_LDPC_K_BYTES, FTX_LDPC_M, FTX_LDPC_N};
|
||||
|
||||
/// Check LDPC parity of a 174-bit codeword. Returns number of parity errors.
|
||||
pub fn ft2_ldpc_check(codeword: &[u8]) -> i32 {
|
||||
let mut errors = 0i32;
|
||||
for m in 0..FTX_LDPC_M {
|
||||
let mut x: u8 = 0;
|
||||
let num_rows = FTX_LDPC_NUM_ROWS[m] as usize;
|
||||
for i in 0..num_rows {
|
||||
let idx = FTX_LDPC_NM[m][i] as usize;
|
||||
if idx > 0 && idx - 1 < codeword.len() {
|
||||
x ^= codeword[idx - 1];
|
||||
}
|
||||
}
|
||||
if x != 0 {
|
||||
errors += 1;
|
||||
}
|
||||
}
|
||||
errors
|
||||
}
|
||||
|
||||
/// Fast rational approximation of `atanh(x)`.
|
||||
fn fast_atanh(x: f32) -> f32 {
|
||||
let x2 = x * x;
|
||||
let a = x * (945.0 + x2 * (-735.0 + x2 * 64.0));
|
||||
let b = 945.0 + x2 * (-1050.0 + x2 * 225.0);
|
||||
a / b
|
||||
}
|
||||
|
||||
/// Piecewise linear approximation of `atanh(x)` used in BP message passing.
|
||||
fn platanh(x: f32) -> f32 {
|
||||
let isign: f32 = if x < 0.0 { -1.0 } else { 1.0 };
|
||||
@@ -102,23 +78,23 @@ fn encode174_91_nocrc_bits(message91: &[u8], codeword: &mut [u8; FTX_LDPC_N]) {
|
||||
|
||||
/// XOR two byte slices.
|
||||
fn xor_rows(dst: &mut [u8], src: &[u8], len: usize) {
|
||||
for i in 0..len {
|
||||
dst[i] ^= src[i];
|
||||
}
|
||||
dst[..len]
|
||||
.iter_mut()
|
||||
.zip(&src[..len])
|
||||
.for_each(|(d, s)| *d ^= s);
|
||||
}
|
||||
|
||||
/// Matrix-vector multiply for re-encoding in OSD.
|
||||
fn mrbencode91(me: &[u8], codeword: &mut [u8], g2: &[u8], n: usize, k: usize) {
|
||||
for c in codeword[..n].iter_mut() {
|
||||
*c = 0;
|
||||
}
|
||||
codeword[..n].fill(0);
|
||||
for i in 0..k {
|
||||
if me[i] == 0 {
|
||||
continue;
|
||||
}
|
||||
for j in 0..n {
|
||||
codeword[j] ^= g2[j * k + i];
|
||||
}
|
||||
codeword[..n]
|
||||
.iter_mut()
|
||||
.enumerate()
|
||||
.for_each(|(j, c)| *c ^= g2[j * k + i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,9 +245,9 @@ pub fn osd174_91(
|
||||
let n = FTX_LDPC_N;
|
||||
let ndeep = ndeep.min(6);
|
||||
|
||||
// Build per-bit generator matrix (each row i generates codeword from
|
||||
// Cached per-bit generator matrix (each row i generates codeword from
|
||||
// unit vector e_i)
|
||||
let gen = build_generator_matrix();
|
||||
let gen = generator_matrix();
|
||||
|
||||
// Stack-allocated working buffers (k=91, n=174, n-k=83).
|
||||
let mut genmrb = [0u8; FTX_LDPC_K * FTX_LDPC_N];
|
||||
@@ -608,10 +584,11 @@ fn reorder_result(
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the full per-bit generator matrix.
|
||||
/// Each row `i` contains the 174-bit codeword produced by encoding
|
||||
/// a unit vector with bit `i` set.
|
||||
fn build_generator_matrix() -> Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]> {
|
||||
/// Get a reference to the cached generator matrix.
|
||||
/// The matrix is computed once on first call and reused thereafter.
|
||||
fn generator_matrix() -> &'static [[u8; FTX_LDPC_N]; FTX_LDPC_K] {
|
||||
static GEN: OnceLock<Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]>> = OnceLock::new();
|
||||
GEN.get_or_init(|| {
|
||||
let mut gen = Box::new([[0u8; FTX_LDPC_N]; FTX_LDPC_K]);
|
||||
for i in 0..FTX_LDPC_K {
|
||||
let mut msg = [0u8; FTX_LDPC_K];
|
||||
@@ -624,6 +601,7 @@ fn build_generator_matrix() -> Box<[[u8; FTX_LDPC_N]; FTX_LDPC_K]> {
|
||||
encode174_91_nocrc_bits(&msg, &mut gen[i]);
|
||||
}
|
||||
gen
|
||||
})
|
||||
}
|
||||
|
||||
/// Full iterative BP decoder with OSD refinement.
|
||||
@@ -698,7 +676,7 @@ pub fn ft2_decode174_91_osd(
|
||||
for i in 0..FTX_LDPC_N {
|
||||
best_cw[i] = if zn[i] > 0.0 { 1 } else { 0 };
|
||||
}
|
||||
let ncheck = ft2_ldpc_check(&best_cw);
|
||||
let ncheck = ldpc_check(&best_cw);
|
||||
|
||||
if ncheck == 0 && check_crc91(&best_cw) {
|
||||
message91.copy_from_slice(&best_cw[..FTX_LDPC_K]);
|
||||
@@ -815,18 +793,19 @@ pub fn ft2_decode174_91_osd(
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::ldpc::fast_atanh;
|
||||
|
||||
#[test]
|
||||
fn ldpc_check_all_zeros() {
|
||||
let cw = [0u8; FTX_LDPC_N];
|
||||
assert_eq!(ft2_ldpc_check(&cw), 0);
|
||||
assert_eq!(ldpc_check(&cw), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ldpc_check_single_bit_error() {
|
||||
let mut cw = [0u8; FTX_LDPC_N];
|
||||
cw[0] = 1;
|
||||
assert!(ft2_ldpc_check(&cw) > 0);
|
||||
assert!(ldpc_check(&cw) > 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -922,7 +901,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn generator_matrix_row_zero() {
|
||||
let gen = build_generator_matrix();
|
||||
let gen = generator_matrix();
|
||||
// Row 0 should encode unit vector e_0
|
||||
assert_eq!(gen[0][0], 1);
|
||||
// Some parity bits should be non-zero
|
||||
|
||||
@@ -13,7 +13,7 @@ use crate::constants::{FTX_LDPC_MN, FTX_LDPC_NM, FTX_LDPC_NUM_ROWS};
|
||||
use crate::protocol::{FTX_LDPC_M, FTX_LDPC_N};
|
||||
|
||||
/// Fast rational approximation of `tanh(x)`, clamped at +/-4.97.
|
||||
fn fast_tanh(x: f32) -> f32 {
|
||||
pub(crate) fn fast_tanh(x: f32) -> f32 {
|
||||
if x < -4.97f32 {
|
||||
return -1.0f32;
|
||||
}
|
||||
@@ -27,7 +27,7 @@ fn fast_tanh(x: f32) -> f32 {
|
||||
}
|
||||
|
||||
/// Fast rational approximation of `atanh(x)`.
|
||||
fn fast_atanh(x: f32) -> f32 {
|
||||
pub(crate) fn fast_atanh(x: f32) -> f32 {
|
||||
let x2 = x * x;
|
||||
let a = x * (945.0f32 + x2 * (-735.0f32 + x2 * 64.0f32));
|
||||
let b = 945.0f32 + x2 * (-1050.0f32 + x2 * 225.0f32);
|
||||
@@ -37,7 +37,7 @@ fn fast_atanh(x: f32) -> f32 {
|
||||
/// Count the number of LDPC parity errors in a 174-bit codeword.
|
||||
///
|
||||
/// Returns 0 if all parity checks pass (valid codeword).
|
||||
pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
|
||||
pub(crate) fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
|
||||
let mut errors = 0i32;
|
||||
for m in 0..FTX_LDPC_M {
|
||||
let mut x: u8 = 0;
|
||||
@@ -59,6 +59,7 @@ pub fn ldpc_check(codeword: &[u8; FTX_LDPC_N]) -> i32 {
|
||||
/// `max_iters` controls how many iterations to attempt.
|
||||
///
|
||||
/// Returns the number of remaining parity errors (0 = success).
|
||||
#[cfg(test)]
|
||||
pub fn ldpc_decode(
|
||||
codeword: &mut [f32; FTX_LDPC_N],
|
||||
max_iters: usize,
|
||||
|
||||
@@ -10,14 +10,9 @@ pub mod decode;
|
||||
mod decoder;
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
pub mod encode;
|
||||
#[allow(
|
||||
dead_code,
|
||||
clippy::manual_memcpy,
|
||||
clippy::needless_range_loop,
|
||||
clippy::too_many_arguments
|
||||
)]
|
||||
#[allow(dead_code, clippy::needless_range_loop, clippy::too_many_arguments)]
|
||||
pub mod ft2;
|
||||
#[allow(clippy::manual_memcpy, clippy::needless_range_loop)]
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
pub mod ldpc;
|
||||
#[allow(clippy::explicit_counter_loop, clippy::needless_range_loop)]
|
||||
pub mod message;
|
||||
|
||||
@@ -12,11 +12,13 @@ use rustfft::FftPlanner;
|
||||
|
||||
use crate::protocol::FtxProtocol;
|
||||
|
||||
/// Waterfall element storing magnitude (dB) and phase (radians).
|
||||
/// Waterfall element storing magnitude (dB), phase (radians), and raw complex components.
|
||||
#[derive(Clone, Copy, Default)]
|
||||
pub struct WfElem {
|
||||
pub mag: f32,
|
||||
pub phase: f32,
|
||||
pub re: f32,
|
||||
pub im: f32,
|
||||
}
|
||||
|
||||
impl WfElem {
|
||||
@@ -192,9 +194,10 @@ impl Monitor {
|
||||
}
|
||||
|
||||
// Windowed FFT
|
||||
for pos in 0..self.nfft {
|
||||
self.fft_input[pos] = self.window[pos] * self.last_frame[pos];
|
||||
}
|
||||
self.fft_input
|
||||
.iter_mut()
|
||||
.zip(self.window.iter().zip(self.last_frame.iter()))
|
||||
.for_each(|(dst, (w, f))| *dst = w * f);
|
||||
self.real_fft
|
||||
.process_with_scratch(
|
||||
&mut self.fft_input,
|
||||
@@ -214,7 +217,12 @@ impl Monitor {
|
||||
let phase = c.im.atan2(c.re);
|
||||
|
||||
if offset < self.wf.mag.len() {
|
||||
self.wf.mag[offset] = WfElem { mag: db, phase };
|
||||
self.wf.mag[offset] = WfElem {
|
||||
mag: db,
|
||||
phase,
|
||||
re: c.re,
|
||||
im: c.im,
|
||||
};
|
||||
}
|
||||
offset += 1;
|
||||
|
||||
@@ -226,6 +234,8 @@ impl Monitor {
|
||||
self.wf.mag[offset] = WfElem {
|
||||
mag: -120.0,
|
||||
phase: 0.0,
|
||||
re: 0.0,
|
||||
im: 0.0,
|
||||
};
|
||||
}
|
||||
offset += 1;
|
||||
|
||||
Reference in New Issue
Block a user