[refactor](trx-ftx): optimize ft2 decode hot paths

Reuse FT2 downsample and bitmetric work buffers, speed up
sync2d_score with precomputed references, and cache peak-search
FFT state on the pipeline.

Co-authored-by: OpenAI Codex <codex@openai.com>

Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
2026-03-18 23:08:42 +01:00
parent 7d20058c03
commit 0b28900082
5 changed files with 563 additions and 342 deletions
+10 -9
View File
@@ -176,8 +176,7 @@ impl Ft8Decoder {
/// Waterfall-based decode for FT8/FT4. /// Waterfall-based decode for FT8/FT4.
fn decode_waterfall(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> { fn decode_waterfall(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
let candidates = let candidates = ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
let mut results = Vec::new(); let mut results = Vec::new();
let mut seen: Vec<u16> = Vec::new(); let mut seen: Vec<u16> = Vec::new();
@@ -209,13 +208,14 @@ impl Ft8Decoder {
// Compute time offset // Compute time offset
let symbol_period = self.protocol.symbol_period(); let symbol_period = self.protocol.symbol_period();
let dt_s = let dt_s = (cand.time_offset as f32
(cand.time_offset as f32 + cand.time_sub as f32 / self.monitor.wf.time_osr as f32) + cand.time_sub as f32 / self.monitor.wf.time_osr as f32)
* symbol_period * symbol_period
- 0.5; - 0.5;
// Compute frequency // Compute frequency
let freq_hz = (self.monitor.min_bin as f32 + cand.freq_offset as f32 let freq_hz = (self.monitor.min_bin as f32
+ cand.freq_offset as f32
+ cand.freq_sub as f32 / self.monitor.wf.freq_osr as f32) + cand.freq_sub as f32 / self.monitor.wf.freq_osr as f32)
/ symbol_period; / symbol_period;
@@ -232,7 +232,8 @@ impl Ft8Decoder {
/// FT2-specific decode pipeline. /// FT2-specific decode pipeline.
fn decode_ft2(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> { fn decode_ft2(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
let pipe = match self.ft2_pipeline.as_ref() { let ft2_results = {
let pipe = match self.ft2_pipeline.as_mut() {
Some(p) => p, Some(p) => p,
None => return Vec::new(), None => return Vec::new(),
}; };
@@ -241,7 +242,8 @@ impl Ft8Decoder {
return Vec::new(); return Vec::new();
} }
let ft2_results = pipe.decode(max_results); pipe.decode(max_results)
};
let mut results = Vec::new(); let mut results = Vec::new();
for r in ft2_results { for r in ft2_results {
@@ -267,8 +269,7 @@ impl Ft8Decoder {
payload: msg.payload, payload: msg.payload,
hash: msg.hash as u32, hash: msg.hash as u32,
}; };
let (text, _offsets, _rc) = let (text, _offsets, _rc) = message::ftx_message_decode(&m, &mut self.callsign_hash);
message::ftx_message_decode(&m, &mut self.callsign_hash);
if text.is_empty() { if text.is_empty() {
return None; return None;
} }
+120 -125
View File
@@ -15,51 +15,64 @@ use crate::constants::{FT4_COSTAS_PATTERN, FT4_GRAY_MAP};
use super::{FT2_FRAME_SYMBOLS, FT2_NSS}; use super::{FT2_FRAME_SYMBOLS, FT2_NSS};
/// Extract bit metrics from the downsampled signal region. const N_METRICS: usize = 2 * FT2_FRAME_SYMBOLS;
///
/// Returns a 2D array of shape `[2 * FT2_FRAME_SYMBOLS][3]` where:
/// - Index 0: 1-symbol scale metric
/// - Index 1: 2-symbol scale metric
/// - Index 2: 4-symbol scale metric
///
/// Returns `None` if the sync quality is too poor (fewer than 4 of 16
/// Costas sync tones decoded correctly).
pub fn extract_bitmetrics_raw(
signal: &[Complex32],
) -> Option<Vec<[f32; 3]>> {
let n_metrics = 2 * FT2_FRAME_SYMBOLS;
let mut bitmetrics = vec![[0.0f32; 3]; n_metrics];
// Per-symbol FFT to extract complex tone amplitudes /// Reusable FFT plans and scratch buffers for bit-metric extraction.
/// Created once via `new` and then reused across `extract` calls.
pub struct BitMetricsWorkspace {
/// Forward FFT plan of length `FT2_NSS`, applied to one symbol at a time.
fft: std::sync::Arc<dyn rustfft::Fft<f32>>,
/// Scratch buffer for the in-place FFT, sized by `get_inplace_scratch_len()`.
scratch: Vec<Complex32>,
/// FFT output of the first four bins (tones) for every frame symbol.
symbols: [[Complex32; 4]; FT2_FRAME_SYMBOLS],
/// Magnitude (`norm`) of each entry in `symbols`.
s4: [[f32; 4]; FT2_FRAME_SYMBOLS],
/// Bit metrics computed over 1-symbol sequences.
metric1: [f32; N_METRICS],
/// Bit metrics computed over 2-symbol sequences.
metric2: [f32; N_METRICS],
/// Bit metrics computed over 4-symbol sequences.
metric4: [f32; N_METRICS],
/// Output rows `[metric1, metric2, metric4]` per bit; `extract` returns a
/// borrow of this buffer.
bitmetrics: [[f32; 3]; N_METRICS],
/// One symbol's worth of samples (`FT2_NSS`), transformed in place by `fft`.
csymb: [Complex32; FT2_NSS],
}
impl BitMetricsWorkspace {
pub fn new() -> Self {
let mut planner = FftPlanner::<f32>::new(); let mut planner = FftPlanner::<f32>::new();
let fft = planner.plan_fft_forward(FT2_NSS); let fft = planner.plan_fft_forward(FT2_NSS);
let fft_scratch_len = fft.get_inplace_scratch_len(); let scratch = vec![Complex32::new(0.0, 0.0); fft.get_inplace_scratch_len()];
let mut scratch = vec![Complex32::new(0.0, 0.0); fft_scratch_len];
// Complex symbols for each of the 4 tones at each frame symbol Self {
let mut symbols = vec![[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS]; fft,
// Magnitude for each tone at each symbol scratch,
let mut s4 = vec![[0.0f32; 4]; FT2_FRAME_SYMBOLS]; symbols: [[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS],
s4: [[0.0; 4]; FT2_FRAME_SYMBOLS],
metric1: [0.0; N_METRICS],
metric2: [0.0; N_METRICS],
metric4: [0.0; N_METRICS],
bitmetrics: [[0.0; 3]; N_METRICS],
csymb: [Complex32::new(0.0, 0.0); FT2_NSS],
}
}
/// Extract bit metrics into a reusable internal buffer.
pub fn extract<'a>(&'a mut self, signal: &[Complex32]) -> Option<&'a [[f32; 3]]> {
self.metric1.fill(0.0);
self.metric2.fill(0.0);
self.metric4.fill(0.0);
for sym in 0..FT2_FRAME_SYMBOLS { for sym in 0..FT2_FRAME_SYMBOLS {
let offset = sym * FT2_NSS; let offset = sym * FT2_NSS;
let mut csymb: Vec<Complex32> = (0..FT2_NSS) if offset + FT2_NSS <= signal.len() {
.map(|i| { self.csymb
if offset + i < signal.len() { .copy_from_slice(&signal[offset..(offset + FT2_NSS)]);
signal[offset + i]
} else { } else {
Complex32::new(0.0, 0.0) self.csymb.fill(Complex32::new(0.0, 0.0));
let remaining = signal.len().saturating_sub(offset);
self.csymb[..remaining].copy_from_slice(&signal[offset..(offset + remaining)]);
} }
})
.collect();
fft.process_with_scratch(&mut csymb, &mut scratch); self.fft
.process_with_scratch(&mut self.csymb, &mut self.scratch);
for tone in 0..4 { for tone in 0..4 {
if tone < csymb.len() { let symbol = self.csymb[tone];
symbols[sym][tone] = csymb[tone]; self.symbols[sym][tone] = symbol;
s4[sym][tone] = csymb[tone].norm(); self.s4[sym][tone] = symbol.norm();
}
} }
} }
@@ -73,7 +86,7 @@ pub fn extract_bitmetrics_raw(
} }
let mut best = 0; let mut best = 0;
for tone in 1..4 { for tone in 1..4 {
if s4[base + i][tone] > s4[base + i][best] { if self.s4[base + i][tone] > self.s4[base + i][best] {
best = tone; best = tone;
} }
} }
@@ -87,93 +100,56 @@ pub fn extract_bitmetrics_raw(
return None; return None;
} }
// Precompute one_mask: for each integer 0..255 and bit position 0..7,
// whether that bit is set.
let one_mask: Vec<[u8; 8]> = (0..256u16)
.map(|i| {
let mut m = [0u8; 8];
for j in 0..8 {
m[j] = if (i & (1 << j)) != 0 { 1 } else { 0 };
}
m
})
.collect();
// Compute metrics at three scales
let mut metric1 = vec![0.0f32; n_metrics];
let mut metric2 = vec![0.0f32; n_metrics];
let mut metric4 = vec![0.0f32; n_metrics];
for nseq in 0..3 { for nseq in 0..3 {
let nsym = match nseq { let (nsym, metric): (usize, &mut [f32; N_METRICS]) = match nseq {
0 => 1, 0 => (1, &mut self.metric1),
1 => 2, 1 => (2, &mut self.metric2),
_ => 4, _ => (4, &mut self.metric4),
}; };
let nt = 1 << (2 * nsym); // number of tone sequences to enumerate let nt = 1usize << (2 * nsym);
let ibmax = match nsym {
let mut ks = 0;
while ks + nsym <= FT2_FRAME_SYMBOLS {
// Compute coherent magnitude for each possible tone sequence
let mut s2 = vec![0.0f32; nt];
for i in 0..nt {
let i1 = i / 64;
let i2 = (i & 63) / 16;
let i3 = (i & 15) / 4;
let i4 = i & 3;
let sum = match nsym {
1 => symbols[ks][FT4_GRAY_MAP[i4] as usize],
2 => {
symbols[ks][FT4_GRAY_MAP[i3] as usize]
+ symbols[ks + 1][FT4_GRAY_MAP[i4] as usize]
}
4 => {
symbols[ks][FT4_GRAY_MAP[i1] as usize]
+ symbols[ks + 1][FT4_GRAY_MAP[i2] as usize]
+ symbols[ks + 2][FT4_GRAY_MAP[i3] as usize]
+ symbols[ks + 3][FT4_GRAY_MAP[i4] as usize]
}
_ => Complex32::new(0.0, 0.0),
};
s2[i] = sum.norm();
}
// Extract bit metrics: for each bit position, find max coherent
// magnitude with that bit set vs unset
let ipt = 2 * ks;
let ibmax: usize = match nsym {
1 => 1, 1 => 1,
2 => 3, 2 => 3,
4 => 7, 4 => 7,
_ => 0, _ => 0,
}; };
for ib in 0..=ibmax { let mut ks = 0;
let mut max_one = f32::NEG_INFINITY; while ks + nsym <= FT2_FRAME_SYMBOLS {
let mut max_zero = f32::NEG_INFINITY; let mut max_one = [f32::NEG_INFINITY; 8];
let mut max_zero = [f32::NEG_INFINITY; 8];
for i in 0..nt { for i in 0..nt {
if i < 256 { let sum = match nsym {
if one_mask[i][ibmax - ib] != 0 { 1 => self.symbols[ks][FT4_GRAY_MAP[i & 0x03] as usize],
if s2[i] > max_one { 2 => {
max_one = s2[i]; self.symbols[ks][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
+ self.symbols[ks + 1][FT4_GRAY_MAP[i & 0x03] as usize]
} }
} else if s2[i] > max_zero { 4 => {
max_zero = s2[i]; self.symbols[ks][FT4_GRAY_MAP[(i >> 6) & 0x03] as usize]
+ self.symbols[ks + 1][FT4_GRAY_MAP[(i >> 4) & 0x03] as usize]
+ self.symbols[ks + 2][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
+ self.symbols[ks + 3][FT4_GRAY_MAP[i & 0x03] as usize]
}
_ => Complex32::new(0.0, 0.0),
};
let coherent = sum.norm();
for ib in 0..=ibmax {
if ((i >> (ibmax - ib)) & 1) != 0 {
max_one[ib] = max_one[ib].max(coherent);
} else {
max_zero[ib] = max_zero[ib].max(coherent);
} }
} }
} }
let ipt = 2 * ks;
for ib in 0..=ibmax {
let metric_idx = ipt + ib; let metric_idx = ipt + ib;
if metric_idx >= n_metrics { if metric_idx < N_METRICS {
continue; metric[metric_idx] = max_one[ib] - max_zero[ib];
}
match nseq {
0 => metric1[metric_idx] = max_one - max_zero,
1 => metric2[metric_idx] = max_one - max_zero,
_ => metric4[metric_idx] = max_one - max_zero,
} }
} }
@@ -182,30 +158,49 @@ pub fn extract_bitmetrics_raw(
} }
// Patch boundary metrics where multi-symbol integration overruns // Patch boundary metrics where multi-symbol integration overruns
if n_metrics >= 206 { self.metric2[204] = self.metric1[204];
metric2[204] = metric1[204]; self.metric2[205] = self.metric1[205];
metric2[205] = metric1[205]; self.metric4[200] = self.metric2[200];
metric4[200] = metric2[200]; self.metric4[201] = self.metric2[201];
metric4[201] = metric2[201]; self.metric4[202] = self.metric2[202];
metric4[202] = metric2[202]; self.metric4[203] = self.metric2[203];
metric4[203] = metric2[203]; self.metric4[204] = self.metric1[204];
metric4[204] = metric1[204]; self.metric4[205] = self.metric1[205];
metric4[205] = metric1[205];
normalize_metric(&mut self.metric1);
normalize_metric(&mut self.metric2);
normalize_metric(&mut self.metric4);
for i in 0..N_METRICS {
self.bitmetrics[i][0] = self.metric1[i];
self.bitmetrics[i][1] = self.metric2[i];
self.bitmetrics[i][2] = self.metric4[i];
} }
// Normalize each metric scale independently Some(&self.bitmetrics)
normalize_metric(&mut metric1);
normalize_metric(&mut metric2);
normalize_metric(&mut metric4);
// Pack into output
for i in 0..n_metrics {
bitmetrics[i][0] = metric1[i];
bitmetrics[i][1] = metric2[i];
bitmetrics[i][2] = metric4[i];
} }
}
Some(bitmetrics) impl Default for BitMetricsWorkspace {
fn default() -> Self {
Self::new()
}
}
/// Extract bit metrics from the downsampled signal region.
///
/// Returns a 2D array of shape `[2 * FT2_FRAME_SYMBOLS][3]` where:
/// - Index 0: 1-symbol scale metric
/// - Index 1: 2-symbol scale metric
/// - Index 2: 4-symbol scale metric
///
/// Returns `None` if the sync quality is too poor (fewer than 4 of 16
/// Costas sync tones decoded correctly).
pub fn extract_bitmetrics_raw(signal: &[Complex32]) -> Option<Vec<[f32; 3]>> {
let mut workspace = BitMetricsWorkspace::new();
workspace
.extract(signal)
.map(|bitmetrics| bitmetrics.to_vec())
} }
/// Normalize a metric array by dividing by its standard deviation. /// Normalize a metric array by dividing by its standard deviation.
+63 -24
View File
@@ -14,6 +14,34 @@ use rustfft::FftPlanner;
use super::{FT2_NDOWN, FT2_SYMBOL_PERIOD_F}; use super::{FT2_NDOWN, FT2_SYMBOL_PERIOD_F};
/// Reusable scratch buffers for frequency-domain downsampling.
///
/// Keeping these outside the downsample call lets repeated calls share one
/// allocation for the band buffer and one for the inverse-FFT scratch space.
pub struct DownsampleWorkspace {
    /// Frequency-domain band buffer; all zeros after `prepare` returns.
    band: Vec<Complex32>,
    /// Scratch space handed to the in-place inverse FFT.
    ifft_scratch: Vec<Complex32>,
}

impl DownsampleWorkspace {
    /// Allocates zero-initialised buffers of the requested lengths.
    fn new(nfft2: usize, ifft_scratch_len: usize) -> Self {
        let zero = Complex32::new(0.0, 0.0);
        Self {
            band: vec![zero; nfft2],
            ifft_scratch: vec![zero; ifft_scratch_len],
        }
    }

    /// Resets the workspace for a downsample of `nfft2` bins.
    ///
    /// On return `band` has exactly `nfft2` elements, all zero, and
    /// `ifft_scratch` has exactly `ifft_scratch_len` elements.
    fn prepare(&mut self, nfft2: usize, ifft_scratch_len: usize) {
        let zero = Complex32::new(0.0, 0.0);

        // `Vec::resize` alone keeps the elements that survive the length
        // change, so a workspace that last served a different `nfft2` would
        // carry stale bins into this call. Clearing first guarantees every
        // element is zero while still reusing the existing allocation.
        self.band.clear();
        self.band.resize(nfft2, zero);

        // Scratch contents are never read as input, so only the length matters.
        if self.ifft_scratch.len() != ifft_scratch_len {
            self.ifft_scratch.resize(ifft_scratch_len, zero);
        }
    }
}
/// Downsample context holding precomputed FFT data and spectral window. /// Downsample context holding precomputed FFT data and spectral window.
pub struct DownsampleContext { pub struct DownsampleContext {
/// Number of raw samples. /// Number of raw samples.
@@ -28,8 +56,8 @@ pub struct DownsampleContext {
spectrum: Vec<Complex32>, spectrum: Vec<Complex32>,
/// IFFT plan for the downsampled length. /// IFFT plan for the downsampled length.
ifft: std::sync::Arc<dyn rustfft::Fft<f32>>, ifft: std::sync::Arc<dyn rustfft::Fft<f32>>,
/// Scratch buffer for IFFT. /// Scratch length required by the IFFT plan.
ifft_scratch: Vec<Complex32>, ifft_scratch_len: usize,
} }
impl DownsampleContext { impl DownsampleContext {
@@ -50,7 +78,11 @@ impl DownsampleContext {
let df = sample_rate / nraw as f32; let df = sample_rate / nraw as f32;
// Build spectral extraction window // Build spectral extraction window
let window = build_spectral_window(nfft2, df); let mut window = build_spectral_window(nfft2, df);
let inv_nfft2 = 1.0 / nfft2 as f32;
for coeff in &mut window {
*coeff *= inv_nfft2;
}
// Forward real FFT of raw audio // Forward real FFT of raw audio
let mut real_planner = realfft::RealFftPlanner::<f32>::new(); let mut real_planner = realfft::RealFftPlanner::<f32>::new();
@@ -59,11 +91,7 @@ impl DownsampleContext {
let mut output = fft.make_output_vec(); let mut output = fft.make_output_vec();
let mut scratch = fft.make_scratch_vec(); let mut scratch = fft.make_scratch_vec();
for (i, s) in raw_audio.iter().enumerate() { input.copy_from_slice(raw_audio);
if i < input.len() {
input[i] = *s;
}
}
fft.process_with_scratch(&mut input, &mut output, &mut scratch) fft.process_with_scratch(&mut input, &mut output, &mut scratch)
.ok()?; .ok()?;
@@ -72,7 +100,7 @@ impl DownsampleContext {
// IFFT plan for downsampled length // IFFT plan for downsampled length
let mut planner = FftPlanner::<f32>::new(); let mut planner = FftPlanner::<f32>::new();
let ifft = planner.plan_fft_inverse(nfft2); let ifft = planner.plan_fft_inverse(nfft2);
let ifft_scratch = vec![Complex32::new(0.0, 0.0); ifft.get_inplace_scratch_len()]; let ifft_scratch_len = ifft.get_inplace_scratch_len();
Some(Self { Some(Self {
nraw, nraw,
@@ -81,7 +109,7 @@ impl DownsampleContext {
window, window,
spectrum, spectrum,
ifft, ifft,
ifft_scratch, ifft_scratch_len,
}) })
} }
@@ -90,15 +118,31 @@ impl DownsampleContext {
self.nfft2 self.nfft2
} }
/// Builds a fresh set of scratch buffers sized for this context, suitable
/// for passing to `downsample_with_workspace` on repeated calls.
pub fn workspace(&self) -> DownsampleWorkspace {
    let band_len = self.nfft2;
    let scratch_len = self.ifft_scratch_len;
    DownsampleWorkspace::new(band_len, scratch_len)
}
/// Downsample the raw audio around `freq_hz`, writing complex baseband /// Downsample the raw audio around `freq_hz`, writing complex baseband
/// samples into `out`. Returns the number of samples produced. /// samples into `out`. Returns the number of samples produced.
pub fn downsample(&self, freq_hz: f32, out: &mut [Complex32]) -> usize { pub fn downsample(&self, freq_hz: f32, out: &mut [Complex32]) -> usize {
let mut workspace = self.workspace();
self.downsample_with_workspace(freq_hz, out, &mut workspace)
}
/// Downsample the raw audio using reusable scratch buffers.
pub fn downsample_with_workspace(
&self,
freq_hz: f32,
out: &mut [Complex32],
workspace: &mut DownsampleWorkspace,
) -> usize {
if out.len() < self.nfft2 { if out.len() < self.nfft2 {
return 0; return 0;
} }
// Working band buffer workspace.prepare(self.nfft2, self.ifft_scratch_len);
let mut band = vec![Complex32::new(0.0, 0.0); self.nfft2]; let band = &mut workspace.band;
let i0 = (freq_hz / self.df).round() as i32; let i0 = (freq_hz / self.df).round() as i32;
let half_nraw = (self.nraw / 2) as i32; let half_nraw = (self.nraw / 2) as i32;
@@ -119,21 +163,16 @@ impl DownsampleContext {
} }
} }
// Apply spectral window and scale // Apply spectral window
let inv_nfft2 = 1.0 / self.nfft2 as f32;
for i in 0..self.nfft2 { for i in 0..self.nfft2 {
band[i] = Complex32::new( band[i] *= self.window[i];
band[i].re * self.window[i] * inv_nfft2,
band[i].im * self.window[i] * inv_nfft2,
);
} }
// Inverse FFT (in-place) // Inverse FFT (in-place)
let mut scratch = self.ifft_scratch.clone();
self.ifft self.ifft
.process_with_scratch(&mut band, &mut scratch); .process_with_scratch(band, &mut workspace.ifft_scratch);
out[..self.nfft2].copy_from_slice(&band); out[..self.nfft2].copy_from_slice(band);
self.nfft2 self.nfft2
} }
} }
@@ -167,13 +206,13 @@ fn build_spectral_window(nfft2: usize, df: f32) -> Vec<f32> {
// Raised-cosine trailing edge // Raised-cosine trailing edge
for i in (iwt + iwf)..(2 * iwt + iwf).min(nfft2) { for i in (iwt + iwf)..(2 * iwt + iwf).min(nfft2) {
window[i] = 0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos()); window[i] =
0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos());
} }
// Circular shift by iws bins // Circular shift by iws bins
if iws > 0 && iws < nfft2 { if iws > 0 && iws < nfft2 {
let shifted: Vec<f32> = (0..nfft2).map(|i| window[(i + iws) % nfft2]).collect(); window.rotate_left(iws);
window.copy_from_slice(&shifted);
} }
window window
+203 -82
View File
@@ -14,6 +14,7 @@ pub mod osd;
pub mod sync; pub mod sync;
use num_complex::Complex32; use num_complex::Complex32;
use realfft::RealFftPlanner;
use crate::constants::FT4_XOR_SEQUENCE; use crate::constants::FT4_XOR_SEQUENCE;
use crate::crc::{ftx_compute_crc, ftx_extract_crc}; use crate::crc::{ftx_compute_crc, ftx_extract_crc};
@@ -21,8 +22,9 @@ use crate::decode::{pack_bits, FtxMessage};
use crate::ldpc; use crate::ldpc;
use crate::protocol::*; use crate::protocol::*;
use downsample::DownsampleContext; use bitmetrics::BitMetricsWorkspace;
use sync::{prepare_sync_waveforms, sync2d_score}; use downsample::{DownsampleContext, DownsampleWorkspace};
use sync::{prepare_sync_waveforms, sync2d_score, SyncWaveforms};
// FT2 DSP constants // FT2 DSP constants
pub const FT2_NDOWN: usize = 9; pub const FT2_NDOWN: usize = 9;
@@ -119,6 +121,62 @@ pub struct Ft2Pipeline {
sample_rate: f32, sample_rate: f32,
raw_audio: Vec<f32>, raw_audio: Vec<f32>,
raw_capacity: usize, raw_capacity: usize,
waveforms: SyncWaveforms,
peak_search: PeakSearchWorkspace,
}
/// Buffers shared by the scan and per-hit decode stages of one `decode` call.
struct Ft2DecodeWorkspace {
    /// Scratch buffers handed to `downsample_with_workspace`.
    downsample: DownsampleWorkspace,
    /// Output of the initial downsample, used for sync scoring.
    downsample_a: Vec<Complex32>,
    /// Output of the final downsample at the corrected frequency.
    downsample_b: Vec<Complex32>,
    /// Frame-length signal region extracted for bit-metric computation.
    signal: Vec<Complex32>,
    /// Reusable bit-metric extraction state.
    bitmetrics: BitMetricsWorkspace,
}

impl Ft2DecodeWorkspace {
    /// Allocates every buffer up front, sized from the downsample context.
    fn new(ctx: &DownsampleContext) -> Self {
        let zero = Complex32::new(0.0, 0.0);
        let downsampled_len = ctx.nfft2();

        let downsample = ctx.workspace();
        let downsample_a = vec![zero; downsampled_len];
        let downsample_b = vec![zero; downsampled_len];
        let signal = vec![zero; FT2_FRAME_SAMPLES];
        let bitmetrics = BitMetricsWorkspace::new();

        Self {
            downsample,
            downsample_a,
            downsample_b,
            signal,
            bitmetrics,
        }
    }
}
// Cached FFT plan and buffers for the frequency-peak search, kept on the
// pipeline so they are built once rather than on every search.
struct PeakSearchWorkspace {
// Window of length `FT2_NFFT1` from `nuttall_window`, applied to each frame
// before the FFT.
window: Vec<f32>,
// Forward real-to-complex FFT plan of length `FT2_NFFT1`.
fft: std::sync::Arc<dyn realfft::RealToComplex<f32>>,
// Windowed time-domain input for `fft`.
fft_input: Vec<f32>,
// Complex spectrum written by `fft`.
fft_output: Vec<Complex32>,
// Scratch buffer required by `fft`.
fft_scratch: Vec<Complex32>,
// Power spectrum averaged across frames (`FT2_NH1` bins).
avg: Vec<f32>,
// 15-point moving average of `avg`.
smooth: Vec<f32>,
// 63-point moving average of `smooth`, offset by a small epsilon.
baseline: Vec<f32>,
}
impl PeakSearchWorkspace {
/// Precomputes the analysis window, plans the forward real FFT of length
/// `FT2_NFFT1`, and allocates the FFT and spectrum buffers.
fn new() -> Self {
    let window = nuttall_window(FT2_NFFT1);
    let fft = RealFftPlanner::<f32>::new().plan_fft_forward(FT2_NFFT1);

    Self {
        window,
        // Buffers are taken from the plan so their lengths match what it expects.
        fft_input: fft.make_input_vec(),
        fft_output: fft.make_output_vec(),
        fft_scratch: fft.make_scratch_vec(),
        avg: vec![0.0; FT2_NH1],
        smooth: vec![0.0; FT2_NH1],
        baseline: vec![0.0; FT2_NH1],
        // Moved last, after the borrows above are finished.
        fft,
    }
}
} }
impl Ft2Pipeline { impl Ft2Pipeline {
@@ -128,6 +186,8 @@ impl Ft2Pipeline {
sample_rate: sample_rate as f32, sample_rate: sample_rate as f32,
raw_audio: Vec::with_capacity(FT2_NMAX), raw_audio: Vec::with_capacity(FT2_NMAX),
raw_capacity: FT2_NMAX, raw_capacity: FT2_NMAX,
waveforms: prepare_sync_waveforms(),
peak_search: PeakSearchWorkspace::new(),
} }
} }
@@ -157,7 +217,7 @@ impl Ft2Pipeline {
} }
/// Run the full FT2 decode pipeline. Returns decoded messages. /// Run the full FT2 decode pipeline. Returns decoded messages.
pub fn decode(&self, max_results: usize) -> Vec<Ft2DecodeResult> { pub fn decode(&mut self, max_results: usize) -> Vec<Ft2DecodeResult> {
if self.raw_audio.len() < FT2_NFFT1 { if self.raw_audio.len() < FT2_NFFT1 {
return Vec::new(); return Vec::new();
} }
@@ -167,7 +227,8 @@ impl Ft2Pipeline {
None => return Vec::new(), None => return Vec::new(),
}; };
let hits = self.find_scan_hits(&ctx); let mut workspace = Ft2DecodeWorkspace::new(&ctx);
let hits = self.find_scan_hits(&ctx, &mut workspace);
if hits.is_empty() { if hits.is_empty() {
return Vec::new(); return Vec::new();
} }
@@ -179,11 +240,11 @@ impl Ft2Pipeline {
if results.len() >= max_results { if results.len() >= max_results {
break; break;
} }
if let Some(result) = self.decode_hit(&ctx, hit) { if let Some(result) = self.decode_hit(&ctx, hit, &mut workspace) {
// Dedup // Dedup
let dominated = seen_hashes.iter().any(|(h, p)| { let dominated = seen_hashes
*h == result.message.hash && *p == result.message.payload .iter()
}); .any(|(h, p)| *h == result.message.hash && *p == result.message.payload);
if dominated { if dominated {
continue; continue;
} }
@@ -196,7 +257,7 @@ impl Ft2Pipeline {
} }
/// Find frequency peaks from averaged power spectrum. /// Find frequency peaks from averaged power spectrum.
fn find_frequency_peaks(&self) -> Vec<RawCandidate> { fn find_frequency_peaks(&mut self) -> Vec<RawCandidate> {
if self.raw_audio.len() < FT2_NFFT1 { if self.raw_audio.len() < FT2_NFFT1 {
return Vec::new(); return Vec::new();
} }
@@ -204,65 +265,68 @@ impl Ft2Pipeline {
let fs = self.sample_rate; let fs = self.sample_rate;
let df = fs / FT2_NFFT1 as f32; let df = fs / FT2_NFFT1 as f32;
let n_frames = 1 + (self.raw_audio.len() - FT2_NFFT1) / FT2_NSTEP; let n_frames = 1 + (self.raw_audio.len() - FT2_NFFT1) / FT2_NSTEP;
let PeakSearchWorkspace {
window,
fft,
fft_input,
fft_output,
fft_scratch,
avg,
smooth,
baseline,
} = &mut self.peak_search;
// Compute Nuttall window avg.fill(0.0);
let window = nuttall_window(FT2_NFFT1); smooth.fill(0.0);
baseline.fill(0.0);
// Forward real FFT setup
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
let fft = real_planner.plan_fft_forward(FT2_NFFT1);
let mut fft_input = fft.make_input_vec();
let mut fft_output = fft.make_output_vec();
let mut fft_scratch = fft.make_scratch_vec();
// Average power spectrum across frames
let mut avg = vec![0.0f32; FT2_NH1];
for frame in 0..n_frames { for frame in 0..n_frames {
let start = frame * FT2_NSTEP; let start = frame * FT2_NSTEP;
for i in 0..FT2_NFFT1 { let input = &self.raw_audio[start..(start + FT2_NFFT1)];
fft_input[i] = self.raw_audio[start + i] * window[i]; for (dst, (&sample, &coeff)) in
fft_input.iter_mut().zip(input.iter().zip(window.iter()))
{
*dst = sample * coeff;
} }
fft.process_with_scratch(&mut fft_input, &mut fft_output, &mut fft_scratch) fft.process_with_scratch(fft_input, fft_output, fft_scratch)
.expect("FFT failed"); .expect("FFT failed");
for bin in 1..FT2_NH1 { for (bin, c) in fft_output.iter().enumerate().take(FT2_NH1).skip(1) {
if bin < fft_output.len() { avg[bin] += c.norm_sqr();
let c = fft_output[bin];
let power = c.re * c.re + c.im * c.im;
avg[bin] += power;
}
} }
} }
let inv_n_frames = 1.0 / n_frames as f32;
for bin in 1..FT2_NH1 { for bin in 1..FT2_NH1 {
avg[bin] /= n_frames as f32; avg[bin] *= inv_n_frames;
} }
// Smooth with 15-point moving average // Smooth with 15-point moving average
let mut smooth = vec![0.0f32; FT2_NH1]; if FT2_NH1 > 16 {
let mut sum: f32 = avg[1..16].iter().sum();
for bin in 8..FT2_NH1.saturating_sub(8) { for bin in 8..FT2_NH1.saturating_sub(8) {
let mut sum = 0.0f32;
for i in (bin.saturating_sub(7))..=(bin + 7).min(FT2_NH1 - 1) {
sum += avg[i];
}
smooth[bin] = sum / 15.0; smooth[bin] = sum / 15.0;
if bin + 8 < FT2_NH1 {
sum += avg[bin + 8] - avg[bin - 7];
}
}
} }
// Baseline with 63-point moving average // Baseline with 63-point moving average
let mut baseline = vec![0.0f32; FT2_NH1]; if FT2_NH1 > 64 {
let mut sum: f32 = smooth[1..64].iter().sum();
for bin in 32..FT2_NH1.saturating_sub(32) { for bin in 32..FT2_NH1.saturating_sub(32) {
let mut sum = 0.0f32;
for i in (bin.saturating_sub(31))..=(bin + 31).min(FT2_NH1 - 1) {
sum += smooth[i];
}
baseline[bin] = sum / 63.0 + 1e-9; baseline[bin] = sum / 63.0 + 1e-9;
if bin + 32 < FT2_NH1 {
sum += smooth[bin + 32] - smooth[bin - 31];
}
}
} }
// Find peaks // Find peaks
let min_bin = (200.0 / df).round() as usize; let min_bin = (200.0 / df).round() as usize;
let max_bin = (4910.0 / df).round() as usize; let max_bin = (4910.0 / df).round() as usize;
let mut candidates = Vec::new(); let mut candidates = Vec::with_capacity(FT2_MAX_RAW_CANDIDATES);
let mut bin = min_bin + 1; let mut bin = min_bin + 1;
while bin < max_bin.saturating_sub(1) && candidates.len() < FT2_MAX_RAW_CANDIDATES { while bin < max_bin.saturating_sub(1) && candidates.len() < FT2_MAX_RAW_CANDIDATES {
@@ -309,20 +373,25 @@ impl Ft2Pipeline {
} }
// Sort by score descending // Sort by score descending
candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal)); candidates.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
candidates candidates
} }
/// Find scan hits by downsampling each frequency peak and computing sync scores. /// Find scan hits by downsampling each frequency peak and computing sync scores.
fn find_scan_hits(&self, ctx: &DownsampleContext) -> Vec<ScanHit> { fn find_scan_hits(
&mut self,
ctx: &DownsampleContext,
workspace: &mut Ft2DecodeWorkspace,
) -> Vec<ScanHit> {
let peaks = self.find_frequency_peaks(); let peaks = self.find_frequency_peaks();
if peaks.is_empty() { if peaks.is_empty() {
return Vec::new(); return Vec::new();
} }
let nfft2 = ctx.nfft2();
let waveforms = prepare_sync_waveforms();
let mut hits = Vec::new(); let mut hits = Vec::new();
for peak in &peaks { for peak in &peaks {
@@ -330,12 +399,15 @@ impl Ft2Pipeline {
break; break;
} }
let mut down = vec![Complex32::new(0.0, 0.0); nfft2]; let produced = ctx.downsample_with_workspace(
let produced = ctx.downsample(peak.freq_hz, &mut down); peak.freq_hz,
&mut workspace.downsample_a,
&mut workspace.downsample,
);
if produced == 0 { if produced == 0 {
continue; continue;
} }
normalize_downsampled(&mut down[..produced], produced); normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
// Coarse search // Coarse search
let mut best_score: f32 = -1.0; let mut best_score: f32 = -1.0;
@@ -347,10 +419,10 @@ impl Ft2Pipeline {
let mut start = -688i32; let mut start = -688i32;
while start <= 2024 { while start <= 2024 {
let score = sync2d_score( let score = sync2d_score(
&down[..produced], &workspace.downsample_a[..produced],
start, start,
idf, idf,
&waveforms, &self.waveforms,
); );
if score > best_score { if score > best_score {
best_score = score; best_score = score;
@@ -373,10 +445,10 @@ impl Ft2Pipeline {
} }
for start in (best_start - 5)..=(best_start + 5) { for start in (best_start - 5)..=(best_start + 5) {
let score = sync2d_score( let score = sync2d_score(
&down[..produced], &workspace.downsample_a[..produced],
start, start,
idf, idf,
&waveforms, &self.waveforms,
); );
if score > best_score { if score > best_score {
best_score = score; best_score = score;
@@ -409,17 +481,22 @@ impl Ft2Pipeline {
} }
/// Attempt to decode a single scan hit through the full pipeline. /// Attempt to decode a single scan hit through the full pipeline.
fn decode_hit(&self, ctx: &DownsampleContext, hit: &ScanHit) -> Option<Ft2DecodeResult> { fn decode_hit(
let nfft2 = ctx.nfft2(); &self,
let waveforms = prepare_sync_waveforms(); ctx: &DownsampleContext,
hit: &ScanHit,
workspace: &mut Ft2DecodeWorkspace,
) -> Option<Ft2DecodeResult> {
// Initial downsample for sync refinement // Initial downsample for sync refinement
let mut cd2 = vec![Complex32::new(0.0, 0.0); nfft2]; let produced = ctx.downsample_with_workspace(
let produced = ctx.downsample(hit.freq_hz, &mut cd2); hit.freq_hz,
&mut workspace.downsample_a,
&mut workspace.downsample,
);
if produced == 0 { if produced == 0 {
return None; return None;
} }
normalize_downsampled(&mut cd2[..produced], produced); normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
// Refine sync // Refine sync
let mut best_score: f32 = -1.0; let mut best_score: f32 = -1.0;
@@ -431,7 +508,12 @@ impl Ft2Pipeline {
continue; continue;
} }
for start in (hit.start - 5)..=(hit.start + 5) { for start in (hit.start - 5)..=(hit.start + 5) {
let score = sync2d_score(&cd2[..produced], start, idf, &waveforms); let score = sync2d_score(
&workspace.downsample_a[..produced],
start,
idf,
&self.waveforms,
);
if score > best_score { if score > best_score {
best_score = score; best_score = score;
best_start = start; best_start = start;
@@ -451,19 +533,25 @@ impl Ft2Pipeline {
} }
// Final downsample at corrected frequency // Final downsample at corrected frequency
let mut cb = vec![Complex32::new(0.0, 0.0); nfft2]; let produced2 = ctx.downsample_with_workspace(
let produced2 = ctx.downsample(corrected_freq_hz, &mut cb); corrected_freq_hz,
&mut workspace.downsample_b,
&mut workspace.downsample,
);
if produced2 == 0 { if produced2 == 0 {
return None; return None;
} }
normalize_downsampled(&mut cb[..produced2], FT2_FRAME_SAMPLES); normalize_downsampled(&mut workspace.downsample_b[..produced2], FT2_FRAME_SAMPLES);
// Extract signal region // Extract signal region
let mut signal = vec![Complex32::new(0.0, 0.0); FT2_FRAME_SAMPLES]; extract_signal_region(
extract_signal_region(&cb[..produced2], best_start, &mut signal); &workspace.downsample_b[..produced2],
best_start,
&mut workspace.signal,
);
// Extract bit metrics // Extract bit metrics
let bitmetrics = bitmetrics::extract_bitmetrics_raw(&signal)?; let bitmetrics = workspace.bitmetrics.extract(&workspace.signal)?;
// Sync quality check using known Costas bit patterns // Sync quality check using known Costas bit patterns
let sync_bits_a: [u8; 8] = [0, 0, 0, 1, 1, 0, 1, 1]; let sync_bits_a: [u8; 8] = [0, 0, 0, 1, 1, 0, 1, 1];
@@ -472,10 +560,26 @@ impl Ft2Pipeline {
let sync_bits_d: [u8; 8] = [1, 0, 1, 1, 0, 0, 0, 1]; let sync_bits_d: [u8; 8] = [1, 0, 1, 1, 0, 0, 0, 1];
let mut sync_qual = 0; let mut sync_qual = 0;
for i in 0..8 { for i in 0..8 {
sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] { 1 } else { 0 }; sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] {
sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] { 1 } else { 0 }; 1
sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] { 1 } else { 0 }; } else {
sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] { 1 } else { 0 }; 0
};
sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] {
1
} else {
0
};
sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] {
1
} else {
0
};
sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] {
1
} else {
0
};
} }
if sync_qual < 10 { if sync_qual < 10 {
return None; return None;
@@ -591,8 +695,18 @@ impl Ft2Pipeline {
} }
// Compute refined timing via parabolic interpolation // Compute refined timing via parabolic interpolation
let sm1 = sync2d_score(&cd2[..produced], best_start - 1, best_idf, &waveforms); let sm1 = sync2d_score(
let sp1 = sync2d_score(&cd2[..produced], best_start + 1, best_idf, &waveforms); &workspace.downsample_a[..produced],
best_start - 1,
best_idf,
&self.waveforms,
);
let sp1 = sync2d_score(
&workspace.downsample_a[..produced],
best_start + 1,
best_idf,
&self.waveforms,
);
let mut xstart = best_start as f32; let mut xstart = best_start as f32;
let den = sm1 - 2.0 * best_score + sp1; let den = sm1 - 2.0 * best_score + sp1;
if den.abs() > 1e-6 { if den.abs() > 1e-6 {
@@ -635,7 +749,11 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
if power <= 0.0 { if power <= 0.0 {
return; return;
} }
let rc = if ref_count == 0 { samples.len() } else { ref_count }; let rc = if ref_count == 0 {
samples.len()
} else {
ref_count
};
let scale = (rc as f32 / power).sqrt(); let scale = (rc as f32 / power).sqrt();
for s in samples.iter_mut() { for s in samples.iter_mut() {
*s *= scale; *s *= scale;
@@ -644,14 +762,17 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
/// Copy a window of `input` that begins at sample offset `start` into `out_signal`.
///
/// The window is `out_signal.len()` samples long. `start` may be negative or
/// reach past the end of `input`; every output slot whose source index falls
/// outside `input` is left as zero.
///
/// * `input` - source samples.
/// * `start` - index in `input` that maps to `out_signal[0]` (may be negative).
/// * `out_signal` - destination buffer; it is fully overwritten.
fn extract_signal_region(input: &[Complex32], start: i32, out_signal: &mut [Complex32]) {
    // Zero everything first so only the overlapping span needs to be copied.
    out_signal.fill(Complex32::new(0.0, 0.0));

    // A negative `start` means the window opens before the first input sample,
    // so that many leading output slots stay zero. `unsigned_abs` is used
    // instead of `-start` because negating `i32::MIN` overflows.
    let (src_start, dst_start) = if start < 0 {
        (0usize, start.unsigned_abs() as usize)
    } else {
        (start as usize, 0usize)
    };
    if dst_start >= out_signal.len() || src_start >= input.len() {
        // No overlap between the window and the input.
        return;
    }

    // Both subtractions are safe thanks to the guard above.
    let copy_len = (input.len() - src_start).min(out_signal.len() - dst_start);
    out_signal[dst_start..dst_start + copy_len]
        .copy_from_slice(&input[src_start..src_start + copy_len]);
}
/// Normalize LLR array (divide by standard deviation). /// Normalize LLR array (divide by standard deviation).
+92 -27
View File
@@ -9,6 +9,7 @@
//! reference across time and frequency offsets. //! reference across time and frequency offsets.
use num_complex::Complex32; use num_complex::Complex32;
use std::sync::OnceLock;
use crate::constants::FT4_COSTAS_PATTERN; use crate::constants::FT4_COSTAS_PATTERN;
@@ -16,6 +17,12 @@ use super::{FT2_NDOWN, FT2_NSS, FT2_SYMBOL_PERIOD_F, FT2_SYNC_TWEAK_MAX, FT2_SYN
/// Number of frequency tweak entries. /// Number of frequency tweak entries.
const NUM_TWEAKS: usize = (FT2_SYNC_TWEAK_MAX - FT2_SYNC_TWEAK_MIN) as usize + 1; const NUM_TWEAKS: usize = (FT2_SYNC_TWEAK_MAX - FT2_SYNC_TWEAK_MIN) as usize + 1;
/// Number of Costas sync groups correlated within one frame.
const SYNC_GROUP_COUNT: usize = 4;
/// Number of reference samples correlated for each sync group.
const SYNC_SAMPLES: usize = 64;
/// Spacing, in input samples, between consecutive samples used in a group correlation.
const SAMPLE_STRIDE: usize = 2;
/// Offset, in input samples, between the starts of adjacent sync groups
/// (33 symbols of `FT2_NSS` samples each).
const GROUP_STRIDE: i32 = 33 * FT2_NSS as i32;
/// Offset from a group's first sample to the last sample that group reads.
const GROUP_LAST_SAMPLE_OFFSET: i32 = SAMPLE_STRIDE as i32 * (SYNC_SAMPLES as i32 - 1);
/// Offset from the frame start to the last sample read by the final (fourth)
/// sync group; used to decide whether a whole frame is in bounds.
const FRAME_LAST_SAMPLE_OFFSET: i32 = 3 * GROUP_STRIDE + GROUP_LAST_SAMPLE_OFFSET;
/// Precomputed sync and frequency-tweak waveforms. /// Precomputed sync and frequency-tweak waveforms.
pub struct SyncWaveforms { pub struct SyncWaveforms {
@@ -73,6 +80,74 @@ pub fn prepare_sync_waveforms() -> SyncWaveforms {
} }
} }
/// Conjugated correlation references, indexed as `[tweak][group][sample]`.
type SyncReferenceBank = [[[Complex32; SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];

/// Return the process-wide table of precomputed correlation references.
///
/// The table is built once, on first use, from `prepare_sync_waveforms()`.
/// Each entry is the complex conjugate of the product of a sync-wave sample
/// and the matching frequency-tweak sample, so callers can correlate with a
/// plain complex multiply.
fn sync_reference_bank() -> &'static SyncReferenceBank {
    static REFS: OnceLock<SyncReferenceBank> = OnceLock::new();

    REFS.get_or_init(|| {
        let waveforms = prepare_sync_waveforms();
        let zero = Complex32::new(0.0, 0.0);
        let mut bank: SyncReferenceBank = [[[zero; SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];

        for (tw_idx, per_tweak) in bank.iter_mut().enumerate() {
            for (group, per_group) in per_tweak.iter_mut().enumerate() {
                for (i, slot) in per_group.iter_mut().enumerate() {
                    let product = waveforms.sync_wave[group][i] * waveforms.tweak_wave[tw_idx][i];
                    *slot = product.conj();
                }
            }
        }

        bank
    })
}
/// Correlate one sync group against its conjugated reference, with no clipping.
///
/// Reads `SYNC_SAMPLES` samples from `samples`, starting at `pos` and stepping
/// by `SAMPLE_STRIDE`, multiplies each by the matching entry of `refs`, and
/// returns the magnitude of the accumulated complex sum.
///
/// The caller must guarantee that every accessed index is in bounds; an
/// out-of-range index panics.
#[inline(always)]
fn correlate_group_fast(
    samples: &[Complex32],
    pos: usize,
    refs: &[Complex32; SYNC_SAMPLES],
) -> f32 {
    // Real and imaginary parts are accumulated separately, in sample order.
    let (acc_re, acc_im) =
        refs.iter()
            .enumerate()
            .fold((0.0f32, 0.0f32), |(re, im), (k, r)| {
                let s = samples[pos + k * SAMPLE_STRIDE];
                (
                    re + (s.re * r.re - s.im * r.im),
                    im + (s.re * r.im + s.im * r.re),
                )
            });

    (acc_re * acc_re + acc_im * acc_im).sqrt()
}
/// Correlate one sync group against its conjugated reference, skipping any
/// sample positions that fall outside `samples`.
///
/// `pos` is the (possibly negative) index of the group's first sample.
/// Positions are visited at `pos + i * SAMPLE_STRIDE` for each of the
/// `SYNC_SAMPLES` reference entries.
///
/// Returns `(magnitude, usable)`: the magnitude of the accumulated complex sum
/// over in-range samples, and how many samples contributed to it.
#[inline(always)]
fn correlate_group_clipped(
    samples: &[Complex32],
    pos: i32,
    refs: &[Complex32; SYNC_SAMPLES],
) -> (f32, usize) {
    let len = samples.len() as i32;
    let step = SAMPLE_STRIDE as i32;

    let mut acc_re = 0.0f32;
    let mut acc_im = 0.0f32;
    let mut count = 0usize;

    for (k, r) in refs.iter().enumerate() {
        let idx = pos + k as i32 * step;
        // Only accumulate samples that actually exist in the buffer.
        if (0..len).contains(&idx) {
            let s = samples[idx as usize];
            acc_re += s.re * r.re - s.im * r.im;
            acc_im += s.re * r.im + s.im * r.re;
            count += 1;
        }
    }

    ((acc_re * acc_re + acc_im * acc_im).sqrt(), count)
}
/// Compute the 2D sync score for a given time offset and frequency tweak. /// Compute the 2D sync score for a given time offset and frequency tweak.
/// ///
/// Correlates the downsampled complex samples against the four Costas sync /// Correlates the downsampled complex samples against the four Costas sync
@@ -88,46 +163,36 @@ pub fn sync2d_score(
samples: &[Complex32], samples: &[Complex32],
start: i32, start: i32,
idf: i32, idf: i32,
waveforms: &SyncWaveforms, _waveforms: &SyncWaveforms,
) -> f32 { ) -> f32 {
let nss = FT2_NSS as i32;
let n_samples = samples.len() as i32; let n_samples = samples.len() as i32;
// The four sync groups are at symbol positions 0, 33, 66, 99 within the frame
let positions = [
start,
start + 33 * nss,
start + 66 * nss,
start + 99 * nss,
];
let tw_idx = (idf - FT2_SYNC_TWEAK_MIN) as usize; let tw_idx = (idf - FT2_SYNC_TWEAK_MIN) as usize;
if tw_idx >= waveforms.tweak_wave.len() { if tw_idx >= NUM_TWEAKS {
return 0.0; return 0.0;
} }
let tweak = &waveforms.tweak_wave[tw_idx];
let refs = &sync_reference_bank()[tw_idx];
let scale = 1.0 / (2.0 * FT2_NSS as f32);
let mut score = 0.0f32; let mut score = 0.0f32;
for group in 0..4 { if start >= 0 && start + FRAME_LAST_SAMPLE_OFFSET < n_samples {
let pos = positions[group]; for (group, refs_group) in refs.iter().enumerate() {
let mut sum = Complex32::new(0.0, 0.0); let pos = (start + group as i32 * GROUP_STRIDE) as usize;
let mut usable = 0; score += correlate_group_fast(samples, pos, refs_group) * scale;
}
return score;
}
for i in 0..64 { for (group, refs_group) in refs.iter().enumerate() {
let sample_idx = pos + 2 * i as i32; let pos = start + group as i32 * GROUP_STRIDE;
if sample_idx < 0 || sample_idx >= n_samples { if pos >= n_samples || pos + GROUP_LAST_SAMPLE_OFFSET < 0 {
continue; continue;
} }
// Correlate: multiply received sample by conjugate of
// (sync_reference * tweak_phasor)
let reference = waveforms.sync_wave[group][i] * tweak[i];
sum += samples[sample_idx as usize] * reference.conj();
usable += 1;
}
let (corr, usable) = correlate_group_clipped(samples, pos, refs_group);
if usable > 16 { if usable > 16 {
score += sum.norm() / (2.0 * FT2_NSS as f32); score += corr * scale;
} }
} }