[refactor](trx-ftx): optimize ft2 decode hot paths

Reuse FT2 downsample and bitmetric work buffers, speed up
sync2d_score with precomputed references, and cache peak-search
FFT state on the pipeline.

Co-authored-by: OpenAI Codex <codex@openai.com>

Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
2026-03-18 23:08:42 +01:00
parent 7d20058c03
commit 0b28900082
5 changed files with 563 additions and 342 deletions
+19 -18
View File
@@ -176,8 +176,7 @@ impl Ft8Decoder {
/// Waterfall-based decode for FT8/FT4.
fn decode_waterfall(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
let candidates =
ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
let candidates = ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
let mut results = Vec::new();
let mut seen: Vec<u16> = Vec::new();
@@ -209,13 +208,14 @@ impl Ft8Decoder {
// Compute time offset
let symbol_period = self.protocol.symbol_period();
let dt_s =
(cand.time_offset as f32 + cand.time_sub as f32 / self.monitor.wf.time_osr as f32)
* symbol_period
- 0.5;
let dt_s = (cand.time_offset as f32
+ cand.time_sub as f32 / self.monitor.wf.time_osr as f32)
* symbol_period
- 0.5;
// Compute frequency
let freq_hz = (self.monitor.min_bin as f32 + cand.freq_offset as f32
let freq_hz = (self.monitor.min_bin as f32
+ cand.freq_offset as f32
+ cand.freq_sub as f32 / self.monitor.wf.freq_osr as f32)
/ symbol_period;
@@ -232,16 +232,18 @@ impl Ft8Decoder {
/// FT2-specific decode pipeline.
fn decode_ft2(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
let pipe = match self.ft2_pipeline.as_ref() {
Some(p) => p,
None => return Vec::new(),
let ft2_results = {
let pipe = match self.ft2_pipeline.as_mut() {
Some(p) => p,
None => return Vec::new(),
};
if !pipe.is_ready() {
return Vec::new();
}
pipe.decode(max_results)
};
if !pipe.is_ready() {
return Vec::new();
}
let ft2_results = pipe.decode(max_results);
let mut results = Vec::new();
for r in ft2_results {
@@ -267,8 +269,7 @@ impl Ft8Decoder {
payload: msg.payload,
hash: msg.hash as u32,
};
let (text, _offsets, _rc) =
message::ftx_message_decode(&m, &mut self.callsign_hash);
let (text, _offsets, _rc) = message::ftx_message_decode(&m, &mut self.callsign_hash);
if text.is_empty() {
return None;
}
+177 -182
View File
@@ -15,6 +15,178 @@ use crate::constants::{FT4_COSTAS_PATTERN, FT4_GRAY_MAP};
use super::{FT2_FRAME_SYMBOLS, FT2_NSS};
const N_METRICS: usize = 2 * FT2_FRAME_SYMBOLS;
/// Reusable FFT plans and scratch buffers for bit-metric extraction.
///
/// Holding these in one place lets repeated extractions reuse the FFT plan
/// and every intermediate array instead of reallocating them per call.
pub struct BitMetricsWorkspace {
/// Forward FFT plan of length `FT2_NSS`, applied to one symbol at a time.
fft: std::sync::Arc<dyn rustfft::Fft<f32>>,
/// In-place scratch space sized from the FFT plan.
scratch: Vec<Complex32>,
/// First four FFT bins (the four tones) of every frame symbol.
symbols: [[Complex32; 4]; FT2_FRAME_SYMBOLS],
/// Magnitudes of the entries in `symbols`.
s4: [[f32; 4]; FT2_FRAME_SYMBOLS],
/// Bit metrics from single-symbol integration.
metric1: [f32; N_METRICS],
/// Bit metrics from two-symbol coherent integration.
metric2: [f32; N_METRICS],
/// Bit metrics from four-symbol coherent integration.
metric4: [f32; N_METRICS],
/// Packed output rows: `[metric1, metric2, metric4]` per bit.
bitmetrics: [[f32; 3]; N_METRICS],
/// Work buffer holding one symbol's samples; transformed in place by `fft`.
csymb: [Complex32; FT2_NSS],
}
impl BitMetricsWorkspace {
    /// Plan the `FT2_NSS`-point forward FFT and allocate zeroed work buffers.
    pub fn new() -> Self {
        let mut planner = FftPlanner::<f32>::new();
        let fft = planner.plan_fft_forward(FT2_NSS);
        let scratch = vec![Complex32::new(0.0, 0.0); fft.get_inplace_scratch_len()];

        Self {
            fft,
            scratch,
            symbols: [[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS],
            s4: [[0.0; 4]; FT2_FRAME_SYMBOLS],
            metric1: [0.0; N_METRICS],
            metric2: [0.0; N_METRICS],
            metric4: [0.0; N_METRICS],
            bitmetrics: [[0.0; 3]; N_METRICS],
            csymb: [Complex32::new(0.0, 0.0); FT2_NSS],
        }
    }

    /// Extract bit metrics into a reusable internal buffer.
    ///
    /// `signal` is consumed in `FT2_NSS`-sample symbols. A signal shorter than
    /// a full frame is zero-padded, including the case where it ends before a
    /// symbol even starts.
    ///
    /// Returns a borrowed slice of `N_METRICS` rows, each holding the
    /// normalized 1-, 2- and 4-symbol metrics for one bit. Returns `None`
    /// when fewer than 4 of the 16 Costas sync symbols have their strongest
    /// tone where the sync pattern expects it.
    pub fn extract<'a>(&'a mut self, signal: &[Complex32]) -> Option<&'a [[f32; 3]]> {
        let zero = Complex32::new(0.0, 0.0);

        self.metric1.fill(0.0);
        self.metric2.fill(0.0);
        self.metric4.fill(0.0);

        // Per-symbol FFT: keep the complex amplitude and magnitude of each tone.
        for sym in 0..FT2_FRAME_SYMBOLS {
            let offset = sym * FT2_NSS;

            // `get` yields `None` once `offset` is past the end of `signal`;
            // indexing `signal[offset..]` directly would panic in that case.
            let available = signal.get(offset..).unwrap_or_default();
            let take = available.len().min(FT2_NSS);
            self.csymb[..take].copy_from_slice(&available[..take]);
            self.csymb[take..].fill(zero);

            self.fft
                .process_with_scratch(&mut self.csymb, &mut self.scratch);

            for tone in 0..4 {
                let symbol = self.csymb[tone];
                self.symbols[sym][tone] = symbol;
                self.s4[sym][tone] = symbol.norm();
            }
        }

        // Sync quality check: verify Costas patterns are detectable.
        let mut sync_ok = 0;
        for group in 0..4 {
            let base = group * 33;
            for i in 0..4 {
                if base + i >= FT2_FRAME_SYMBOLS {
                    continue;
                }
                let mut best = 0;
                for tone in 1..4 {
                    if self.s4[base + i][tone] > self.s4[base + i][best] {
                        best = tone;
                    }
                }
                if best == FT4_COSTAS_PATTERN[group][i] as usize {
                    sync_ok += 1;
                }
            }
        }
        if sync_ok < 4 {
            return None;
        }

        // Compute metrics at three integration lengths (1, 2 and 4 symbols).
        for nseq in 0..3 {
            let (nsym, metric): (usize, &mut [f32; N_METRICS]) = match nseq {
                0 => (1, &mut self.metric1),
                1 => (2, &mut self.metric2),
                _ => (4, &mut self.metric4),
            };
            // Two bits per symbol, so 4^nsym candidate tone sequences.
            let nt = 1usize << (2 * nsym);
            // Index of the last bit carried by an `nsym`-symbol group.
            let ibmax = match nsym {
                1 => 1,
                2 => 3,
                4 => 7,
                _ => 0,
            };

            let mut ks = 0;
            while ks + nsym <= FT2_FRAME_SYMBOLS {
                // Strongest coherent magnitude seen with each bit set / clear.
                let mut max_one = [f32::NEG_INFINITY; 8];
                let mut max_zero = [f32::NEG_INFINITY; 8];

                for i in 0..nt {
                    let sum = match nsym {
                        1 => self.symbols[ks][FT4_GRAY_MAP[i & 0x03] as usize],
                        2 => {
                            self.symbols[ks][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
                                + self.symbols[ks + 1][FT4_GRAY_MAP[i & 0x03] as usize]
                        }
                        4 => {
                            self.symbols[ks][FT4_GRAY_MAP[(i >> 6) & 0x03] as usize]
                                + self.symbols[ks + 1][FT4_GRAY_MAP[(i >> 4) & 0x03] as usize]
                                + self.symbols[ks + 2][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
                                + self.symbols[ks + 3][FT4_GRAY_MAP[i & 0x03] as usize]
                        }
                        _ => Complex32::new(0.0, 0.0),
                    };
                    let coherent = sum.norm();

                    // Bit `ib` counts from the most significant bit of `i`.
                    for ib in 0..=ibmax {
                        if ((i >> (ibmax - ib)) & 1) != 0 {
                            max_one[ib] = max_one[ib].max(coherent);
                        } else {
                            max_zero[ib] = max_zero[ib].max(coherent);
                        }
                    }
                }

                let ipt = 2 * ks;
                for ib in 0..=ibmax {
                    let metric_idx = ipt + ib;
                    if metric_idx < N_METRICS {
                        metric[metric_idx] = max_one[ib] - max_zero[ib];
                    }
                }

                ks += nsym;
            }
        }

        // Patch boundary metrics where multi-symbol integration overruns:
        // the trailing entries fall back to the next finer integration length.
        // NOTE(review): the fixed indices presume N_METRICS == 206; the
        // constant is defined outside this view, confirm.
        self.metric2[204] = self.metric1[204];
        self.metric2[205] = self.metric1[205];
        self.metric4[200] = self.metric2[200];
        self.metric4[201] = self.metric2[201];
        self.metric4[202] = self.metric2[202];
        self.metric4[203] = self.metric2[203];
        self.metric4[204] = self.metric1[204];
        self.metric4[205] = self.metric1[205];

        // Normalize each integration length independently.
        normalize_metric(&mut self.metric1);
        normalize_metric(&mut self.metric2);
        normalize_metric(&mut self.metric4);

        // Pack the three scales side by side for the caller.
        for i in 0..N_METRICS {
            self.bitmetrics[i][0] = self.metric1[i];
            self.bitmetrics[i][1] = self.metric2[i];
            self.bitmetrics[i][2] = self.metric4[i];
        }

        Some(&self.bitmetrics)
    }
}
impl Default for BitMetricsWorkspace {
fn default() -> Self {
Self::new()
}
}
/// Extract bit metrics from the downsampled signal region.
///
/// Returns a 2D array of shape `[2 * FT2_FRAME_SYMBOLS][3]` where:
@@ -24,188 +196,11 @@ use super::{FT2_FRAME_SYMBOLS, FT2_NSS};
///
/// Returns `None` if the sync quality is too poor (fewer than 4 of 16
/// Costas sync tones decoded correctly).
pub fn extract_bitmetrics_raw(
signal: &[Complex32],
) -> Option<Vec<[f32; 3]>> {
let n_metrics = 2 * FT2_FRAME_SYMBOLS;
let mut bitmetrics = vec![[0.0f32; 3]; n_metrics];
// Per-symbol FFT to extract complex tone amplitudes
let mut planner = FftPlanner::<f32>::new();
let fft = planner.plan_fft_forward(FT2_NSS);
let fft_scratch_len = fft.get_inplace_scratch_len();
let mut scratch = vec![Complex32::new(0.0, 0.0); fft_scratch_len];
// Complex symbols for each of the 4 tones at each frame symbol
let mut symbols = vec![[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS];
// Magnitude for each tone at each symbol
let mut s4 = vec![[0.0f32; 4]; FT2_FRAME_SYMBOLS];
for sym in 0..FT2_FRAME_SYMBOLS {
let offset = sym * FT2_NSS;
let mut csymb: Vec<Complex32> = (0..FT2_NSS)
.map(|i| {
if offset + i < signal.len() {
signal[offset + i]
} else {
Complex32::new(0.0, 0.0)
}
})
.collect();
fft.process_with_scratch(&mut csymb, &mut scratch);
for tone in 0..4 {
if tone < csymb.len() {
symbols[sym][tone] = csymb[tone];
s4[sym][tone] = csymb[tone].norm();
}
}
}
// Sync quality check: verify Costas patterns are detectable
let mut sync_ok = 0;
for group in 0..4 {
let base = group * 33;
for i in 0..4 {
if base + i >= FT2_FRAME_SYMBOLS {
continue;
}
let mut best = 0;
for tone in 1..4 {
if s4[base + i][tone] > s4[base + i][best] {
best = tone;
}
}
if best == FT4_COSTAS_PATTERN[group][i] as usize {
sync_ok += 1;
}
}
}
if sync_ok < 4 {
return None;
}
// Precompute one_mask: for each integer 0..255 and bit position 0..7,
// whether that bit is set.
let one_mask: Vec<[u8; 8]> = (0..256u16)
.map(|i| {
let mut m = [0u8; 8];
for j in 0..8 {
m[j] = if (i & (1 << j)) != 0 { 1 } else { 0 };
}
m
})
.collect();
// Compute metrics at three scales
let mut metric1 = vec![0.0f32; n_metrics];
let mut metric2 = vec![0.0f32; n_metrics];
let mut metric4 = vec![0.0f32; n_metrics];
for nseq in 0..3 {
let nsym = match nseq {
0 => 1,
1 => 2,
_ => 4,
};
let nt = 1 << (2 * nsym); // number of tone sequences to enumerate
let mut ks = 0;
while ks + nsym <= FT2_FRAME_SYMBOLS {
// Compute coherent magnitude for each possible tone sequence
let mut s2 = vec![0.0f32; nt];
for i in 0..nt {
let i1 = i / 64;
let i2 = (i & 63) / 16;
let i3 = (i & 15) / 4;
let i4 = i & 3;
let sum = match nsym {
1 => symbols[ks][FT4_GRAY_MAP[i4] as usize],
2 => {
symbols[ks][FT4_GRAY_MAP[i3] as usize]
+ symbols[ks + 1][FT4_GRAY_MAP[i4] as usize]
}
4 => {
symbols[ks][FT4_GRAY_MAP[i1] as usize]
+ symbols[ks + 1][FT4_GRAY_MAP[i2] as usize]
+ symbols[ks + 2][FT4_GRAY_MAP[i3] as usize]
+ symbols[ks + 3][FT4_GRAY_MAP[i4] as usize]
}
_ => Complex32::new(0.0, 0.0),
};
s2[i] = sum.norm();
}
// Extract bit metrics: for each bit position, find max coherent
// magnitude with that bit set vs unset
let ipt = 2 * ks;
let ibmax: usize = match nsym {
1 => 1,
2 => 3,
4 => 7,
_ => 0,
};
for ib in 0..=ibmax {
let mut max_one = f32::NEG_INFINITY;
let mut max_zero = f32::NEG_INFINITY;
for i in 0..nt {
if i < 256 {
if one_mask[i][ibmax - ib] != 0 {
if s2[i] > max_one {
max_one = s2[i];
}
} else if s2[i] > max_zero {
max_zero = s2[i];
}
}
}
let metric_idx = ipt + ib;
if metric_idx >= n_metrics {
continue;
}
match nseq {
0 => metric1[metric_idx] = max_one - max_zero,
1 => metric2[metric_idx] = max_one - max_zero,
_ => metric4[metric_idx] = max_one - max_zero,
}
}
ks += nsym;
}
}
// Patch boundary metrics where multi-symbol integration overruns
if n_metrics >= 206 {
metric2[204] = metric1[204];
metric2[205] = metric1[205];
metric4[200] = metric2[200];
metric4[201] = metric2[201];
metric4[202] = metric2[202];
metric4[203] = metric2[203];
metric4[204] = metric1[204];
metric4[205] = metric1[205];
}
// Normalize each metric scale independently
normalize_metric(&mut metric1);
normalize_metric(&mut metric2);
normalize_metric(&mut metric4);
// Pack into output
for i in 0..n_metrics {
bitmetrics[i][0] = metric1[i];
bitmetrics[i][1] = metric2[i];
bitmetrics[i][2] = metric4[i];
}
Some(bitmetrics)
pub fn extract_bitmetrics_raw(signal: &[Complex32]) -> Option<Vec<[f32; 3]>> {
    // One-shot path: build a throwaway workspace, run the extraction, and
    // copy the borrowed rows into an owned vector for the caller.
    let mut one_shot = BitMetricsWorkspace::new();
    let rows = one_shot.extract(signal)?;
    Some(rows.to_vec())
}
/// Normalize a metric array by dividing by its standard deviation.
+63 -24
View File
@@ -14,6 +14,34 @@ use rustfft::FftPlanner;
use super::{FT2_NDOWN, FT2_SYMBOL_PERIOD_F};
/// Reusable scratch buffers for frequency-domain downsampling.
pub struct DownsampleWorkspace {
    /// Frequency-domain band buffer; the inverse FFT transforms it in place.
    band: Vec<Complex32>,
    /// Scratch space handed to the inverse FFT plan.
    ifft_scratch: Vec<Complex32>,
}

impl DownsampleWorkspace {
    /// Allocate zeroed buffers for a band of `nfft2` bins and an inverse FFT
    /// that needs `ifft_scratch_len` scratch elements.
    fn new(nfft2: usize, ifft_scratch_len: usize) -> Self {
        let zero = Complex32::new(0.0, 0.0);
        Self {
            band: vec![zero; nfft2],
            ifft_scratch: vec![zero; ifft_scratch_len],
        }
    }

    /// Reset the workspace for one downsampling pass.
    ///
    /// After this call `band` holds exactly `nfft2` zeros and `ifft_scratch`
    /// holds `ifft_scratch_len` elements.
    fn prepare(&mut self, nfft2: usize, ifft_scratch_len: usize) {
        let zero = Complex32::new(0.0, 0.0);

        // Clear before resizing so every bin is zero even when the length
        // changes; `resize` alone would keep stale values in the retained
        // prefix from an earlier pass.
        self.band.clear();
        self.band.resize(nfft2, zero);

        // Only the scratch length matters here, so it is not re-zeroed.
        if self.ifft_scratch.len() != ifft_scratch_len {
            self.ifft_scratch.resize(ifft_scratch_len, zero);
        }
    }
}
/// Downsample context holding precomputed FFT data and spectral window.
pub struct DownsampleContext {
/// Number of raw samples.
@@ -28,8 +56,8 @@ pub struct DownsampleContext {
spectrum: Vec<Complex32>,
/// IFFT plan for the downsampled length.
ifft: std::sync::Arc<dyn rustfft::Fft<f32>>,
/// Scratch buffer for IFFT.
ifft_scratch: Vec<Complex32>,
/// Scratch length required by the IFFT plan.
ifft_scratch_len: usize,
}
impl DownsampleContext {
@@ -50,7 +78,11 @@ impl DownsampleContext {
let df = sample_rate / nraw as f32;
// Build spectral extraction window
let window = build_spectral_window(nfft2, df);
let mut window = build_spectral_window(nfft2, df);
let inv_nfft2 = 1.0 / nfft2 as f32;
for coeff in &mut window {
*coeff *= inv_nfft2;
}
// Forward real FFT of raw audio
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
@@ -59,11 +91,7 @@ impl DownsampleContext {
let mut output = fft.make_output_vec();
let mut scratch = fft.make_scratch_vec();
for (i, s) in raw_audio.iter().enumerate() {
if i < input.len() {
input[i] = *s;
}
}
input.copy_from_slice(raw_audio);
fft.process_with_scratch(&mut input, &mut output, &mut scratch)
.ok()?;
@@ -72,7 +100,7 @@ impl DownsampleContext {
// IFFT plan for downsampled length
let mut planner = FftPlanner::<f32>::new();
let ifft = planner.plan_fft_inverse(nfft2);
let ifft_scratch = vec![Complex32::new(0.0, 0.0); ifft.get_inplace_scratch_len()];
let ifft_scratch_len = ifft.get_inplace_scratch_len();
Some(Self {
nraw,
@@ -81,7 +109,7 @@ impl DownsampleContext {
window,
spectrum,
ifft,
ifft_scratch,
ifft_scratch_len,
})
}
@@ -90,15 +118,31 @@ impl DownsampleContext {
self.nfft2
}
/// Create reusable buffers for repeated downsampling with this context.
pub fn workspace(&self) -> DownsampleWorkspace {
DownsampleWorkspace::new(self.nfft2, self.ifft_scratch_len)
}
/// Downsample the raw audio around `freq_hz`, writing complex baseband
/// samples into `out`. Returns the number of samples produced.
pub fn downsample(&self, freq_hz: f32, out: &mut [Complex32]) -> usize {
let mut workspace = self.workspace();
self.downsample_with_workspace(freq_hz, out, &mut workspace)
}
/// Downsample the raw audio using reusable scratch buffers.
pub fn downsample_with_workspace(
&self,
freq_hz: f32,
out: &mut [Complex32],
workspace: &mut DownsampleWorkspace,
) -> usize {
if out.len() < self.nfft2 {
return 0;
}
// Working band buffer
let mut band = vec![Complex32::new(0.0, 0.0); self.nfft2];
workspace.prepare(self.nfft2, self.ifft_scratch_len);
let band = &mut workspace.band;
let i0 = (freq_hz / self.df).round() as i32;
let half_nraw = (self.nraw / 2) as i32;
@@ -119,21 +163,16 @@ impl DownsampleContext {
}
}
// Apply spectral window and scale
let inv_nfft2 = 1.0 / self.nfft2 as f32;
// Apply spectral window
for i in 0..self.nfft2 {
band[i] = Complex32::new(
band[i].re * self.window[i] * inv_nfft2,
band[i].im * self.window[i] * inv_nfft2,
);
band[i] *= self.window[i];
}
// Inverse FFT (in-place)
let mut scratch = self.ifft_scratch.clone();
self.ifft
.process_with_scratch(&mut band, &mut scratch);
.process_with_scratch(band, &mut workspace.ifft_scratch);
out[..self.nfft2].copy_from_slice(&band);
out[..self.nfft2].copy_from_slice(band);
self.nfft2
}
}
@@ -167,13 +206,13 @@ fn build_spectral_window(nfft2: usize, df: f32) -> Vec<f32> {
// Raised-cosine trailing edge
for i in (iwt + iwf)..(2 * iwt + iwf).min(nfft2) {
window[i] = 0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos());
window[i] =
0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos());
}
// Circular shift by iws bins
if iws > 0 && iws < nfft2 {
let shifted: Vec<f32> = (0..nfft2).map(|i| window[(i + iws) % nfft2]).collect();
window.copy_from_slice(&shifted);
window.rotate_left(iws);
}
window
+211 -90
View File
@@ -14,6 +14,7 @@ pub mod osd;
pub mod sync;
use num_complex::Complex32;
use realfft::RealFftPlanner;
use crate::constants::FT4_XOR_SEQUENCE;
use crate::crc::{ftx_compute_crc, ftx_extract_crc};
@@ -21,8 +22,9 @@ use crate::decode::{pack_bits, FtxMessage};
use crate::ldpc;
use crate::protocol::*;
use downsample::DownsampleContext;
use sync::{prepare_sync_waveforms, sync2d_score};
use bitmetrics::BitMetricsWorkspace;
use downsample::{DownsampleContext, DownsampleWorkspace};
use sync::{prepare_sync_waveforms, sync2d_score, SyncWaveforms};
// FT2 DSP constants
pub const FT2_NDOWN: usize = 9;
@@ -119,6 +121,62 @@ pub struct Ft2Pipeline {
sample_rate: f32,
raw_audio: Vec<f32>,
raw_capacity: usize,
waveforms: SyncWaveforms,
peak_search: PeakSearchWorkspace,
}
/// Scratch state shared by the scan and decode stages of one decode pass.
struct Ft2DecodeWorkspace {
    /// Buffers reused by every downsampling call.
    downsample: DownsampleWorkspace,
    /// Output of the initial downsample, used for sync search and refinement.
    downsample_a: Vec<Complex32>,
    /// Output of the final downsample at the corrected frequency.
    downsample_b: Vec<Complex32>,
    /// Frame-length signal region cut out of `downsample_b`.
    signal: Vec<Complex32>,
    /// FFT plan and buffers for bit-metric extraction.
    bitmetrics: BitMetricsWorkspace,
}

impl Ft2DecodeWorkspace {
    /// Allocate zeroed buffers sized for the given downsample context.
    fn new(ctx: &DownsampleContext) -> Self {
        let zero = Complex32::new(0.0, 0.0);
        let band_len = ctx.nfft2();

        let downsample = ctx.workspace();
        let downsample_a = vec![zero; band_len];
        let downsample_b = vec![zero; band_len];
        let signal = vec![zero; FT2_FRAME_SAMPLES];
        let bitmetrics = BitMetricsWorkspace::new();

        Self {
            downsample,
            downsample_a,
            downsample_b,
            signal,
            bitmetrics,
        }
    }
}
/// Cached window, FFT plan and buffers for the frequency peak search.
struct PeakSearchWorkspace {
    /// Nuttall window of length `FT2_NFFT1`.
    window: Vec<f32>,
    /// Forward real FFT plan of length `FT2_NFFT1`.
    fft: std::sync::Arc<dyn realfft::RealToComplex<f32>>,
    /// Input buffer for one frame passed to `fft`.
    fft_input: Vec<f32>,
    /// Complex spectrum produced by `fft`.
    fft_output: Vec<Complex32>,
    /// Scratch space required by `fft`.
    fft_scratch: Vec<Complex32>,
    /// Power spectrum averaged across frames.
    avg: Vec<f32>,
    /// Moving-average smoothed copy of `avg`.
    smooth: Vec<f32>,
    /// Wider moving average of `smooth`.
    baseline: Vec<f32>,
}

impl PeakSearchWorkspace {
    /// Build the window, plan the FFT and allocate zeroed spectra.
    fn new() -> Self {
        let fft = RealFftPlanner::<f32>::new().plan_fft_forward(FT2_NFFT1);

        Self {
            window: nuttall_window(FT2_NFFT1),
            fft_input: fft.make_input_vec(),
            fft_output: fft.make_output_vec(),
            fft_scratch: fft.make_scratch_vec(),
            avg: vec![0.0; FT2_NH1],
            smooth: vec![0.0; FT2_NH1],
            baseline: vec![0.0; FT2_NH1],
            // Moved last: the buffers above are sized from the plan.
            fft,
        }
    }
}
impl Ft2Pipeline {
@@ -128,6 +186,8 @@ impl Ft2Pipeline {
sample_rate: sample_rate as f32,
raw_audio: Vec::with_capacity(FT2_NMAX),
raw_capacity: FT2_NMAX,
waveforms: prepare_sync_waveforms(),
peak_search: PeakSearchWorkspace::new(),
}
}
@@ -157,7 +217,7 @@ impl Ft2Pipeline {
}
/// Run the full FT2 decode pipeline. Returns decoded messages.
pub fn decode(&self, max_results: usize) -> Vec<Ft2DecodeResult> {
pub fn decode(&mut self, max_results: usize) -> Vec<Ft2DecodeResult> {
if self.raw_audio.len() < FT2_NFFT1 {
return Vec::new();
}
@@ -167,7 +227,8 @@ impl Ft2Pipeline {
None => return Vec::new(),
};
let hits = self.find_scan_hits(&ctx);
let mut workspace = Ft2DecodeWorkspace::new(&ctx);
let hits = self.find_scan_hits(&ctx, &mut workspace);
if hits.is_empty() {
return Vec::new();
}
@@ -179,11 +240,11 @@ impl Ft2Pipeline {
if results.len() >= max_results {
break;
}
if let Some(result) = self.decode_hit(&ctx, hit) {
if let Some(result) = self.decode_hit(&ctx, hit, &mut workspace) {
// Dedup
let dominated = seen_hashes.iter().any(|(h, p)| {
*h == result.message.hash && *p == result.message.payload
});
let dominated = seen_hashes
.iter()
.any(|(h, p)| *h == result.message.hash && *p == result.message.payload);
if dominated {
continue;
}
@@ -196,7 +257,7 @@ impl Ft2Pipeline {
}
/// Find frequency peaks from averaged power spectrum.
fn find_frequency_peaks(&self) -> Vec<RawCandidate> {
fn find_frequency_peaks(&mut self) -> Vec<RawCandidate> {
if self.raw_audio.len() < FT2_NFFT1 {
return Vec::new();
}
@@ -204,65 +265,68 @@ impl Ft2Pipeline {
let fs = self.sample_rate;
let df = fs / FT2_NFFT1 as f32;
let n_frames = 1 + (self.raw_audio.len() - FT2_NFFT1) / FT2_NSTEP;
let PeakSearchWorkspace {
window,
fft,
fft_input,
fft_output,
fft_scratch,
avg,
smooth,
baseline,
} = &mut self.peak_search;
// Compute Nuttall window
let window = nuttall_window(FT2_NFFT1);
// Forward real FFT setup
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
let fft = real_planner.plan_fft_forward(FT2_NFFT1);
let mut fft_input = fft.make_input_vec();
let mut fft_output = fft.make_output_vec();
let mut fft_scratch = fft.make_scratch_vec();
// Average power spectrum across frames
let mut avg = vec![0.0f32; FT2_NH1];
avg.fill(0.0);
smooth.fill(0.0);
baseline.fill(0.0);
for frame in 0..n_frames {
let start = frame * FT2_NSTEP;
for i in 0..FT2_NFFT1 {
fft_input[i] = self.raw_audio[start + i] * window[i];
let input = &self.raw_audio[start..(start + FT2_NFFT1)];
for (dst, (&sample, &coeff)) in
fft_input.iter_mut().zip(input.iter().zip(window.iter()))
{
*dst = sample * coeff;
}
fft.process_with_scratch(&mut fft_input, &mut fft_output, &mut fft_scratch)
fft.process_with_scratch(fft_input, fft_output, fft_scratch)
.expect("FFT failed");
for bin in 1..FT2_NH1 {
if bin < fft_output.len() {
let c = fft_output[bin];
let power = c.re * c.re + c.im * c.im;
avg[bin] += power;
for (bin, c) in fft_output.iter().enumerate().take(FT2_NH1).skip(1) {
avg[bin] += c.norm_sqr();
}
}
let inv_n_frames = 1.0 / n_frames as f32;
for bin in 1..FT2_NH1 {
avg[bin] *= inv_n_frames;
}
// Smooth with 15-point moving average
if FT2_NH1 > 16 {
let mut sum: f32 = avg[1..16].iter().sum();
for bin in 8..FT2_NH1.saturating_sub(8) {
smooth[bin] = sum / 15.0;
if bin + 8 < FT2_NH1 {
sum += avg[bin + 8] - avg[bin - 7];
}
}
}
for bin in 1..FT2_NH1 {
avg[bin] /= n_frames as f32;
}
// Smooth with 15-point moving average
let mut smooth = vec![0.0f32; FT2_NH1];
for bin in 8..FT2_NH1.saturating_sub(8) {
let mut sum = 0.0f32;
for i in (bin.saturating_sub(7))..=(bin + 7).min(FT2_NH1 - 1) {
sum += avg[i];
}
smooth[bin] = sum / 15.0;
}
// Baseline with 63-point moving average
let mut baseline = vec![0.0f32; FT2_NH1];
for bin in 32..FT2_NH1.saturating_sub(32) {
let mut sum = 0.0f32;
for i in (bin.saturating_sub(31))..=(bin + 31).min(FT2_NH1 - 1) {
sum += smooth[i];
if FT2_NH1 > 64 {
let mut sum: f32 = smooth[1..64].iter().sum();
for bin in 32..FT2_NH1.saturating_sub(32) {
baseline[bin] = sum / 63.0 + 1e-9;
if bin + 32 < FT2_NH1 {
sum += smooth[bin + 32] - smooth[bin - 31];
}
}
baseline[bin] = sum / 63.0 + 1e-9;
}
// Find peaks
let min_bin = (200.0 / df).round() as usize;
let max_bin = (4910.0 / df).round() as usize;
let mut candidates = Vec::new();
let mut candidates = Vec::with_capacity(FT2_MAX_RAW_CANDIDATES);
let mut bin = min_bin + 1;
while bin < max_bin.saturating_sub(1) && candidates.len() < FT2_MAX_RAW_CANDIDATES {
@@ -309,20 +373,25 @@ impl Ft2Pipeline {
}
// Sort by score descending
candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
candidates.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
candidates
}
/// Find scan hits by downsampling each frequency peak and computing sync scores.
fn find_scan_hits(&self, ctx: &DownsampleContext) -> Vec<ScanHit> {
fn find_scan_hits(
&mut self,
ctx: &DownsampleContext,
workspace: &mut Ft2DecodeWorkspace,
) -> Vec<ScanHit> {
let peaks = self.find_frequency_peaks();
if peaks.is_empty() {
return Vec::new();
}
let nfft2 = ctx.nfft2();
let waveforms = prepare_sync_waveforms();
let mut hits = Vec::new();
for peak in &peaks {
@@ -330,12 +399,15 @@ impl Ft2Pipeline {
break;
}
let mut down = vec![Complex32::new(0.0, 0.0); nfft2];
let produced = ctx.downsample(peak.freq_hz, &mut down);
let produced = ctx.downsample_with_workspace(
peak.freq_hz,
&mut workspace.downsample_a,
&mut workspace.downsample,
);
if produced == 0 {
continue;
}
normalize_downsampled(&mut down[..produced], produced);
normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
// Coarse search
let mut best_score: f32 = -1.0;
@@ -347,10 +419,10 @@ impl Ft2Pipeline {
let mut start = -688i32;
while start <= 2024 {
let score = sync2d_score(
&down[..produced],
&workspace.downsample_a[..produced],
start,
idf,
&waveforms,
&self.waveforms,
);
if score > best_score {
best_score = score;
@@ -373,10 +445,10 @@ impl Ft2Pipeline {
}
for start in (best_start - 5)..=(best_start + 5) {
let score = sync2d_score(
&down[..produced],
&workspace.downsample_a[..produced],
start,
idf,
&waveforms,
&self.waveforms,
);
if score > best_score {
best_score = score;
@@ -409,17 +481,22 @@ impl Ft2Pipeline {
}
/// Attempt to decode a single scan hit through the full pipeline.
fn decode_hit(&self, ctx: &DownsampleContext, hit: &ScanHit) -> Option<Ft2DecodeResult> {
let nfft2 = ctx.nfft2();
let waveforms = prepare_sync_waveforms();
fn decode_hit(
&self,
ctx: &DownsampleContext,
hit: &ScanHit,
workspace: &mut Ft2DecodeWorkspace,
) -> Option<Ft2DecodeResult> {
// Initial downsample for sync refinement
let mut cd2 = vec![Complex32::new(0.0, 0.0); nfft2];
let produced = ctx.downsample(hit.freq_hz, &mut cd2);
let produced = ctx.downsample_with_workspace(
hit.freq_hz,
&mut workspace.downsample_a,
&mut workspace.downsample,
);
if produced == 0 {
return None;
}
normalize_downsampled(&mut cd2[..produced], produced);
normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
// Refine sync
let mut best_score: f32 = -1.0;
@@ -431,7 +508,12 @@ impl Ft2Pipeline {
continue;
}
for start in (hit.start - 5)..=(hit.start + 5) {
let score = sync2d_score(&cd2[..produced], start, idf, &waveforms);
let score = sync2d_score(
&workspace.downsample_a[..produced],
start,
idf,
&self.waveforms,
);
if score > best_score {
best_score = score;
best_start = start;
@@ -451,19 +533,25 @@ impl Ft2Pipeline {
}
// Final downsample at corrected frequency
let mut cb = vec![Complex32::new(0.0, 0.0); nfft2];
let produced2 = ctx.downsample(corrected_freq_hz, &mut cb);
let produced2 = ctx.downsample_with_workspace(
corrected_freq_hz,
&mut workspace.downsample_b,
&mut workspace.downsample,
);
if produced2 == 0 {
return None;
}
normalize_downsampled(&mut cb[..produced2], FT2_FRAME_SAMPLES);
normalize_downsampled(&mut workspace.downsample_b[..produced2], FT2_FRAME_SAMPLES);
// Extract signal region
let mut signal = vec![Complex32::new(0.0, 0.0); FT2_FRAME_SAMPLES];
extract_signal_region(&cb[..produced2], best_start, &mut signal);
extract_signal_region(
&workspace.downsample_b[..produced2],
best_start,
&mut workspace.signal,
);
// Extract bit metrics
let bitmetrics = bitmetrics::extract_bitmetrics_raw(&signal)?;
let bitmetrics = workspace.bitmetrics.extract(&workspace.signal)?;
// Sync quality check using known Costas bit patterns
let sync_bits_a: [u8; 8] = [0, 0, 0, 1, 1, 0, 1, 1];
@@ -472,10 +560,26 @@ impl Ft2Pipeline {
let sync_bits_d: [u8; 8] = [1, 0, 1, 1, 0, 0, 0, 1];
let mut sync_qual = 0;
for i in 0..8 {
sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] { 1 } else { 0 };
sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] { 1 } else { 0 };
sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] { 1 } else { 0 };
sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] { 1 } else { 0 };
sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] {
1
} else {
0
};
sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] {
1
} else {
0
};
sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] {
1
} else {
0
};
sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] {
1
} else {
0
};
}
if sync_qual < 10 {
return None;
@@ -591,8 +695,18 @@ impl Ft2Pipeline {
}
// Compute refined timing via parabolic interpolation
let sm1 = sync2d_score(&cd2[..produced], best_start - 1, best_idf, &waveforms);
let sp1 = sync2d_score(&cd2[..produced], best_start + 1, best_idf, &waveforms);
let sm1 = sync2d_score(
&workspace.downsample_a[..produced],
best_start - 1,
best_idf,
&self.waveforms,
);
let sp1 = sync2d_score(
&workspace.downsample_a[..produced],
best_start + 1,
best_idf,
&self.waveforms,
);
let mut xstart = best_start as f32;
let den = sm1 - 2.0 * best_score + sp1;
if den.abs() > 1e-6 {
@@ -635,7 +749,11 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
if power <= 0.0 {
return;
}
let rc = if ref_count == 0 { samples.len() } else { ref_count };
let rc = if ref_count == 0 {
samples.len()
} else {
ref_count
};
let scale = (rc as f32 / power).sqrt();
for s in samples.iter_mut() {
*s *= scale;
@@ -644,14 +762,17 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
/// Extract a signal region starting at `start` into `out_signal`.
fn extract_signal_region(input: &[Complex32], start: i32, out_signal: &mut [Complex32]) {
for i in 0..out_signal.len() {
let src = start + i as i32;
out_signal[i] = if src >= 0 && (src as usize) < input.len() {
input[src as usize]
} else {
Complex32::new(0.0, 0.0)
};
out_signal.fill(Complex32::new(0.0, 0.0));
let src_start = start.max(0) as usize;
let dst_start = (-start).max(0) as usize;
if dst_start >= out_signal.len() || src_start >= input.len() {
return;
}
let copy_len = (input.len() - src_start).min(out_signal.len() - dst_start);
out_signal[dst_start..(dst_start + copy_len)]
.copy_from_slice(&input[src_start..(src_start + copy_len)]);
}
/// Normalize LLR array (divide by standard deviation).
+93 -28
View File
@@ -9,6 +9,7 @@
//! reference across time and frequency offsets.
use num_complex::Complex32;
use std::sync::OnceLock;
use crate::constants::FT4_COSTAS_PATTERN;
@@ -16,6 +17,12 @@ use super::{FT2_NDOWN, FT2_NSS, FT2_SYMBOL_PERIOD_F, FT2_SYNC_TWEAK_MAX, FT2_SYN
/// Number of frequency tweak entries.
const NUM_TWEAKS: usize = (FT2_SYNC_TWEAK_MAX - FT2_SYNC_TWEAK_MIN) as usize + 1;
/// Number of sync groups correlated per frame.
const SYNC_GROUP_COUNT: usize = 4;
/// Number of samples correlated within each sync group.
const SYNC_SAMPLES: usize = 64;
/// Step, in downsampled samples, between consecutive correlated samples.
const SAMPLE_STRIDE: usize = 2;
/// Distance, in downsampled samples, between the starts of adjacent sync
/// groups (33 symbols).
const GROUP_STRIDE: i32 = 33 * FT2_NSS as i32;
/// Offset from a group's first correlated sample to its last one.
const GROUP_LAST_SAMPLE_OFFSET: i32 = SAMPLE_STRIDE as i32 * (SYNC_SAMPLES as i32 - 1);
/// Offset from the frame start to the last correlated sample of the fourth
/// group; a frame fits entirely inside the buffer when this index is in range.
const FRAME_LAST_SAMPLE_OFFSET: i32 = 3 * GROUP_STRIDE + GROUP_LAST_SAMPLE_OFFSET;
/// Precomputed sync and frequency-tweak waveforms.
pub struct SyncWaveforms {
@@ -73,6 +80,74 @@ pub fn prepare_sync_waveforms() -> SyncWaveforms {
}
}
type SyncReferenceBank = [[[Complex32; SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];
/// Lazily build, once per process, the conjugated correlation references for
/// every (frequency tweak, sync group) pair.
///
/// Each entry is the conjugate of `sync_wave * tweak_wave`, so the
/// correlators multiply received samples by it directly.
fn sync_reference_bank() -> &'static SyncReferenceBank {
    static BANK: OnceLock<SyncReferenceBank> = OnceLock::new();

    BANK.get_or_init(|| {
        let wf = prepare_sync_waveforms();
        let mut bank: SyncReferenceBank =
            [[[Complex32::new(0.0, 0.0); SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];

        for (tweak_idx, per_tweak) in bank.iter_mut().enumerate() {
            for (group_idx, per_group) in per_tweak.iter_mut().enumerate() {
                for (k, slot) in per_group.iter_mut().enumerate() {
                    let reference = wf.sync_wave[group_idx][k] * wf.tweak_wave[tweak_idx][k];
                    *slot = reference.conj();
                }
            }
        }

        bank
    })
}
/// Correlate one sync group against its reference with no per-sample range
/// checks of its own.
///
/// Reads `SYNC_SAMPLES` samples starting at `pos`, `SAMPLE_STRIDE` apart, and
/// returns the magnitude of the accumulated complex product. Indexing panics
/// if any of those samples lies outside `samples`.
#[inline(always)]
fn correlate_group_fast(
    samples: &[Complex32],
    pos: usize,
    refs: &[Complex32; SYNC_SAMPLES],
) -> f32 {
    let mut acc_re = 0.0f32;
    let mut acc_im = 0.0f32;

    for (k, r) in refs.iter().enumerate() {
        let s = samples[pos + k * SAMPLE_STRIDE];
        acc_re += s.re * r.re - s.im * r.im;
        acc_im += s.re * r.im + s.im * r.re;
    }

    (acc_re * acc_re + acc_im * acc_im).sqrt()
}
/// Correlate one sync group against its reference, skipping any sample whose
/// index falls outside `samples`.
///
/// Returns the magnitude of the accumulated complex product together with the
/// number of samples that were in range and contributed to it.
#[inline(always)]
fn correlate_group_clipped(
    samples: &[Complex32],
    pos: i32,
    refs: &[Complex32; SYNC_SAMPLES],
) -> (f32, usize) {
    let len = samples.len() as i32;
    let mut acc_re = 0.0f32;
    let mut acc_im = 0.0f32;
    let mut used = 0usize;

    for (k, r) in refs.iter().enumerate() {
        let idx = pos + k as i32 * SAMPLE_STRIDE as i32;
        if (0..len).contains(&idx) {
            let s = samples[idx as usize];
            acc_re += s.re * r.re - s.im * r.im;
            acc_im += s.re * r.im + s.im * r.re;
            used += 1;
        }
    }

    let magnitude = (acc_re * acc_re + acc_im * acc_im).sqrt();
    (magnitude, used)
}
/// Compute the 2D sync score for a given time offset and frequency tweak.
///
/// Correlates the downsampled complex samples against the four Costas sync
@@ -88,46 +163,36 @@ pub fn sync2d_score(
samples: &[Complex32],
start: i32,
idf: i32,
waveforms: &SyncWaveforms,
_waveforms: &SyncWaveforms,
) -> f32 {
let nss = FT2_NSS as i32;
let n_samples = samples.len() as i32;
// The four sync groups are at symbol positions 0, 33, 66, 99 within the frame
let positions = [
start,
start + 33 * nss,
start + 66 * nss,
start + 99 * nss,
];
let tw_idx = (idf - FT2_SYNC_TWEAK_MIN) as usize;
if tw_idx >= waveforms.tweak_wave.len() {
if tw_idx >= NUM_TWEAKS {
return 0.0;
}
let tweak = &waveforms.tweak_wave[tw_idx];
let refs = &sync_reference_bank()[tw_idx];
let scale = 1.0 / (2.0 * FT2_NSS as f32);
let mut score = 0.0f32;
for group in 0..4 {
let pos = positions[group];
let mut sum = Complex32::new(0.0, 0.0);
let mut usable = 0;
if start >= 0 && start + FRAME_LAST_SAMPLE_OFFSET < n_samples {
for (group, refs_group) in refs.iter().enumerate() {
let pos = (start + group as i32 * GROUP_STRIDE) as usize;
score += correlate_group_fast(samples, pos, refs_group) * scale;
}
return score;
}
for i in 0..64 {
let sample_idx = pos + 2 * i as i32;
if sample_idx < 0 || sample_idx >= n_samples {
continue;
}
// Correlate: multiply received sample by conjugate of
// (sync_reference * tweak_phasor)
let reference = waveforms.sync_wave[group][i] * tweak[i];
sum += samples[sample_idx as usize] * reference.conj();
usable += 1;
for (group, refs_group) in refs.iter().enumerate() {
let pos = start + group as i32 * GROUP_STRIDE;
if pos >= n_samples || pos + GROUP_LAST_SAMPLE_OFFSET < 0 {
continue;
}
let (corr, usable) = correlate_group_clipped(samples, pos, refs_group);
if usable > 16 {
score += sum.norm() / (2.0 * FT2_NSS as f32);
score += corr * scale;
}
}