[refactor](trx-ftx): optimize ft2 decode hot paths
Reuse FT2 downsample and bitmetric work buffers, speed up
sync2d_score with precomputed references, and cache peak-search
FFT state on the pipeline.

Co-authored-by: OpenAI Codex <codex@openai.com>
Signed-off-by: Stan Grams <sjg@haxx.space>
This commit is contained in:
@@ -176,8 +176,7 @@ impl Ft8Decoder {
|
||||
|
||||
/// Waterfall-based decode for FT8/FT4.
|
||||
fn decode_waterfall(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
|
||||
let candidates =
|
||||
ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
|
||||
let candidates = ftx_find_candidates(&self.monitor.wf, MAX_CANDIDATES, MIN_CANDIDATE_SCORE);
|
||||
|
||||
let mut results = Vec::new();
|
||||
let mut seen: Vec<u16> = Vec::new();
|
||||
@@ -209,13 +208,14 @@ impl Ft8Decoder {
|
||||
|
||||
// Compute time offset
|
||||
let symbol_period = self.protocol.symbol_period();
|
||||
let dt_s =
|
||||
(cand.time_offset as f32 + cand.time_sub as f32 / self.monitor.wf.time_osr as f32)
|
||||
let dt_s = (cand.time_offset as f32
|
||||
+ cand.time_sub as f32 / self.monitor.wf.time_osr as f32)
|
||||
* symbol_period
|
||||
- 0.5;
|
||||
|
||||
// Compute frequency
|
||||
let freq_hz = (self.monitor.min_bin as f32 + cand.freq_offset as f32
|
||||
let freq_hz = (self.monitor.min_bin as f32
|
||||
+ cand.freq_offset as f32
|
||||
+ cand.freq_sub as f32 / self.monitor.wf.freq_osr as f32)
|
||||
/ symbol_period;
|
||||
|
||||
@@ -232,7 +232,8 @@ impl Ft8Decoder {
|
||||
|
||||
/// FT2-specific decode pipeline.
|
||||
fn decode_ft2(&mut self, max_results: usize) -> Vec<Ft8DecodeResult> {
|
||||
let pipe = match self.ft2_pipeline.as_ref() {
|
||||
let ft2_results = {
|
||||
let pipe = match self.ft2_pipeline.as_mut() {
|
||||
Some(p) => p,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
@@ -241,7 +242,8 @@ impl Ft8Decoder {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let ft2_results = pipe.decode(max_results);
|
||||
pipe.decode(max_results)
|
||||
};
|
||||
let mut results = Vec::new();
|
||||
|
||||
for r in ft2_results {
|
||||
@@ -267,8 +269,7 @@ impl Ft8Decoder {
|
||||
payload: msg.payload,
|
||||
hash: msg.hash as u32,
|
||||
};
|
||||
let (text, _offsets, _rc) =
|
||||
message::ftx_message_decode(&m, &mut self.callsign_hash);
|
||||
let (text, _offsets, _rc) = message::ftx_message_decode(&m, &mut self.callsign_hash);
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -15,51 +15,64 @@ use crate::constants::{FT4_COSTAS_PATTERN, FT4_GRAY_MAP};
|
||||
|
||||
use super::{FT2_FRAME_SYMBOLS, FT2_NSS};
|
||||
|
||||
/// Extract bit metrics from the downsampled signal region.
|
||||
///
|
||||
/// Returns a 2D array of shape `[2 * FT2_FRAME_SYMBOLS][3]` where:
|
||||
/// - Index 0: 1-symbol scale metric
|
||||
/// - Index 1: 2-symbol scale metric
|
||||
/// - Index 2: 4-symbol scale metric
|
||||
///
|
||||
/// Returns `None` if the sync quality is too poor (fewer than 4 of 16
|
||||
/// Costas sync tones decoded correctly).
|
||||
pub fn extract_bitmetrics_raw(
|
||||
signal: &[Complex32],
|
||||
) -> Option<Vec<[f32; 3]>> {
|
||||
let n_metrics = 2 * FT2_FRAME_SYMBOLS;
|
||||
let mut bitmetrics = vec![[0.0f32; 3]; n_metrics];
|
||||
const N_METRICS: usize = 2 * FT2_FRAME_SYMBOLS;
|
||||
|
||||
// Per-symbol FFT to extract complex tone amplitudes
|
||||
/// Reusable FFT plans and scratch buffers for bit-metric extraction.
|
||||
pub struct BitMetricsWorkspace {
|
||||
fft: std::sync::Arc<dyn rustfft::Fft<f32>>,
|
||||
scratch: Vec<Complex32>,
|
||||
symbols: [[Complex32; 4]; FT2_FRAME_SYMBOLS],
|
||||
s4: [[f32; 4]; FT2_FRAME_SYMBOLS],
|
||||
metric1: [f32; N_METRICS],
|
||||
metric2: [f32; N_METRICS],
|
||||
metric4: [f32; N_METRICS],
|
||||
bitmetrics: [[f32; 3]; N_METRICS],
|
||||
csymb: [Complex32; FT2_NSS],
|
||||
}
|
||||
|
||||
impl BitMetricsWorkspace {
|
||||
pub fn new() -> Self {
|
||||
let mut planner = FftPlanner::<f32>::new();
|
||||
let fft = planner.plan_fft_forward(FT2_NSS);
|
||||
let fft_scratch_len = fft.get_inplace_scratch_len();
|
||||
let mut scratch = vec![Complex32::new(0.0, 0.0); fft_scratch_len];
|
||||
let scratch = vec![Complex32::new(0.0, 0.0); fft.get_inplace_scratch_len()];
|
||||
|
||||
// Complex symbols for each of the 4 tones at each frame symbol
|
||||
let mut symbols = vec![[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS];
|
||||
// Magnitude for each tone at each symbol
|
||||
let mut s4 = vec![[0.0f32; 4]; FT2_FRAME_SYMBOLS];
|
||||
Self {
|
||||
fft,
|
||||
scratch,
|
||||
symbols: [[Complex32::new(0.0, 0.0); 4]; FT2_FRAME_SYMBOLS],
|
||||
s4: [[0.0; 4]; FT2_FRAME_SYMBOLS],
|
||||
metric1: [0.0; N_METRICS],
|
||||
metric2: [0.0; N_METRICS],
|
||||
metric4: [0.0; N_METRICS],
|
||||
bitmetrics: [[0.0; 3]; N_METRICS],
|
||||
csymb: [Complex32::new(0.0, 0.0); FT2_NSS],
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract bit metrics into a reusable internal buffer.
|
||||
pub fn extract<'a>(&'a mut self, signal: &[Complex32]) -> Option<&'a [[f32; 3]]> {
|
||||
self.metric1.fill(0.0);
|
||||
self.metric2.fill(0.0);
|
||||
self.metric4.fill(0.0);
|
||||
|
||||
for sym in 0..FT2_FRAME_SYMBOLS {
|
||||
let offset = sym * FT2_NSS;
|
||||
let mut csymb: Vec<Complex32> = (0..FT2_NSS)
|
||||
.map(|i| {
|
||||
if offset + i < signal.len() {
|
||||
signal[offset + i]
|
||||
if offset + FT2_NSS <= signal.len() {
|
||||
self.csymb
|
||||
.copy_from_slice(&signal[offset..(offset + FT2_NSS)]);
|
||||
} else {
|
||||
Complex32::new(0.0, 0.0)
|
||||
self.csymb.fill(Complex32::new(0.0, 0.0));
|
||||
let remaining = signal.len().saturating_sub(offset);
|
||||
self.csymb[..remaining].copy_from_slice(&signal[offset..(offset + remaining)]);
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
fft.process_with_scratch(&mut csymb, &mut scratch);
|
||||
self.fft
|
||||
.process_with_scratch(&mut self.csymb, &mut self.scratch);
|
||||
|
||||
for tone in 0..4 {
|
||||
if tone < csymb.len() {
|
||||
symbols[sym][tone] = csymb[tone];
|
||||
s4[sym][tone] = csymb[tone].norm();
|
||||
}
|
||||
let symbol = self.csymb[tone];
|
||||
self.symbols[sym][tone] = symbol;
|
||||
self.s4[sym][tone] = symbol.norm();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,7 +86,7 @@ pub fn extract_bitmetrics_raw(
|
||||
}
|
||||
let mut best = 0;
|
||||
for tone in 1..4 {
|
||||
if s4[base + i][tone] > s4[base + i][best] {
|
||||
if self.s4[base + i][tone] > self.s4[base + i][best] {
|
||||
best = tone;
|
||||
}
|
||||
}
|
||||
@@ -87,93 +100,56 @@ pub fn extract_bitmetrics_raw(
|
||||
return None;
|
||||
}
|
||||
|
||||
// Precompute one_mask: for each integer 0..255 and bit position 0..7,
|
||||
// whether that bit is set.
|
||||
let one_mask: Vec<[u8; 8]> = (0..256u16)
|
||||
.map(|i| {
|
||||
let mut m = [0u8; 8];
|
||||
for j in 0..8 {
|
||||
m[j] = if (i & (1 << j)) != 0 { 1 } else { 0 };
|
||||
}
|
||||
m
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Compute metrics at three scales
|
||||
let mut metric1 = vec![0.0f32; n_metrics];
|
||||
let mut metric2 = vec![0.0f32; n_metrics];
|
||||
let mut metric4 = vec![0.0f32; n_metrics];
|
||||
|
||||
for nseq in 0..3 {
|
||||
let nsym = match nseq {
|
||||
0 => 1,
|
||||
1 => 2,
|
||||
_ => 4,
|
||||
let (nsym, metric): (usize, &mut [f32; N_METRICS]) = match nseq {
|
||||
0 => (1, &mut self.metric1),
|
||||
1 => (2, &mut self.metric2),
|
||||
_ => (4, &mut self.metric4),
|
||||
};
|
||||
let nt = 1 << (2 * nsym); // number of tone sequences to enumerate
|
||||
|
||||
let mut ks = 0;
|
||||
while ks + nsym <= FT2_FRAME_SYMBOLS {
|
||||
// Compute coherent magnitude for each possible tone sequence
|
||||
let mut s2 = vec![0.0f32; nt];
|
||||
for i in 0..nt {
|
||||
let i1 = i / 64;
|
||||
let i2 = (i & 63) / 16;
|
||||
let i3 = (i & 15) / 4;
|
||||
let i4 = i & 3;
|
||||
|
||||
let sum = match nsym {
|
||||
1 => symbols[ks][FT4_GRAY_MAP[i4] as usize],
|
||||
2 => {
|
||||
symbols[ks][FT4_GRAY_MAP[i3] as usize]
|
||||
+ symbols[ks + 1][FT4_GRAY_MAP[i4] as usize]
|
||||
}
|
||||
4 => {
|
||||
symbols[ks][FT4_GRAY_MAP[i1] as usize]
|
||||
+ symbols[ks + 1][FT4_GRAY_MAP[i2] as usize]
|
||||
+ symbols[ks + 2][FT4_GRAY_MAP[i3] as usize]
|
||||
+ symbols[ks + 3][FT4_GRAY_MAP[i4] as usize]
|
||||
}
|
||||
_ => Complex32::new(0.0, 0.0),
|
||||
};
|
||||
s2[i] = sum.norm();
|
||||
}
|
||||
|
||||
// Extract bit metrics: for each bit position, find max coherent
|
||||
// magnitude with that bit set vs unset
|
||||
let ipt = 2 * ks;
|
||||
let ibmax: usize = match nsym {
|
||||
let nt = 1usize << (2 * nsym);
|
||||
let ibmax = match nsym {
|
||||
1 => 1,
|
||||
2 => 3,
|
||||
4 => 7,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
for ib in 0..=ibmax {
|
||||
let mut max_one = f32::NEG_INFINITY;
|
||||
let mut max_zero = f32::NEG_INFINITY;
|
||||
let mut ks = 0;
|
||||
while ks + nsym <= FT2_FRAME_SYMBOLS {
|
||||
let mut max_one = [f32::NEG_INFINITY; 8];
|
||||
let mut max_zero = [f32::NEG_INFINITY; 8];
|
||||
|
||||
for i in 0..nt {
|
||||
if i < 256 {
|
||||
if one_mask[i][ibmax - ib] != 0 {
|
||||
if s2[i] > max_one {
|
||||
max_one = s2[i];
|
||||
let sum = match nsym {
|
||||
1 => self.symbols[ks][FT4_GRAY_MAP[i & 0x03] as usize],
|
||||
2 => {
|
||||
self.symbols[ks][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
|
||||
+ self.symbols[ks + 1][FT4_GRAY_MAP[i & 0x03] as usize]
|
||||
}
|
||||
} else if s2[i] > max_zero {
|
||||
max_zero = s2[i];
|
||||
4 => {
|
||||
self.symbols[ks][FT4_GRAY_MAP[(i >> 6) & 0x03] as usize]
|
||||
+ self.symbols[ks + 1][FT4_GRAY_MAP[(i >> 4) & 0x03] as usize]
|
||||
+ self.symbols[ks + 2][FT4_GRAY_MAP[(i >> 2) & 0x03] as usize]
|
||||
+ self.symbols[ks + 3][FT4_GRAY_MAP[i & 0x03] as usize]
|
||||
}
|
||||
_ => Complex32::new(0.0, 0.0),
|
||||
};
|
||||
let coherent = sum.norm();
|
||||
|
||||
for ib in 0..=ibmax {
|
||||
if ((i >> (ibmax - ib)) & 1) != 0 {
|
||||
max_one[ib] = max_one[ib].max(coherent);
|
||||
} else {
|
||||
max_zero[ib] = max_zero[ib].max(coherent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let ipt = 2 * ks;
|
||||
for ib in 0..=ibmax {
|
||||
let metric_idx = ipt + ib;
|
||||
if metric_idx >= n_metrics {
|
||||
continue;
|
||||
}
|
||||
|
||||
match nseq {
|
||||
0 => metric1[metric_idx] = max_one - max_zero,
|
||||
1 => metric2[metric_idx] = max_one - max_zero,
|
||||
_ => metric4[metric_idx] = max_one - max_zero,
|
||||
if metric_idx < N_METRICS {
|
||||
metric[metric_idx] = max_one[ib] - max_zero[ib];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,30 +158,49 @@ pub fn extract_bitmetrics_raw(
|
||||
}
|
||||
|
||||
// Patch boundary metrics where multi-symbol integration overruns
|
||||
if n_metrics >= 206 {
|
||||
metric2[204] = metric1[204];
|
||||
metric2[205] = metric1[205];
|
||||
metric4[200] = metric2[200];
|
||||
metric4[201] = metric2[201];
|
||||
metric4[202] = metric2[202];
|
||||
metric4[203] = metric2[203];
|
||||
metric4[204] = metric1[204];
|
||||
metric4[205] = metric1[205];
|
||||
self.metric2[204] = self.metric1[204];
|
||||
self.metric2[205] = self.metric1[205];
|
||||
self.metric4[200] = self.metric2[200];
|
||||
self.metric4[201] = self.metric2[201];
|
||||
self.metric4[202] = self.metric2[202];
|
||||
self.metric4[203] = self.metric2[203];
|
||||
self.metric4[204] = self.metric1[204];
|
||||
self.metric4[205] = self.metric1[205];
|
||||
|
||||
normalize_metric(&mut self.metric1);
|
||||
normalize_metric(&mut self.metric2);
|
||||
normalize_metric(&mut self.metric4);
|
||||
|
||||
for i in 0..N_METRICS {
|
||||
self.bitmetrics[i][0] = self.metric1[i];
|
||||
self.bitmetrics[i][1] = self.metric2[i];
|
||||
self.bitmetrics[i][2] = self.metric4[i];
|
||||
}
|
||||
|
||||
// Normalize each metric scale independently
|
||||
normalize_metric(&mut metric1);
|
||||
normalize_metric(&mut metric2);
|
||||
normalize_metric(&mut metric4);
|
||||
|
||||
// Pack into output
|
||||
for i in 0..n_metrics {
|
||||
bitmetrics[i][0] = metric1[i];
|
||||
bitmetrics[i][1] = metric2[i];
|
||||
bitmetrics[i][2] = metric4[i];
|
||||
Some(&self.bitmetrics)
|
||||
}
|
||||
}
|
||||
|
||||
Some(bitmetrics)
|
||||
impl Default for BitMetricsWorkspace {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract bit metrics from the downsampled signal region.
|
||||
///
|
||||
/// Returns a 2D array of shape `[2 * FT2_FRAME_SYMBOLS][3]` where:
|
||||
/// - Index 0: 1-symbol scale metric
|
||||
/// - Index 1: 2-symbol scale metric
|
||||
/// - Index 2: 4-symbol scale metric
|
||||
///
|
||||
/// Returns `None` if the sync quality is too poor (fewer than 4 of 16
|
||||
/// Costas sync tones decoded correctly).
|
||||
pub fn extract_bitmetrics_raw(signal: &[Complex32]) -> Option<Vec<[f32; 3]>> {
|
||||
let mut workspace = BitMetricsWorkspace::new();
|
||||
workspace
|
||||
.extract(signal)
|
||||
.map(|bitmetrics| bitmetrics.to_vec())
|
||||
}
|
||||
|
||||
/// Normalize a metric array by dividing by its standard deviation.
|
||||
|
||||
@@ -14,6 +14,34 @@ use rustfft::FftPlanner;
|
||||
|
||||
use super::{FT2_NDOWN, FT2_SYMBOL_PERIOD_F};
|
||||
|
||||
/// Reusable scratch buffers for frequency-domain downsampling.
|
||||
pub struct DownsampleWorkspace {
|
||||
band: Vec<Complex32>,
|
||||
ifft_scratch: Vec<Complex32>,
|
||||
}
|
||||
|
||||
impl DownsampleWorkspace {
|
||||
fn new(nfft2: usize, ifft_scratch_len: usize) -> Self {
|
||||
Self {
|
||||
band: vec![Complex32::new(0.0, 0.0); nfft2],
|
||||
ifft_scratch: vec![Complex32::new(0.0, 0.0); ifft_scratch_len],
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare(&mut self, nfft2: usize, ifft_scratch_len: usize) {
|
||||
if self.band.len() != nfft2 {
|
||||
self.band.resize(nfft2, Complex32::new(0.0, 0.0));
|
||||
} else {
|
||||
self.band.fill(Complex32::new(0.0, 0.0));
|
||||
}
|
||||
|
||||
if self.ifft_scratch.len() != ifft_scratch_len {
|
||||
self.ifft_scratch
|
||||
.resize(ifft_scratch_len, Complex32::new(0.0, 0.0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Downsample context holding precomputed FFT data and spectral window.
|
||||
pub struct DownsampleContext {
|
||||
/// Number of raw samples.
|
||||
@@ -28,8 +56,8 @@ pub struct DownsampleContext {
|
||||
spectrum: Vec<Complex32>,
|
||||
/// IFFT plan for the downsampled length.
|
||||
ifft: std::sync::Arc<dyn rustfft::Fft<f32>>,
|
||||
/// Scratch buffer for IFFT.
|
||||
ifft_scratch: Vec<Complex32>,
|
||||
/// Scratch length required by the IFFT plan.
|
||||
ifft_scratch_len: usize,
|
||||
}
|
||||
|
||||
impl DownsampleContext {
|
||||
@@ -50,7 +78,11 @@ impl DownsampleContext {
|
||||
let df = sample_rate / nraw as f32;
|
||||
|
||||
// Build spectral extraction window
|
||||
let window = build_spectral_window(nfft2, df);
|
||||
let mut window = build_spectral_window(nfft2, df);
|
||||
let inv_nfft2 = 1.0 / nfft2 as f32;
|
||||
for coeff in &mut window {
|
||||
*coeff *= inv_nfft2;
|
||||
}
|
||||
|
||||
// Forward real FFT of raw audio
|
||||
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
|
||||
@@ -59,11 +91,7 @@ impl DownsampleContext {
|
||||
let mut output = fft.make_output_vec();
|
||||
let mut scratch = fft.make_scratch_vec();
|
||||
|
||||
for (i, s) in raw_audio.iter().enumerate() {
|
||||
if i < input.len() {
|
||||
input[i] = *s;
|
||||
}
|
||||
}
|
||||
input.copy_from_slice(raw_audio);
|
||||
fft.process_with_scratch(&mut input, &mut output, &mut scratch)
|
||||
.ok()?;
|
||||
|
||||
@@ -72,7 +100,7 @@ impl DownsampleContext {
|
||||
// IFFT plan for downsampled length
|
||||
let mut planner = FftPlanner::<f32>::new();
|
||||
let ifft = planner.plan_fft_inverse(nfft2);
|
||||
let ifft_scratch = vec![Complex32::new(0.0, 0.0); ifft.get_inplace_scratch_len()];
|
||||
let ifft_scratch_len = ifft.get_inplace_scratch_len();
|
||||
|
||||
Some(Self {
|
||||
nraw,
|
||||
@@ -81,7 +109,7 @@ impl DownsampleContext {
|
||||
window,
|
||||
spectrum,
|
||||
ifft,
|
||||
ifft_scratch,
|
||||
ifft_scratch_len,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -90,15 +118,31 @@ impl DownsampleContext {
|
||||
self.nfft2
|
||||
}
|
||||
|
||||
/// Create reusable buffers for repeated downsampling with this context.
|
||||
pub fn workspace(&self) -> DownsampleWorkspace {
|
||||
DownsampleWorkspace::new(self.nfft2, self.ifft_scratch_len)
|
||||
}
|
||||
|
||||
/// Downsample the raw audio around `freq_hz`, writing complex baseband
|
||||
/// samples into `out`. Returns the number of samples produced.
|
||||
pub fn downsample(&self, freq_hz: f32, out: &mut [Complex32]) -> usize {
|
||||
let mut workspace = self.workspace();
|
||||
self.downsample_with_workspace(freq_hz, out, &mut workspace)
|
||||
}
|
||||
|
||||
/// Downsample the raw audio using reusable scratch buffers.
|
||||
pub fn downsample_with_workspace(
|
||||
&self,
|
||||
freq_hz: f32,
|
||||
out: &mut [Complex32],
|
||||
workspace: &mut DownsampleWorkspace,
|
||||
) -> usize {
|
||||
if out.len() < self.nfft2 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Working band buffer
|
||||
let mut band = vec![Complex32::new(0.0, 0.0); self.nfft2];
|
||||
workspace.prepare(self.nfft2, self.ifft_scratch_len);
|
||||
let band = &mut workspace.band;
|
||||
let i0 = (freq_hz / self.df).round() as i32;
|
||||
let half_nraw = (self.nraw / 2) as i32;
|
||||
|
||||
@@ -119,21 +163,16 @@ impl DownsampleContext {
|
||||
}
|
||||
}
|
||||
|
||||
// Apply spectral window and scale
|
||||
let inv_nfft2 = 1.0 / self.nfft2 as f32;
|
||||
// Apply spectral window
|
||||
for i in 0..self.nfft2 {
|
||||
band[i] = Complex32::new(
|
||||
band[i].re * self.window[i] * inv_nfft2,
|
||||
band[i].im * self.window[i] * inv_nfft2,
|
||||
);
|
||||
band[i] *= self.window[i];
|
||||
}
|
||||
|
||||
// Inverse FFT (in-place)
|
||||
let mut scratch = self.ifft_scratch.clone();
|
||||
self.ifft
|
||||
.process_with_scratch(&mut band, &mut scratch);
|
||||
.process_with_scratch(band, &mut workspace.ifft_scratch);
|
||||
|
||||
out[..self.nfft2].copy_from_slice(&band);
|
||||
out[..self.nfft2].copy_from_slice(band);
|
||||
self.nfft2
|
||||
}
|
||||
}
|
||||
@@ -167,13 +206,13 @@ fn build_spectral_window(nfft2: usize, df: f32) -> Vec<f32> {
|
||||
|
||||
// Raised-cosine trailing edge
|
||||
for i in (iwt + iwf)..(2 * iwt + iwf).min(nfft2) {
|
||||
window[i] = 0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos());
|
||||
window[i] =
|
||||
0.5 * (1.0 + (std::f32::consts::PI * (i - (iwt + iwf)) as f32 / iwt as f32).cos());
|
||||
}
|
||||
|
||||
// Circular shift by iws bins
|
||||
if iws > 0 && iws < nfft2 {
|
||||
let shifted: Vec<f32> = (0..nfft2).map(|i| window[(i + iws) % nfft2]).collect();
|
||||
window.copy_from_slice(&shifted);
|
||||
window.rotate_left(iws);
|
||||
}
|
||||
|
||||
window
|
||||
|
||||
@@ -14,6 +14,7 @@ pub mod osd;
|
||||
pub mod sync;
|
||||
|
||||
use num_complex::Complex32;
|
||||
use realfft::RealFftPlanner;
|
||||
|
||||
use crate::constants::FT4_XOR_SEQUENCE;
|
||||
use crate::crc::{ftx_compute_crc, ftx_extract_crc};
|
||||
@@ -21,8 +22,9 @@ use crate::decode::{pack_bits, FtxMessage};
|
||||
use crate::ldpc;
|
||||
use crate::protocol::*;
|
||||
|
||||
use downsample::DownsampleContext;
|
||||
use sync::{prepare_sync_waveforms, sync2d_score};
|
||||
use bitmetrics::BitMetricsWorkspace;
|
||||
use downsample::{DownsampleContext, DownsampleWorkspace};
|
||||
use sync::{prepare_sync_waveforms, sync2d_score, SyncWaveforms};
|
||||
|
||||
// FT2 DSP constants
|
||||
pub const FT2_NDOWN: usize = 9;
|
||||
@@ -119,6 +121,62 @@ pub struct Ft2Pipeline {
|
||||
sample_rate: f32,
|
||||
raw_audio: Vec<f32>,
|
||||
raw_capacity: usize,
|
||||
waveforms: SyncWaveforms,
|
||||
peak_search: PeakSearchWorkspace,
|
||||
}
|
||||
|
||||
struct Ft2DecodeWorkspace {
|
||||
downsample: DownsampleWorkspace,
|
||||
downsample_a: Vec<Complex32>,
|
||||
downsample_b: Vec<Complex32>,
|
||||
signal: Vec<Complex32>,
|
||||
bitmetrics: BitMetricsWorkspace,
|
||||
}
|
||||
|
||||
impl Ft2DecodeWorkspace {
|
||||
fn new(ctx: &DownsampleContext) -> Self {
|
||||
let nfft2 = ctx.nfft2();
|
||||
Self {
|
||||
downsample: ctx.workspace(),
|
||||
downsample_a: vec![Complex32::new(0.0, 0.0); nfft2],
|
||||
downsample_b: vec![Complex32::new(0.0, 0.0); nfft2],
|
||||
signal: vec![Complex32::new(0.0, 0.0); FT2_FRAME_SAMPLES],
|
||||
bitmetrics: BitMetricsWorkspace::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct PeakSearchWorkspace {
|
||||
window: Vec<f32>,
|
||||
fft: std::sync::Arc<dyn realfft::RealToComplex<f32>>,
|
||||
fft_input: Vec<f32>,
|
||||
fft_output: Vec<Complex32>,
|
||||
fft_scratch: Vec<Complex32>,
|
||||
avg: Vec<f32>,
|
||||
smooth: Vec<f32>,
|
||||
baseline: Vec<f32>,
|
||||
}
|
||||
|
||||
impl PeakSearchWorkspace {
|
||||
fn new() -> Self {
|
||||
let window = nuttall_window(FT2_NFFT1);
|
||||
let mut planner = RealFftPlanner::<f32>::new();
|
||||
let fft = planner.plan_fft_forward(FT2_NFFT1);
|
||||
let fft_input = fft.make_input_vec();
|
||||
let fft_output = fft.make_output_vec();
|
||||
let fft_scratch = fft.make_scratch_vec();
|
||||
|
||||
Self {
|
||||
window,
|
||||
fft,
|
||||
fft_input,
|
||||
fft_output,
|
||||
fft_scratch,
|
||||
avg: vec![0.0; FT2_NH1],
|
||||
smooth: vec![0.0; FT2_NH1],
|
||||
baseline: vec![0.0; FT2_NH1],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ft2Pipeline {
|
||||
@@ -128,6 +186,8 @@ impl Ft2Pipeline {
|
||||
sample_rate: sample_rate as f32,
|
||||
raw_audio: Vec::with_capacity(FT2_NMAX),
|
||||
raw_capacity: FT2_NMAX,
|
||||
waveforms: prepare_sync_waveforms(),
|
||||
peak_search: PeakSearchWorkspace::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,7 +217,7 @@ impl Ft2Pipeline {
|
||||
}
|
||||
|
||||
/// Run the full FT2 decode pipeline. Returns decoded messages.
|
||||
pub fn decode(&self, max_results: usize) -> Vec<Ft2DecodeResult> {
|
||||
pub fn decode(&mut self, max_results: usize) -> Vec<Ft2DecodeResult> {
|
||||
if self.raw_audio.len() < FT2_NFFT1 {
|
||||
return Vec::new();
|
||||
}
|
||||
@@ -167,7 +227,8 @@ impl Ft2Pipeline {
|
||||
None => return Vec::new(),
|
||||
};
|
||||
|
||||
let hits = self.find_scan_hits(&ctx);
|
||||
let mut workspace = Ft2DecodeWorkspace::new(&ctx);
|
||||
let hits = self.find_scan_hits(&ctx, &mut workspace);
|
||||
if hits.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
@@ -179,11 +240,11 @@ impl Ft2Pipeline {
|
||||
if results.len() >= max_results {
|
||||
break;
|
||||
}
|
||||
if let Some(result) = self.decode_hit(&ctx, hit) {
|
||||
if let Some(result) = self.decode_hit(&ctx, hit, &mut workspace) {
|
||||
// Dedup
|
||||
let dominated = seen_hashes.iter().any(|(h, p)| {
|
||||
*h == result.message.hash && *p == result.message.payload
|
||||
});
|
||||
let dominated = seen_hashes
|
||||
.iter()
|
||||
.any(|(h, p)| *h == result.message.hash && *p == result.message.payload);
|
||||
if dominated {
|
||||
continue;
|
||||
}
|
||||
@@ -196,7 +257,7 @@ impl Ft2Pipeline {
|
||||
}
|
||||
|
||||
/// Find frequency peaks from averaged power spectrum.
|
||||
fn find_frequency_peaks(&self) -> Vec<RawCandidate> {
|
||||
fn find_frequency_peaks(&mut self) -> Vec<RawCandidate> {
|
||||
if self.raw_audio.len() < FT2_NFFT1 {
|
||||
return Vec::new();
|
||||
}
|
||||
@@ -204,65 +265,68 @@ impl Ft2Pipeline {
|
||||
let fs = self.sample_rate;
|
||||
let df = fs / FT2_NFFT1 as f32;
|
||||
let n_frames = 1 + (self.raw_audio.len() - FT2_NFFT1) / FT2_NSTEP;
|
||||
let PeakSearchWorkspace {
|
||||
window,
|
||||
fft,
|
||||
fft_input,
|
||||
fft_output,
|
||||
fft_scratch,
|
||||
avg,
|
||||
smooth,
|
||||
baseline,
|
||||
} = &mut self.peak_search;
|
||||
|
||||
// Compute Nuttall window
|
||||
let window = nuttall_window(FT2_NFFT1);
|
||||
|
||||
// Forward real FFT setup
|
||||
let mut real_planner = realfft::RealFftPlanner::<f32>::new();
|
||||
let fft = real_planner.plan_fft_forward(FT2_NFFT1);
|
||||
let mut fft_input = fft.make_input_vec();
|
||||
let mut fft_output = fft.make_output_vec();
|
||||
let mut fft_scratch = fft.make_scratch_vec();
|
||||
|
||||
// Average power spectrum across frames
|
||||
let mut avg = vec![0.0f32; FT2_NH1];
|
||||
avg.fill(0.0);
|
||||
smooth.fill(0.0);
|
||||
baseline.fill(0.0);
|
||||
|
||||
for frame in 0..n_frames {
|
||||
let start = frame * FT2_NSTEP;
|
||||
for i in 0..FT2_NFFT1 {
|
||||
fft_input[i] = self.raw_audio[start + i] * window[i];
|
||||
let input = &self.raw_audio[start..(start + FT2_NFFT1)];
|
||||
for (dst, (&sample, &coeff)) in
|
||||
fft_input.iter_mut().zip(input.iter().zip(window.iter()))
|
||||
{
|
||||
*dst = sample * coeff;
|
||||
}
|
||||
fft.process_with_scratch(&mut fft_input, &mut fft_output, &mut fft_scratch)
|
||||
fft.process_with_scratch(fft_input, fft_output, fft_scratch)
|
||||
.expect("FFT failed");
|
||||
|
||||
for bin in 1..FT2_NH1 {
|
||||
if bin < fft_output.len() {
|
||||
let c = fft_output[bin];
|
||||
let power = c.re * c.re + c.im * c.im;
|
||||
avg[bin] += power;
|
||||
}
|
||||
for (bin, c) in fft_output.iter().enumerate().take(FT2_NH1).skip(1) {
|
||||
avg[bin] += c.norm_sqr();
|
||||
}
|
||||
}
|
||||
|
||||
let inv_n_frames = 1.0 / n_frames as f32;
|
||||
for bin in 1..FT2_NH1 {
|
||||
avg[bin] /= n_frames as f32;
|
||||
avg[bin] *= inv_n_frames;
|
||||
}
|
||||
|
||||
// Smooth with 15-point moving average
|
||||
let mut smooth = vec![0.0f32; FT2_NH1];
|
||||
if FT2_NH1 > 16 {
|
||||
let mut sum: f32 = avg[1..16].iter().sum();
|
||||
for bin in 8..FT2_NH1.saturating_sub(8) {
|
||||
let mut sum = 0.0f32;
|
||||
for i in (bin.saturating_sub(7))..=(bin + 7).min(FT2_NH1 - 1) {
|
||||
sum += avg[i];
|
||||
}
|
||||
smooth[bin] = sum / 15.0;
|
||||
if bin + 8 < FT2_NH1 {
|
||||
sum += avg[bin + 8] - avg[bin - 7];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Baseline with 63-point moving average
|
||||
let mut baseline = vec![0.0f32; FT2_NH1];
|
||||
if FT2_NH1 > 64 {
|
||||
let mut sum: f32 = smooth[1..64].iter().sum();
|
||||
for bin in 32..FT2_NH1.saturating_sub(32) {
|
||||
let mut sum = 0.0f32;
|
||||
for i in (bin.saturating_sub(31))..=(bin + 31).min(FT2_NH1 - 1) {
|
||||
sum += smooth[i];
|
||||
}
|
||||
baseline[bin] = sum / 63.0 + 1e-9;
|
||||
if bin + 32 < FT2_NH1 {
|
||||
sum += smooth[bin + 32] - smooth[bin - 31];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find peaks
|
||||
let min_bin = (200.0 / df).round() as usize;
|
||||
let max_bin = (4910.0 / df).round() as usize;
|
||||
let mut candidates = Vec::new();
|
||||
let mut candidates = Vec::with_capacity(FT2_MAX_RAW_CANDIDATES);
|
||||
|
||||
let mut bin = min_bin + 1;
|
||||
while bin < max_bin.saturating_sub(1) && candidates.len() < FT2_MAX_RAW_CANDIDATES {
|
||||
@@ -309,20 +373,25 @@ impl Ft2Pipeline {
|
||||
}
|
||||
|
||||
// Sort by score descending
|
||||
candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
|
||||
candidates.sort_by(|a, b| {
|
||||
b.score
|
||||
.partial_cmp(&a.score)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
candidates
|
||||
}
|
||||
|
||||
/// Find scan hits by downsampling each frequency peak and computing sync scores.
|
||||
fn find_scan_hits(&self, ctx: &DownsampleContext) -> Vec<ScanHit> {
|
||||
fn find_scan_hits(
|
||||
&mut self,
|
||||
ctx: &DownsampleContext,
|
||||
workspace: &mut Ft2DecodeWorkspace,
|
||||
) -> Vec<ScanHit> {
|
||||
let peaks = self.find_frequency_peaks();
|
||||
if peaks.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let nfft2 = ctx.nfft2();
|
||||
let waveforms = prepare_sync_waveforms();
|
||||
|
||||
let mut hits = Vec::new();
|
||||
|
||||
for peak in &peaks {
|
||||
@@ -330,12 +399,15 @@ impl Ft2Pipeline {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut down = vec![Complex32::new(0.0, 0.0); nfft2];
|
||||
let produced = ctx.downsample(peak.freq_hz, &mut down);
|
||||
let produced = ctx.downsample_with_workspace(
|
||||
peak.freq_hz,
|
||||
&mut workspace.downsample_a,
|
||||
&mut workspace.downsample,
|
||||
);
|
||||
if produced == 0 {
|
||||
continue;
|
||||
}
|
||||
normalize_downsampled(&mut down[..produced], produced);
|
||||
normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
|
||||
|
||||
// Coarse search
|
||||
let mut best_score: f32 = -1.0;
|
||||
@@ -347,10 +419,10 @@ impl Ft2Pipeline {
|
||||
let mut start = -688i32;
|
||||
while start <= 2024 {
|
||||
let score = sync2d_score(
|
||||
&down[..produced],
|
||||
&workspace.downsample_a[..produced],
|
||||
start,
|
||||
idf,
|
||||
&waveforms,
|
||||
&self.waveforms,
|
||||
);
|
||||
if score > best_score {
|
||||
best_score = score;
|
||||
@@ -373,10 +445,10 @@ impl Ft2Pipeline {
|
||||
}
|
||||
for start in (best_start - 5)..=(best_start + 5) {
|
||||
let score = sync2d_score(
|
||||
&down[..produced],
|
||||
&workspace.downsample_a[..produced],
|
||||
start,
|
||||
idf,
|
||||
&waveforms,
|
||||
&self.waveforms,
|
||||
);
|
||||
if score > best_score {
|
||||
best_score = score;
|
||||
@@ -409,17 +481,22 @@ impl Ft2Pipeline {
|
||||
}
|
||||
|
||||
/// Attempt to decode a single scan hit through the full pipeline.
|
||||
fn decode_hit(&self, ctx: &DownsampleContext, hit: &ScanHit) -> Option<Ft2DecodeResult> {
|
||||
let nfft2 = ctx.nfft2();
|
||||
let waveforms = prepare_sync_waveforms();
|
||||
|
||||
fn decode_hit(
|
||||
&self,
|
||||
ctx: &DownsampleContext,
|
||||
hit: &ScanHit,
|
||||
workspace: &mut Ft2DecodeWorkspace,
|
||||
) -> Option<Ft2DecodeResult> {
|
||||
// Initial downsample for sync refinement
|
||||
let mut cd2 = vec![Complex32::new(0.0, 0.0); nfft2];
|
||||
let produced = ctx.downsample(hit.freq_hz, &mut cd2);
|
||||
let produced = ctx.downsample_with_workspace(
|
||||
hit.freq_hz,
|
||||
&mut workspace.downsample_a,
|
||||
&mut workspace.downsample,
|
||||
);
|
||||
if produced == 0 {
|
||||
return None;
|
||||
}
|
||||
normalize_downsampled(&mut cd2[..produced], produced);
|
||||
normalize_downsampled(&mut workspace.downsample_a[..produced], produced);
|
||||
|
||||
// Refine sync
|
||||
let mut best_score: f32 = -1.0;
|
||||
@@ -431,7 +508,12 @@ impl Ft2Pipeline {
|
||||
continue;
|
||||
}
|
||||
for start in (hit.start - 5)..=(hit.start + 5) {
|
||||
let score = sync2d_score(&cd2[..produced], start, idf, &waveforms);
|
||||
let score = sync2d_score(
|
||||
&workspace.downsample_a[..produced],
|
||||
start,
|
||||
idf,
|
||||
&self.waveforms,
|
||||
);
|
||||
if score > best_score {
|
||||
best_score = score;
|
||||
best_start = start;
|
||||
@@ -451,19 +533,25 @@ impl Ft2Pipeline {
|
||||
}
|
||||
|
||||
// Final downsample at corrected frequency
|
||||
let mut cb = vec![Complex32::new(0.0, 0.0); nfft2];
|
||||
let produced2 = ctx.downsample(corrected_freq_hz, &mut cb);
|
||||
let produced2 = ctx.downsample_with_workspace(
|
||||
corrected_freq_hz,
|
||||
&mut workspace.downsample_b,
|
||||
&mut workspace.downsample,
|
||||
);
|
||||
if produced2 == 0 {
|
||||
return None;
|
||||
}
|
||||
normalize_downsampled(&mut cb[..produced2], FT2_FRAME_SAMPLES);
|
||||
normalize_downsampled(&mut workspace.downsample_b[..produced2], FT2_FRAME_SAMPLES);
|
||||
|
||||
// Extract signal region
|
||||
let mut signal = vec![Complex32::new(0.0, 0.0); FT2_FRAME_SAMPLES];
|
||||
extract_signal_region(&cb[..produced2], best_start, &mut signal);
|
||||
extract_signal_region(
|
||||
&workspace.downsample_b[..produced2],
|
||||
best_start,
|
||||
&mut workspace.signal,
|
||||
);
|
||||
|
||||
// Extract bit metrics
|
||||
let bitmetrics = bitmetrics::extract_bitmetrics_raw(&signal)?;
|
||||
let bitmetrics = workspace.bitmetrics.extract(&workspace.signal)?;
|
||||
|
||||
// Sync quality check using known Costas bit patterns
|
||||
let sync_bits_a: [u8; 8] = [0, 0, 0, 1, 1, 0, 1, 1];
|
||||
@@ -472,10 +560,26 @@ impl Ft2Pipeline {
|
||||
let sync_bits_d: [u8; 8] = [1, 0, 1, 1, 0, 0, 0, 1];
|
||||
let mut sync_qual = 0;
|
||||
for i in 0..8 {
|
||||
sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] { 1 } else { 0 };
|
||||
sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] { 1 } else { 0 };
|
||||
sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] { 1 } else { 0 };
|
||||
sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] { 1 } else { 0 };
|
||||
sync_qual += if (bitmetrics[i][0] >= 0.0) as u8 == sync_bits_a[i] {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
sync_qual += if (bitmetrics[66 + i][0] >= 0.0) as u8 == sync_bits_b[i] {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
sync_qual += if (bitmetrics[132 + i][0] >= 0.0) as u8 == sync_bits_c[i] {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
sync_qual += if (bitmetrics[198 + i][0] >= 0.0) as u8 == sync_bits_d[i] {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
}
|
||||
if sync_qual < 10 {
|
||||
return None;
|
||||
@@ -591,8 +695,18 @@ impl Ft2Pipeline {
|
||||
}
|
||||
|
||||
// Compute refined timing via parabolic interpolation
|
||||
let sm1 = sync2d_score(&cd2[..produced], best_start - 1, best_idf, &waveforms);
|
||||
let sp1 = sync2d_score(&cd2[..produced], best_start + 1, best_idf, &waveforms);
|
||||
let sm1 = sync2d_score(
|
||||
&workspace.downsample_a[..produced],
|
||||
best_start - 1,
|
||||
best_idf,
|
||||
&self.waveforms,
|
||||
);
|
||||
let sp1 = sync2d_score(
|
||||
&workspace.downsample_a[..produced],
|
||||
best_start + 1,
|
||||
best_idf,
|
||||
&self.waveforms,
|
||||
);
|
||||
let mut xstart = best_start as f32;
|
||||
let den = sm1 - 2.0 * best_score + sp1;
|
||||
if den.abs() > 1e-6 {
|
||||
@@ -635,7 +749,11 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
|
||||
if power <= 0.0 {
|
||||
return;
|
||||
}
|
||||
let rc = if ref_count == 0 { samples.len() } else { ref_count };
|
||||
let rc = if ref_count == 0 {
|
||||
samples.len()
|
||||
} else {
|
||||
ref_count
|
||||
};
|
||||
let scale = (rc as f32 / power).sqrt();
|
||||
for s in samples.iter_mut() {
|
||||
*s *= scale;
|
||||
@@ -644,14 +762,17 @@ fn normalize_downsampled(samples: &mut [Complex32], ref_count: usize) {
|
||||
|
||||
/// Extract a signal region starting at `start` into `out_signal`.
|
||||
fn extract_signal_region(input: &[Complex32], start: i32, out_signal: &mut [Complex32]) {
|
||||
for i in 0..out_signal.len() {
|
||||
let src = start + i as i32;
|
||||
out_signal[i] = if src >= 0 && (src as usize) < input.len() {
|
||||
input[src as usize]
|
||||
} else {
|
||||
Complex32::new(0.0, 0.0)
|
||||
};
|
||||
out_signal.fill(Complex32::new(0.0, 0.0));
|
||||
|
||||
let src_start = start.max(0) as usize;
|
||||
let dst_start = (-start).max(0) as usize;
|
||||
if dst_start >= out_signal.len() || src_start >= input.len() {
|
||||
return;
|
||||
}
|
||||
|
||||
let copy_len = (input.len() - src_start).min(out_signal.len() - dst_start);
|
||||
out_signal[dst_start..(dst_start + copy_len)]
|
||||
.copy_from_slice(&input[src_start..(src_start + copy_len)]);
|
||||
}
|
||||
|
||||
/// Normalize LLR array (divide by standard deviation).
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
//! reference across time and frequency offsets.
|
||||
|
||||
use num_complex::Complex32;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use crate::constants::FT4_COSTAS_PATTERN;
|
||||
|
||||
@@ -16,6 +17,12 @@ use super::{FT2_NDOWN, FT2_NSS, FT2_SYMBOL_PERIOD_F, FT2_SYNC_TWEAK_MAX, FT2_SYN
|
||||
|
||||
/// Number of frequency tweak entries.
|
||||
const NUM_TWEAKS: usize = (FT2_SYNC_TWEAK_MAX - FT2_SYNC_TWEAK_MIN) as usize + 1;
|
||||
const SYNC_GROUP_COUNT: usize = 4;
|
||||
const SYNC_SAMPLES: usize = 64;
|
||||
const SAMPLE_STRIDE: usize = 2;
|
||||
const GROUP_STRIDE: i32 = 33 * FT2_NSS as i32;
|
||||
const GROUP_LAST_SAMPLE_OFFSET: i32 = SAMPLE_STRIDE as i32 * (SYNC_SAMPLES as i32 - 1);
|
||||
const FRAME_LAST_SAMPLE_OFFSET: i32 = 3 * GROUP_STRIDE + GROUP_LAST_SAMPLE_OFFSET;
|
||||
|
||||
/// Precomputed sync and frequency-tweak waveforms.
|
||||
pub struct SyncWaveforms {
|
||||
@@ -73,6 +80,74 @@ pub fn prepare_sync_waveforms() -> SyncWaveforms {
|
||||
}
|
||||
}
|
||||
|
||||
type SyncReferenceBank = [[[Complex32; SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];
|
||||
|
||||
fn sync_reference_bank() -> &'static SyncReferenceBank {
|
||||
static REFS: OnceLock<SyncReferenceBank> = OnceLock::new();
|
||||
|
||||
REFS.get_or_init(|| {
|
||||
let waveforms = prepare_sync_waveforms();
|
||||
let mut refs = [[[Complex32::new(0.0, 0.0); SYNC_SAMPLES]; SYNC_GROUP_COUNT]; NUM_TWEAKS];
|
||||
|
||||
for tw_idx in 0..NUM_TWEAKS {
|
||||
for group in 0..SYNC_GROUP_COUNT {
|
||||
for i in 0..SYNC_SAMPLES {
|
||||
refs[tw_idx][group][i] =
|
||||
(waveforms.sync_wave[group][i] * waveforms.tweak_wave[tw_idx][i]).conj();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
refs
|
||||
})
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn correlate_group_fast(
|
||||
samples: &[Complex32],
|
||||
pos: usize,
|
||||
refs: &[Complex32; SYNC_SAMPLES],
|
||||
) -> f32 {
|
||||
let mut sum_re = 0.0f32;
|
||||
let mut sum_im = 0.0f32;
|
||||
|
||||
for i in 0..SYNC_SAMPLES {
|
||||
let sample = samples[pos + i * SAMPLE_STRIDE];
|
||||
let reference = refs[i];
|
||||
sum_re += sample.re * reference.re - sample.im * reference.im;
|
||||
sum_im += sample.re * reference.im + sample.im * reference.re;
|
||||
}
|
||||
|
||||
(sum_re * sum_re + sum_im * sum_im).sqrt()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn correlate_group_clipped(
|
||||
samples: &[Complex32],
|
||||
pos: i32,
|
||||
refs: &[Complex32; SYNC_SAMPLES],
|
||||
) -> (f32, usize) {
|
||||
let mut sum_re = 0.0f32;
|
||||
let mut sum_im = 0.0f32;
|
||||
let mut usable = 0usize;
|
||||
let n_samples = samples.len() as i32;
|
||||
|
||||
for i in 0..SYNC_SAMPLES {
|
||||
let sample_idx = pos + i as i32 * SAMPLE_STRIDE as i32;
|
||||
if sample_idx < 0 || sample_idx >= n_samples {
|
||||
continue;
|
||||
}
|
||||
|
||||
let sample = samples[sample_idx as usize];
|
||||
let reference = refs[i];
|
||||
sum_re += sample.re * reference.re - sample.im * reference.im;
|
||||
sum_im += sample.re * reference.im + sample.im * reference.re;
|
||||
usable += 1;
|
||||
}
|
||||
|
||||
((sum_re * sum_re + sum_im * sum_im).sqrt(), usable)
|
||||
}
|
||||
|
||||
/// Compute the 2D sync score for a given time offset and frequency tweak.
|
||||
///
|
||||
/// Correlates the downsampled complex samples against the four Costas sync
|
||||
@@ -88,46 +163,36 @@ pub fn sync2d_score(
|
||||
samples: &[Complex32],
|
||||
start: i32,
|
||||
idf: i32,
|
||||
waveforms: &SyncWaveforms,
|
||||
_waveforms: &SyncWaveforms,
|
||||
) -> f32 {
|
||||
let nss = FT2_NSS as i32;
|
||||
let n_samples = samples.len() as i32;
|
||||
|
||||
// The four sync groups are at symbol positions 0, 33, 66, 99 within the frame
|
||||
let positions = [
|
||||
start,
|
||||
start + 33 * nss,
|
||||
start + 66 * nss,
|
||||
start + 99 * nss,
|
||||
];
|
||||
|
||||
let tw_idx = (idf - FT2_SYNC_TWEAK_MIN) as usize;
|
||||
if tw_idx >= waveforms.tweak_wave.len() {
|
||||
if tw_idx >= NUM_TWEAKS {
|
||||
return 0.0;
|
||||
}
|
||||
let tweak = &waveforms.tweak_wave[tw_idx];
|
||||
|
||||
let refs = &sync_reference_bank()[tw_idx];
|
||||
let scale = 1.0 / (2.0 * FT2_NSS as f32);
|
||||
|
||||
let mut score = 0.0f32;
|
||||
|
||||
for group in 0..4 {
|
||||
let pos = positions[group];
|
||||
let mut sum = Complex32::new(0.0, 0.0);
|
||||
let mut usable = 0;
|
||||
if start >= 0 && start + FRAME_LAST_SAMPLE_OFFSET < n_samples {
|
||||
for (group, refs_group) in refs.iter().enumerate() {
|
||||
let pos = (start + group as i32 * GROUP_STRIDE) as usize;
|
||||
score += correlate_group_fast(samples, pos, refs_group) * scale;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
for i in 0..64 {
|
||||
let sample_idx = pos + 2 * i as i32;
|
||||
if sample_idx < 0 || sample_idx >= n_samples {
|
||||
for (group, refs_group) in refs.iter().enumerate() {
|
||||
let pos = start + group as i32 * GROUP_STRIDE;
|
||||
if pos >= n_samples || pos + GROUP_LAST_SAMPLE_OFFSET < 0 {
|
||||
continue;
|
||||
}
|
||||
// Correlate: multiply received sample by conjugate of
|
||||
// (sync_reference * tweak_phasor)
|
||||
let reference = waveforms.sync_wave[group][i] * tweak[i];
|
||||
sum += samples[sample_idx as usize] * reference.conj();
|
||||
usable += 1;
|
||||
}
|
||||
|
||||
let (corr, usable) = correlate_group_clipped(samples, pos, refs_group);
|
||||
if usable > 16 {
|
||||
score += sum.norm() / (2.0 * FT2_NSS as f32);
|
||||
score += corr * scale;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user