[feat](trx-rds): push RDS decoding to 5 dB SNR

Fix Gardner TED loop structure bug (type-3 → type-2 PLL) and tune
gains for ζ=0.707 damping.  Add adaptive Costas loop bandwidth that
narrows from ~22 Hz to ~5.5 Hz once carrier is locked, reducing phase
noise at low SNR.  Narrow RRC matched filter (α=0.30, span=10 chips)
for ~0.6 dB noise BW gain.  Add OSD(4) for locked-mode blocks after
first successful group, and increase PI accumulation threshold to 8.

TED bug details: the original code used `clock_inc += correction`
which added the full integrator value at every chip, creating an
extra integration (type-3 loop) that is unconditionally unstable.
Fixed to `clock_inc = nominal + correction` (standard type-2 PLL).
Gains retuned: Kp=4e-4, Ki=8e-8 for ζ≈0.707 and loop BW≈0.11 Hz.

Signed-off-by: Claude <noreply@anthropic.com>

https://claude.ai/code/session_0136sPdLUpYgvskrzbi2Epkv
Signed-off-by: Claude <noreply@anthropic.com>
This commit is contained in:
Claude
2026-03-27 07:26:52 +00:00
committed by Stan Grams
parent 9fc469aad1
commit 104629e373
+198 -45
View File
@@ -19,9 +19,9 @@ const BIPHASE_CLOCK_WINDOW: usize = 128;
/// Minimum quality score to publish RDS state to the outer decoder.
const MIN_PUBLISH_QUALITY: f32 = 0.20;
/// Tech 6: number of Block A observations before using accumulated PI.
/// 5 observations at 9 dB SNR gives reliable majority voting without
/// 8 observations gives reliable majority voting down to 5 dB SNR without
/// significant latency increase (one group = 4 blocks ≈ 87 ms).
const PI_ACC_THRESHOLD: u8 = 5;
const PI_ACC_THRESHOLD: u8 = 8;
/// Tech 9: maximum total soft-confidence cost for OSD bit flips.
/// Rejects corrections where the flipped bits had high confidence —
/// a strong indicator of a false decode rather than a genuine error.
@@ -29,31 +29,42 @@ const PI_ACC_THRESHOLD: u8 = 5;
/// matches typically cost 0.6–1.2.
const OSD_MAX_FLIP_COST: f32 = 0.45;
/// Tech 11 — Gardner TED proportional gain (per chip, after power normalisation).
/// Sized so that a full-amplitude timing error (normalised error ≈ 1) produces
/// a correction of ~Kp per chip, well within the clamp. This is deliberately
/// conservative; the I-path handles steady-state offsets.
const GARDNER_KP: f32 = 1e-4;
/// Together with GARDNER_KI these form a type-2 PLL with damping ratio
/// ζ = Kp / (2·√Ki) ≈ 0.707 and natural frequency ωn = √Ki ≈ 2.83e-4 rad/chip
/// (loop BW ≈ 0.11 Hz at 2375 chips/s).
const GARDNER_KP: f32 = 4e-4;
/// Tech 11 — Gardner TED integral gain (per chip, after power normalisation).
/// Roughly Kp/1000; slow enough to avoid windup yet fast enough to null a
/// crystal offset (typically < 100 ppm) within a few seconds.
const GARDNER_KI: f32 = 1e-7;
/// Tracks crystal offsets (typically < 100 ppm) while the narrow loop BW
/// keeps jitter low at ≤ 5 dB SNR.
const GARDNER_KI: f32 = 8e-8;
/// Tech 11 — maximum clock_inc change per chip (fraction of nominal).
/// ±1 % corresponds to ±23.75 Hz pull-in range at 2375 chips/s.
const GARDNER_MAX_FREQ_CORR_FRAC: f32 = 0.01;
/// Tech 5 — Costas loop proportional gain (per sample).
/// Tech 5 — Costas loop proportional gain for acquisition (per sample).
const COSTAS_KP: f32 = 8e-4;
/// Tech 5 — Costas loop integral gain (per sample).
/// Tech 5 — Costas loop integral gain for acquisition (per sample).
/// Tuned for ζ ≈ 0.68 (ωn = √KI ≈ 5.9e-4 rad/sample → ~22 Hz loop BW).
const COSTAS_KI: f32 = 3.5e-7;
/// Tech 5 — Costas loop proportional gain for narrow tracking mode.
/// ~4× narrower loop BW (~5.5 Hz) reduces phase noise at low SNR.
const COSTAS_KP_TRACK: f32 = 2.0e-4;
/// Tech 5 — Costas loop integral gain for narrow tracking mode.
const COSTAS_KI_TRACK: f32 = 2.2e-8;
/// Tech 5 — maximum frequency correction per sample (radians).
const COSTAS_MAX_FREQ_CORR: f32 = 0.005;
/// Tech 1 — RRC roll-off factor. 0.50 gives ~14% narrower noise bandwidth
/// than 0.75 (one-sided BW = Rs/2 × (1+α)) for ~0.6 dB sensitivity gain.
const RRC_ALPHA: f32 = 0.50;
/// Tech 1 — RRC filter span in chips. 6 chips captures more pulse energy
/// than 4 and reduces ISI on adjacent chips; the added latency is 2 chips
/// (~0.85 ms at 2375 chips/s), negligible for RDS.
const RRC_SPAN_CHIPS: usize = 6;
/// Leaky-average time constant for Costas error magnitude tracking.
const COSTAS_ERR_AVG_ALPHA: f32 = 0.998;
/// Costas error average below this threshold triggers narrow tracking mode.
const COSTAS_LOCK_THRESHOLD: f32 = 0.15;
/// Tech 1 — RRC roll-off factor. 0.30 gives ~13% narrower noise bandwidth
/// than 0.50 (one-sided BW = Rs/2 × (1+α) = 772 Hz) for ~0.6 dB extra
/// sensitivity gain. The tighter excess bandwidth is handled by the longer
/// RRC_SPAN_CHIPS to keep ISI negligible.
const RRC_ALPHA: f32 = 0.30;
/// Tech 1 — RRC filter span in chips. 10 chips captures more pulse energy
/// and the extra taps keep stopband leakage below 60 dB, critical when α
/// is small. Added latency is ~4.2 ms at 2375 chips/s, negligible for RDS.
const RRC_SPAN_CHIPS: usize = 10;
const OFFSET_A: u16 = 0x0FC;
const OFFSET_B: u16 = 0x198;
@@ -86,7 +97,7 @@ fn rrc_tap(t: f32, alpha: f32) -> f32 {
fn build_rrc_taps(sample_rate: f32, chip_rate: f32) -> Vec<f32> {
let sps = (sample_rate / chip_rate).max(2.0);
let n_half = (RRC_SPAN_CHIPS as f32 * sps / 2.0).round() as usize;
let n_taps = (2 * n_half + 1).min(513);
let n_taps = (2 * n_half + 1).min(1025);
let center = (n_taps / 2) as f32;
let mut taps: Vec<f32> = (0..n_taps)
@@ -294,6 +305,8 @@ struct Candidate {
rt_ab_flag: bool,
ptyn_bytes: [u8; 8],
ptyn_seen: [bool; 2],
/// Consecutive block decode failures in locked mode.
consecutive_block_failures: u8,
/// Tech 6: accumulated LLR for the PI field (16 bits, MSB first).
pi_llr_acc: [f32; 16],
/// Tech 6: number of Block A observations accumulated.
@@ -347,6 +360,7 @@ impl Candidate {
rt_ab_flag: false,
ptyn_bytes: [b' '; 8],
ptyn_seen: [false; 2],
consecutive_block_failures: 0,
pi_llr_acc: [0.0; 16],
pi_acc_count: 0,
nominal_clock_inc,
@@ -395,9 +409,13 @@ impl Candidate {
// the correction ceiling even during prolonged large-error transients.
self.ted_integrator =
(self.ted_integrator + GARDNER_KI * ted_err).clamp(-max_corr, max_corr);
// Type-2 PLL: clock_inc = nominal + PI_correction.
// The integrator tracks the steady-state frequency offset; Kp provides
// transient phase correction. Using nominal as the base (not +=)
// prevents the integrator output from being double-integrated.
let correction =
(GARDNER_KP * ted_err + self.ted_integrator).clamp(-max_corr, max_corr);
self.clock_inc = (self.clock_inc + correction).clamp(
self.clock_inc = (self.nominal_clock_inc + correction).clamp(
self.nominal_clock_inc * (1.0 - GARDNER_MAX_FREQ_CORR_FRAC),
self.nominal_clock_inc * (1.0 + GARDNER_MAX_FREQ_CORR_FRAC),
);
@@ -480,9 +498,9 @@ impl Candidate {
}
// Hard decode only in search mode: OSD in the slide window would create
// ~13 % false Block A hits per bit, letting wrong clock candidates
// accumulate false groups as fast as the correct one accrues real ones.
// Once locked, OSD(2) in consume_locked_block handles weak blocks.
// too many false Block A hits from noise, especially with the cost-pruned
// OSD variants. Once locked, OSD(3/4) in consume_locked_block handles
// weak blocks safely thanks to sequential block-type gating.
let (data, kind) = decode_block(self.search_reg)?;
if kind != BlockKind::A {
return None;
@@ -493,18 +511,30 @@ impl Candidate {
self.block_reg = 0;
self.block_bits = 0;
self.block_a = data;
self.consecutive_block_failures = 0;
self.state.pi = Some(data);
None
}
fn consume_locked_block(&mut self, word: u32) -> Option<RdsData> {
let expected = self.expect;
// Use more aggressive OSD once we have decoded at least one group,
// because the sequential block gating already prevents false groups.
let max_cost = if self.score >= 1 {
OSD_MAX_FLIP_COST + 0.15
} else {
OSD_MAX_FLIP_COST
};
let max_order = if self.score >= 1 { 4u8 } else { 3 };
// Tech 3/7/8: use soft-decision decoder instead of hard decode.
let Some((data, kind)) = decode_block_soft(word, &self.block_soft) else {
let Some((data, kind)) =
decode_block_soft(word, &self.block_soft, max_cost, max_order)
else {
self.drop_lock(word);
return None;
};
self.consecutive_block_failures = 0;
match (expected, kind) {
(ExpectBlock::B, BlockKind::B) => {
self.block_b = data;
@@ -817,6 +847,8 @@ pub struct RdsDecoder {
/// Tech 2: pilot-derived 57 kHz carrier reference (cos, sin).
/// When Some, the free-running NCO is bypassed and Costas is suppressed.
pilot_ref: Option<(f32, f32)>,
/// Leaky average of |Costas error| for adaptive loop bandwidth.
costas_err_avg: f32,
candidates: Vec<Candidate>,
best_score: u32,
/// Index into `candidates` for the current winning candidate.
@@ -845,6 +877,7 @@ impl RdsDecoder {
rrc: FftRrcFilter::new_rrc(sample_rate_f, RDS_CHIP_RATE),
costas_integrator: 0.0,
pilot_ref: None,
costas_err_avg: 1.0,
candidates,
best_score: 0,
best_candidate_idx: None,
@@ -890,10 +923,18 @@ impl RdsDecoder {
// Tech 5: Costas loop — tanh soft phase detector.
// Only active when not using a pilot reference.
// Adaptive bandwidth: use wide gains for acquisition, narrow once locked.
if self.pilot_ref.is_none() {
let err = mixed_i.tanh() * mixed_q;
self.costas_integrator += COSTAS_KI * err;
let freq_correction = (COSTAS_KP * err + self.costas_integrator)
self.costas_err_avg = COSTAS_ERR_AVG_ALPHA * self.costas_err_avg
+ (1.0 - COSTAS_ERR_AVG_ALPHA) * err.abs();
let (kp, ki) = if self.costas_err_avg < COSTAS_LOCK_THRESHOLD {
(COSTAS_KP_TRACK, COSTAS_KI_TRACK)
} else {
(COSTAS_KP, COSTAS_KI)
};
self.costas_integrator += ki * err;
let freq_correction = (kp * err + self.costas_integrator)
.clamp(-COSTAS_MAX_FREQ_CORR, COSTAS_MAX_FREQ_CORR);
self.carrier_phase -= freq_correction;
self.carrier_phase = self.carrier_phase.rem_euclid(TAU);
@@ -968,22 +1009,31 @@ fn decode_block(word: u32) -> Option<(u16, BlockKind)> {
Some((data, kind))
}
/// Tech 3/7/8: soft-decision block decoder implementing OSD(3).
/// Tech 3/7/8: soft-decision block decoder implementing OSD(3) or OSD(4).
///
/// `word` is the 26-bit hard-decision word; `soft[k]` is the confidence
/// magnitude (|LLR|) for the k-th received bit, where bit 0 is the MSB
/// (bit 25 of `word`) and bit 25 is the LSB (bit 0 of `word`).
///
/// `max_cost` is the maximum total flip cost (adaptive based on signal quality).
/// `max_order` is the maximum OSD order (3 or 4).
///
/// Search order:
/// 1. Hard decode (Hamming distance 0) — zero cost.
/// 2. All 26 single-bit flips — return the lowest-cost success.
/// 3. All C(26,2)=325 two-bit flips — return the lowest-cost success.
/// 4. All C(26,3)=2600 three-bit flips — return the lowest-cost success.
/// 5. (order 4) All C(26,4)=14950 four-bit flips — return the lowest-cost success.
///
/// OSD is only used in locked mode (known block boundaries), so the
/// false-positive risk is bounded by the sequential block-type gating in
/// `consume_locked_block`.
fn decode_block_soft(word: u32, soft: &[f32; 26]) -> Option<(u16, BlockKind)> {
fn decode_block_soft(
word: u32,
soft: &[f32; 26],
max_cost: f32,
max_order: u8,
) -> Option<(u16, BlockKind)> {
// Distance 0.
if let Some(result) = decode_block(word) {
return Some(result);
@@ -1007,11 +1057,9 @@ fn decode_block_soft(word: u32, soft: &[f32; 26]) -> Option<(u16, BlockKind)> {
}
if best_result.is_some() {
// Tech 9: reject if the cheapest single-bit flip cost is too high.
if best_cost <= OSD_MAX_FLIP_COST {
if best_cost <= max_cost {
return best_result;
}
// Cost too high — fall through to OSD(2) in case a cheaper pair exists.
best_result = None;
best_cost = f32::INFINITY;
}
@@ -1020,7 +1068,7 @@ fn decode_block_soft(word: u32, soft: &[f32; 26]) -> Option<(u16, BlockKind)> {
for k1 in 0..26usize {
for k2 in (k1 + 1)..26usize {
let pair_cost = soft[k1] + soft[k2];
if pair_cost >= best_cost || pair_cost > OSD_MAX_FLIP_COST {
if pair_cost >= best_cost || pair_cost > max_cost {
continue;
}
let trial = word ^ (1 << (25 - k1)) ^ (1 << (25 - k2));
@@ -1035,21 +1083,19 @@ fn decode_block_soft(word: u32, soft: &[f32; 26]) -> Option<(u16, BlockKind)> {
return best_result;
}
// Distance 3: all C(26,3)=2600 three-bit flips; pick the cheapest triple.
// The cost gate keeps false positives comparable to OSD(2); 2600 iterations
// with early-exit are fast (< 1 µs on modern hardware at chip rate).
// Distance 3: all C(26,3)=2600 three-bit flips.
for k1 in 0..26usize {
if soft[k1] >= OSD_MAX_FLIP_COST {
if soft[k1] >= max_cost {
continue;
}
for k2 in (k1 + 1)..26usize {
let c12 = soft[k1] + soft[k2];
if c12 >= OSD_MAX_FLIP_COST {
if c12 >= max_cost {
continue;
}
for (k3, &s3) in soft.iter().enumerate().skip(k2 + 1) {
let triple_cost = c12 + s3;
if triple_cost >= best_cost || triple_cost > OSD_MAX_FLIP_COST {
if triple_cost >= best_cost || triple_cost > max_cost {
continue;
}
let trial = word ^ (1 << (25 - k1)) ^ (1 << (25 - k2)) ^ (1 << (25 - k3));
@@ -1061,6 +1107,45 @@ fn decode_block_soft(word: u32, soft: &[f32; 26]) -> Option<(u16, BlockKind)> {
}
}
if best_result.is_some() || max_order < 4 {
return best_result;
}
// Distance 4: all C(26,4)=14950 four-bit flips.
// Cost pruning keeps this fast (most branches pruned at low order).
for k1 in 0..26usize {
if soft[k1] >= max_cost {
continue;
}
for k2 in (k1 + 1)..26usize {
let c12 = soft[k1] + soft[k2];
if c12 >= max_cost {
continue;
}
for k3 in (k2 + 1)..26usize {
let c123 = c12 + soft[k3];
if c123 >= max_cost {
continue;
}
for (k4, &s4) in soft.iter().enumerate().skip(k3 + 1) {
let quad_cost = c123 + s4;
if quad_cost >= best_cost || quad_cost > max_cost {
continue;
}
let trial = word
^ (1 << (25 - k1))
^ (1 << (25 - k2))
^ (1 << (25 - k3))
^ (1 << (25 - k4));
if let Some(result) = decode_block(trial) {
best_cost = quad_cost;
best_result = Some(result);
}
}
}
}
}
best_result
}
@@ -1186,7 +1271,7 @@ mod tests {
// Mark the corrupted bit as low confidence (realistic: a genuine
// error has low |biphase_I|).
soft[15] = 0.05;
let (data, kind) = decode_block_soft(corrupted, &soft).expect("should recover");
let (data, kind) = decode_block_soft(corrupted, &soft, OSD_MAX_FLIP_COST, 3).expect("should recover");
assert_eq!(data, 0xABCD);
assert_eq!(kind, BlockKind::A);
}
@@ -1202,7 +1287,7 @@ mod tests {
let mut soft = [1.0f32; 26];
soft[0] = 0.05;
soft[1] = 0.05;
let (data, kind) = decode_block_soft(corrupted, &soft).expect("OSD(2) should correct");
let (data, kind) = decode_block_soft(corrupted, &soft, OSD_MAX_FLIP_COST, 3).expect("OSD(2) should correct");
assert_eq!(data, 0x1234);
assert_eq!(kind, BlockKind::B);
}
@@ -1224,7 +1309,7 @@ mod tests {
let corrupted = word ^ (1 << (25 - 2)); // flip bit k=2
let mut soft = [1.0f32; 26];
soft[2] = 0.01; // least confident → cheapest to flip
let (data, kind) = decode_block_soft(corrupted, &soft).expect("should recover");
let (data, kind) = decode_block_soft(corrupted, &soft, OSD_MAX_FLIP_COST, 3).expect("should recover");
assert_eq!(data, 0xBEEF);
assert_eq!(kind, BlockKind::D);
}
@@ -1537,6 +1622,74 @@ mod tests {
assert!(got_pi, "PI should decode at SNR = 9 dB");
}
/// End-to-end check: the decoder must report the transmitted PI code when a
/// synthetic RDS signal has AWGN applied via `add_awgn(.., 7.0, ..)`.
#[test]
fn end_to_end_noisy_signal_snr_7db_decodes_pi() {
    let sample_rate = 240_000.0f32;
    let pi = 0x4BBC;

    // One pass over the four group-0A segments forms the base block pattern.
    let mut pattern: Vec<u32> = Vec::new();
    for seg in 0..4u8 {
        pattern.extend_from_slice(&group_0a(pi, seg, [b'N', b'Z' + seg], 3));
    }
    // The pattern is transmitted 80 times back to back.
    let words: Vec<u32> = pattern.repeat(80);

    let chips = blocks_to_chips(&words);
    let mut signal = chips_to_rds_signal(&chips, sample_rate);

    // Fixed seed keeps the noise realisation reproducible between runs.
    let mut rng = 0xBAAD_F00D_1337_C0DEu64;
    add_awgn(&mut signal, 7.0, &mut rng);

    let mut dec = RdsDecoder::new(sample_rate as u32);
    // `any` short-circuits on the first sample whose returned state carries
    // the expected PI, so no further samples are fed after a match.
    let got_pi = signal
        .iter()
        .any(|&s| dec.process_sample(s, 1.0).and_then(|st| st.pi) == Some(pi));

    assert!(got_pi, "PI should decode at SNR = 7 dB");
}
/// End-to-end check at the 5 dB target: the decoder must still report the
/// transmitted PI code when given a longer run of groups than the 7 dB test.
///
/// The original author's note puts the raw BER at roughly 3.6 % at this SNR.
#[test]
fn end_to_end_noisy_signal_snr_5db_decodes_pi() {
    let sample_rate = 240_000.0f32;
    let pi = 0x4BBC;

    // One pass over the four group-0A segments forms the base block pattern.
    let mut pattern: Vec<u32> = Vec::new();
    for seg in 0..4u8 {
        pattern.extend_from_slice(&group_0a(pi, seg, [b'N', b'Z' + seg], 3));
    }
    // 120 repetitions give the decoder more groups to work with.
    let words: Vec<u32> = pattern.repeat(120);

    let chips = blocks_to_chips(&words);
    let mut signal = chips_to_rds_signal(&chips, sample_rate);

    // Fixed seed keeps the noise realisation reproducible between runs.
    let mut rng = 0xDEAD_C0DE_FACE_B00Cu64;
    add_awgn(&mut signal, 5.0, &mut rng);

    let mut dec = RdsDecoder::new(sample_rate as u32);
    // `any` short-circuits on the first sample whose returned state carries
    // the expected PI, so no further samples are fed after a match.
    let got_pi = signal
        .iter()
        .any(|&s| dec.process_sample(s, 1.0).and_then(|st| st.pi) == Some(pi));

    assert!(got_pi, "PI should decode at SNR = 5 dB");
}
#[test]
fn end_to_end_with_pilot_reference_decodes_pi() {
// With an exact pilot reference, PI acquisition should be fast (< 20 groups).
@@ -1603,14 +1756,14 @@ mod tests {
soft[1] = 0.05;
// Verify each corrupted block individually recovers via OSD(2).
let (d_b, k_b) = decode_block_soft(corrupt_b, &soft).expect("block B should recover");
let (d_b, k_b) = decode_block_soft(corrupt_b, &soft, OSD_MAX_FLIP_COST, 3).expect("block B should recover");
assert_eq!((d_b, k_b), (block_b_data, BlockKind::B));
// C' check
let (d_c, _k_c) = decode_block_soft(corrupt_c, &soft).expect("block C' should recover");
let (d_c, _k_c) = decode_block_soft(corrupt_c, &soft, OSD_MAX_FLIP_COST, 3).expect("block C' should recover");
assert_eq!(d_c, 0x4865);
let (d_d, k_d) = decode_block_soft(corrupt_d, &soft).expect("block D should recover");
let (d_d, k_d) = decode_block_soft(corrupt_d, &soft, OSD_MAX_FLIP_COST, 3).expect("block D should recover");
assert_eq!(k_d, BlockKind::D);
assert_eq!(d_d, u16::from_be_bytes(*b"Hi"));
@@ -1683,7 +1836,7 @@ mod tests {
}
// OSD(2).
if decode_block_soft(corrupted, &soft).is_some() {
if decode_block_soft(corrupted, &soft, OSD_MAX_FLIP_COST, 3).is_some() {
osd2_ok += 1;
}
}