diff options
Diffstat (limited to 'src/kernel/ao_fec_rx.c')
-rw-r--r-- | src/kernel/ao_fec_rx.c | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/src/kernel/ao_fec_rx.c b/src/kernel/ao_fec_rx.c new file mode 100644 index 00000000..c4f5559a --- /dev/null +++ b/src/kernel/ao_fec_rx.c @@ -0,0 +1,318 @@ +/* + * Copyright © 2012 Keith Packard <keithp@keithp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <ao_fec.h> +#include <stdio.h> + +#ifdef TELEMEGA +#include <ao.h> +#endif + +#if AO_PROFILE +#include <ao_profile.h> + +uint32_t ao_fec_decode_start, ao_fec_decode_end; +#endif + +/* + * byte order repeats through 3 2 1 0 + * + * bit-pair order repeats through + * + * 1/0 3/2 5/4 7/6 + * + * So, the over all order is: + * + * 3,1/0 2,1/0 1,1/0 0,1/0 + * 3,3/2 2,3/2 1,3/2 0,3/2 + * 3,5/4 2,5/4 1,5/4 0,5/4 + * 3,7/6 2,7/6 1,7/6 0,7/6 + * + * The raw bit order is thus + * + * 1e/1f 16/17 0e/0f 06/07 + * 1c/1d 14/15 0c/0d 04/05 + * 1a/1b 12/13 0a/0b 02/03 + * 18/19 10/11 08/09 00/01 + */ + +static const uint8_t ao_interleave_order[] = { + 0x1e, 0x16, 0x0e, 0x06, + 0x1c, 0x14, 0x0c, 0x04, + 0x1a, 0x12, 0x0a, 0x02, + 0x18, 0x10, 0x08, 0x00 +}; + +static inline uint16_t ao_interleave_index(uint16_t i) { + return (i & ~0x1e) | ao_interleave_order[(i & 0x1e) >> 1]; +} + +#define NUM_STATE 8 +#define NUM_HIST 24 + +typedef uint32_t bits_t; + +#define V_0 0xff +#define V_1 0x00 + +/* + * These are just the 'zero' states; the 'one' states mirror them + */ +static const uint8_t ao_fec_decode_table[NUM_STATE*2] = { + V_0, V_0, /* 000 */ + V_0, V_1, /* 001 */ + V_1, V_1, /* 010 */ + V_1, V_0, /* 011 */ + V_1, V_1, /* 100 */ + V_1, V_0, /* 101 */ + V_0, V_0, /* 110 */ + V_0, V_1 /* 111 */ +}; + +static inline uint8_t +ao_next_state(uint8_t state, uint8_t bit) +{ + return ((state << 1) | bit) & 0x7; +} + +/* + * 'in' is 8-bits per symbol soft decision data + * 'len' is input byte length. 'out' must be + * 'len'/16 bytes long + */ + +uint8_t +ao_fec_decode(const uint8_t *in, uint16_t len, uint8_t *out, uint8_t out_len, uint16_t (*callback)(void)) +{ + static uint32_t cost[2][NUM_STATE]; /* path cost */ + static bits_t bits[2][NUM_STATE]; /* save bits to quickly output them */ + + uint16_t i; /* input byte index */ + uint16_t b; /* encoded symbol index (bytes/2) */ + uint16_t o; /* output bit index */ + uint8_t p; /* previous cost/bits index */ + uint8_t n; /* next cost/bits index */ + uint8_t state; /* state index */ + const uint8_t *whiten = ao_fec_whiten_table; + uint16_t interleave; /* input byte array index */ + uint8_t s0, s1; + uint16_t avail; + uint16_t crc = AO_FEC_CRC_INIT; +#if AO_PROFILE + uint32_t start_tick; +#endif + + p = 0; + for (state = 0; state < NUM_STATE; state++) { + cost[0][state] = 0x7fffffff; + bits[0][state] = 0; + } + cost[0][0] = 0; + + if (callback) + avail = 0; + else + avail = len; + +#if AO_PROFILE + if (!avail) { + avail = callback(); + if (!avail) + return 0; + } + start_tick = ao_profile_tick(); +#endif + o = 0; + for (i = 0; i < len; i += 2) { + b = i/2; + n = p ^ 1; + + if (!avail) { + avail = callback(); + if (!avail) + return 0; + } + + /* Fetch one pair of input bytes, de-interleaving + * the input. + */ + interleave = ao_interleave_index(i); + s0 = in[interleave]; + s1 = in[interleave+1]; + + avail -= 2; + + /* Compute path costs and accumulate output bit path + * for each state and encoded bit value. Unrolling + * this loop is worth about > 30% performance boost. + * Decoding 76-byte remote access packets is reduced + * from 14.700ms to 9.3ms. Redoing the loop to + * directly compare the two pasts for each future state + * reduces this down to 5.7ms + */ + + /* Ok, of course this is tricky, it's optimized. + * + * First, it's important to realize that we have 8 + * states representing the combinations of the three + * most recent bits from the encoder. Flipping any + * of these three bits flips both output bits. + * + * 'state<<1' represents the target state for a new + * bit value of 0. '(state<<1)+1' represents the + * target state for a new bit value of 1. + * + * 'state' is the previous state with an oldest bit + * value of 0. 'state + 4' is the previous state with + * an oldest bit value of 1. These two states will + * either lead to 'state<<1' or '(state<<1)+1', depending + * on whether the next encoded bit was a zero or a one. + * + * m0 and m1 are the cost of coming to 'state<<1' from + * one of the two possible previous states 'state' and + * 'state + 4'. + * + * Because we know the expected values of each + * received bit are flipped between these two previous + * states: + * + * bitcost(state+4) = 510 - bitcost(state) + * + * With those two total costs in hand, we then pick + * the lower as the cost of the 'state<<1', and compute + * the path of bits leading to that state. + * + * Then, do the same for '(state<<1) + 1'. This time, + * instead of computing the m0 and m1 values from + * scratch, because the only difference is that we're + * expecting a one bit instead of a zero bit, we just + * flip the bitcost values around to match the + * expected transmitted bits with some tricky + * arithmetic which is equivalent to: + * + * m0 = cost[p][state] + (510 - bitcost); + * m1 = cost[p][state+4] + bitcost + * + * Then, the lowest cost and bit trace of the new state + * is saved. + */ + +#define DO_STATE(state) { \ + uint32_t bitcost; \ + \ + uint32_t m0; \ + uint32_t m1; \ + uint32_t bit; \ + \ + bitcost = ((uint32_t) (s0 ^ ao_fec_decode_table[(state<<1)]) + \ + (uint32_t) (s1 ^ ao_fec_decode_table[(state<<1)|1])); \ + \ + m0 = cost[p][state] + bitcost; \ + m1 = cost[p][state+4] + (510 - bitcost); \ + bit = m0 > m1; \ + cost[n][state<<1] = bit ? m1 : m0; \ + bits[n][state<<1] = (bits[p][state + (bit<<2)] << 1) | (state&1); \ + \ + m0 -= (bitcost+bitcost-510); \ + m1 += (bitcost+bitcost-510); \ + bit = m0 > m1; \ + cost[n][(state<<1)+1] = bit ? m1 : m0; \ + bits[n][(state<<1)+1] = (bits[p][state + (bit<<2)] << 1) | (state&1); \ + } + + DO_STATE(0); + DO_STATE(1); + DO_STATE(2); + DO_STATE(3); + +#if 0 + printf ("bit %3d symbol %2x %2x:", i/2, s0, s1); + for (state = 0; state < NUM_STATE; state++) { + printf (" %8u(%08x)", cost[n][state], bits[n][state]); + } + printf ("\n"); +#endif + p = n; + + /* A loop is needed to handle the last output byte. It + * won't have any bits of future data to perform full + * error correction, but we might as well give the + * best possible answer anyways. + */ + while ((b - o) >= (8 + NUM_HIST) || (i + 2 >= len && b > o)) { + + /* Compute number of bits to the end of the + * last full byte of data. This is generally + * NUM_HIST, unless we've reached + * the end of the input, in which case + * it will be seven. + */ + int8_t dist = b - (o + 8); /* distance to last ready-for-writing bit */ + uint32_t min_cost; /* lowest cost */ + uint8_t min_state; /* lowest cost state */ + uint8_t byte; + + /* Find the best fit at the current point + * of the decode. + */ + min_cost = cost[p][0]; + min_state = 0; + for (state = 1; state < NUM_STATE; state++) { + if (cost[p][state] < min_cost) { + min_cost = cost[p][state]; + min_state = state; + } + } + + /* The very last byte of data has the very last bit + * of data left in the state value; just smash the + * bits value in place and reset the 'dist' from + * -1 to 0 so that the full byte is read out + */ + if (dist < 0) { + bits[p][min_state] = (bits[p][min_state] << 1) | (min_state & 1); + dist = 0; + } + +#if 0 + printf ("\tbit %3d min_cost %5d old bit %3d old_state %x bits %02x whiten %0x\n", + i/2, min_cost, o + 8, min_state, (bits[p][min_state] >> dist) & 0xff, *whiten); +#endif + byte = (bits[p][min_state] >> dist) ^ *whiten++; + *out++ = byte; + if (out_len > 2) + crc = ao_fec_crc_byte(byte, crc); + + if (!--out_len) { + if ((out[-2] == (uint8_t) (crc >> 8)) && + out[-1] == (uint8_t) crc) + out[-1] = AO_FEC_DECODE_CRC_OK; + else + out[-1] = 0; + out[-2] = 0; + goto done; + } + o += 8; + } + } +done: +#if AO_PROFILE + ao_fec_decode_start = start_tick; + ao_fec_decode_end = ao_profile_tick(); +#endif + return 1; +} |