/*
 * Patch against a52dec-0.7.4: moves the IFFT root tables, the IMDCT twiddle
 * factors and the IMDCT window out of file-scope statics in liba52/imdct.c
 * and into per-decoder storage reachable through a52_state_t.
 *
 * Per-file summary:
 *   configure.in           x86 host flags change from -mcpu=... to -mtune=...
 *   include/a52.h          the opaque a52_state_t typedef becomes the full
 *                          struct definition (together with ba_t, expbap_t and
 *                          complex_t), extended by pointer members for the
 *                          roots16/32/64/128, pre1/post1/pre2/post2 and
 *                          a52_imdct_window tables.
 *   liba52/Makefile.am     CFLAGS becomes AM_CFLAGS; LDFLAGS gains
 *                          "-release @VERSION@".
 *   liba52/a52_internal.h  drops the struct definitions that now live in
 *                          a52.h; a52_imdct_init, a52_imdct_256 and
 *                          a52_imdct_512 gain a leading state argument.
 *   liba52/configure.incl  removes the -prefer-non-pic addition to
 *                          LIBA52_CFLAGS.
 *   liba52/imdct.c         every table access goes through the state pointer;
 *                          the ifft function pointers take the state as well;
 *                          the "No accelerated IMDCT transform found" fprintf
 *                          is commented out.
 *   liba52/parse.c         the state allocator (presumably a52_init; its
 *                          header lies outside the hunk) switches to calloc,
 *                          allocates each table with memalign (16, ...) and
 *                          unwinds through a single fail: label; a52_free
 *                          releases the same tables.
 *   src/Makefile.am        CFLAGS becomes AM_CFLAGS; a52dec additionally
 *                          links with -lm.
 *
 * NOTE(review): this copy has lost its line breaks (several diff lines share
 * one physical line), so the line structure has to be restored before the
 * patch can be applied.
 * NOTE(review): ifft128 and ifft64 stay file-scope static function pointers
 * after this patch and are assigned inside a52_imdct_init, so that part of
 * the setup is still shared between decoder states.
 * NOTE(review): old imdct.c lines 421-424 sit inside #ifdef LIBA52_DJBFFT and
 * are not touched by any hunk here; presumably they assign ifft128/ifft64.
 * Verify that they still match the new (state, buf) pointer type.
 * NOTE(review): expressions such as "state->roots16 - 4" form a pointer in
 * front of a heap allocation; confirm this is acceptable on the target
 * toolchains.
 */
--- a52dec-0.7.4.old/configure.in +++ a52dec-0.7.4/configure.in @@ -34,11 +34,11 @@ case "$host" in i?86-* | k?-*) case "$host" in - i386-*) OPT_CFLAGS="$CFLAGS -mcpu=i386";; - i486-*) OPT_CFLAGS="$CFLAGS -mcpu=i486";; - i586-*) OPT_CFLAGS="$CFLAGS -mcpu=pentium";; - i686-*) OPT_CFLAGS="$CFLAGS -mcpu=pentiumpro";; - k6-*) OPT_CFLAGS="$CFLAGS -mcpu=k6";; + i386-*) OPT_CFLAGS="$CFLAGS -mtune=i386";; + i486-*) OPT_CFLAGS="$CFLAGS -mtune=i486";; + i586-*) OPT_CFLAGS="$CFLAGS -mtune=pentium";; + i686-*) OPT_CFLAGS="$CFLAGS -mtune=pentiumpro";; + k6-*) OPT_CFLAGS="$CFLAGS -mtune=k6";; esac AC_TRY_CFLAGS([$OPT_CFLAGS],[CFLAGS=$OPT_CFLAGS]);; sparc-* | sparc64-*) --- a52dec-0.7.4.old/include/a52.h +++ a52dec-0.7.4/include/a52.h @@ -30,7 +30,90 @@ typedef double sample_t; #endif -typedef struct a52_state_s a52_state_t; +typedef struct { + uint8_t bai; /* fine SNR offset, fast gain */ + uint8_t deltbae; /* delta bit allocation exists */ + int8_t deltba[50]; /* per-band delta bit allocation */ +} ba_t; + +typedef struct { + uint8_t exp[256]; /* decoded channel exponents */ + int8_t bap[256]; /* derived channel bit allocation */ +} expbap_t; + +typedef struct { + sample_t real; + sample_t imag; +} complex_t; + +typedef struct { + uint8_t fscod; /* sample rate */ + uint8_t halfrate; /* halfrate factor */ + uint8_t acmod; /* coded channels */ + uint8_t lfeon; /* coded lfe channel */ + sample_t clev; /* centre channel mix level */ + sample_t slev; /* surround channels mix level */ + + int output; /* type of output */ + sample_t level; /* output level */ + sample_t bias; /* output bias */ + + int dynrnge; /* apply dynamic range */ + sample_t dynrng; /* dynamic range */ + void * dynrngdata; /* dynamic range callback function and data */ + sample_t (* dynrngcall) (sample_t range, void * dynrngdata); + + uint8_t chincpl; /* channel coupled */ + uint8_t phsflginu; /* phase flags in use (stereo only) */ + uint8_t cplstrtmant; /* coupling channel start mantissa */ + uint8_t cplendmant; 
/* coupling channel end mantissa */ + uint32_t cplbndstrc; /* coupling band structure */ + sample_t cplco[5][18]; /* coupling coordinates */ + + /* derived information */ + uint8_t cplstrtbnd; /* coupling start band (for bit allocation) */ + uint8_t ncplbnd; /* number of coupling bands */ + + uint8_t rematflg; /* stereo rematrixing */ + + uint8_t endmant[5]; /* channel end mantissa */ + + uint16_t bai; /* bit allocation information */ + + uint32_t * buffer_start; + uint16_t lfsr_state; /* dither state */ + uint32_t bits_left; + uint32_t current_word; + + uint8_t csnroffst; /* coarse SNR offset */ + ba_t cplba; /* coupling bit allocation parameters */ + ba_t ba[5]; /* channel bit allocation parameters */ + ba_t lfeba; /* lfe bit allocation parameters */ + + uint8_t cplfleak; /* coupling fast leak init */ + uint8_t cplsleak; /* coupling slow leak init */ + + expbap_t cpl_expbap; + expbap_t fbw_expbap[5]; + expbap_t lfe_expbap; + + sample_t * samples; + int downmixed; + + /* Root values for IFFT */ + sample_t * roots16; // size 3 + sample_t * roots32; // size 7 + sample_t * roots64; // size 15 + sample_t * roots128; // size 31 + + /* Twiddle factors for IMDCT */ + complex_t * pre1; // size 128 + complex_t * post1; // size 64 + complex_t * pre2; // size 64 + complex_t * post2; // size 32 + + sample_t * a52_imdct_window; // size 256 +} a52_state_t; #define A52_CHANNEL 0 #define A52_MONO 1 --- a52dec-0.7.4.old/liba52/Makefile.am +++ a52dec-0.7.4/liba52/Makefile.am @@ -1,9 +1,9 @@ -CFLAGS = @CFLAGS@ @LIBA52_CFLAGS@ +AM_CFLAGS = @CFLAGS@ @LIBA52_CFLAGS@ lib_LTLIBRARIES = liba52.la liba52_la_SOURCES = bitstream.c imdct.c bit_allocate.c parse.c downmix.c liba52_la_LIBADD = @LIBA52_LIBS@ -lm -liba52_la_LDFLAGS = -no-undefined +liba52_la_LDFLAGS = -no-undefined -release @VERSION@ EXTRA_DIST = configure.incl a52_internal.h bitstream.h tables.h --- a52dec-0.7.4.old/liba52/a52_internal.h +++ a52dec-0.7.4/liba52/a52_internal.h @@ -21,72 +21,6 @@ * Foundation, Inc., 59 Temple 
Place, Suite 330, Boston, MA 02111-1307 USA */ -typedef struct { - uint8_t bai; /* fine SNR offset, fast gain */ - uint8_t deltbae; /* delta bit allocation exists */ - int8_t deltba[50]; /* per-band delta bit allocation */ -} ba_t; - -typedef struct { - uint8_t exp[256]; /* decoded channel exponents */ - int8_t bap[256]; /* derived channel bit allocation */ -} expbap_t; - -struct a52_state_s { - uint8_t fscod; /* sample rate */ - uint8_t halfrate; /* halfrate factor */ - uint8_t acmod; /* coded channels */ - uint8_t lfeon; /* coded lfe channel */ - sample_t clev; /* centre channel mix level */ - sample_t slev; /* surround channels mix level */ - - int output; /* type of output */ - sample_t level; /* output level */ - sample_t bias; /* output bias */ - - int dynrnge; /* apply dynamic range */ - sample_t dynrng; /* dynamic range */ - void * dynrngdata; /* dynamic range callback funtion and data */ - sample_t (* dynrngcall) (sample_t range, void * dynrngdata); - - uint8_t chincpl; /* channel coupled */ - uint8_t phsflginu; /* phase flags in use (stereo only) */ - uint8_t cplstrtmant; /* coupling channel start mantissa */ - uint8_t cplendmant; /* coupling channel end mantissa */ - uint32_t cplbndstrc; /* coupling band structure */ - sample_t cplco[5][18]; /* coupling coordinates */ - - /* derived information */ - uint8_t cplstrtbnd; /* coupling start band (for bit allocation) */ - uint8_t ncplbnd; /* number of coupling bands */ - - uint8_t rematflg; /* stereo rematrixing */ - - uint8_t endmant[5]; /* channel end mantissa */ - - uint16_t bai; /* bit allocation information */ - - uint32_t * buffer_start; - uint16_t lfsr_state; /* dither state */ - uint32_t bits_left; - uint32_t current_word; - - uint8_t csnroffst; /* coarse SNR offset */ - ba_t cplba; /* coupling bit allocation parameters */ - ba_t ba[5]; /* channel bit allocation parameters */ - ba_t lfeba; /* lfe bit allocation parameters */ - - uint8_t cplfleak; /* coupling fast leak init */ - uint8_t cplsleak; /* 
coupling slow leak init */ - - expbap_t cpl_expbap; - expbap_t fbw_expbap[5]; - expbap_t lfe_expbap; - - sample_t * samples; - int downmixed; -}; - #define LEVEL_PLUS6DB 2.0 #define LEVEL_PLUS3DB 1.4142135623730951 #define LEVEL_3DB 0.7071067811865476 @@ -115,6 +49,6 @@ sample_t clev, sample_t slev); void a52_upmix (sample_t * samples, int acmod, int output); -void a52_imdct_init (uint32_t mm_accel); -void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); +void a52_imdct_init (a52_state_t * state, uint32_t mm_accel); +void a52_imdct_256 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias); +void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias); --- a52dec-0.7.4.old/liba52/configure.incl +++ a52dec-0.7.4/liba52/configure.incl @@ -1,9 +1,6 @@ AC_SUBST([LIBA52_CFLAGS]) AC_SUBST([LIBA52_LIBS]) -dnl avoid -fPIC when possible -LIBA52_CFLAGS="$LIBA52_CFLAGS -prefer-non-pic" - AC_ARG_ENABLE([double], [ --enable-double use double-precision samples]) if test x"$enable_double" = x"yes"; then --- a52dec-0.7.4.old/liba52/imdct.c +++ a52dec-0.7.4/liba52/imdct.c @@ -40,11 +40,6 @@ #include "a52_internal.h" #include "mm_accel.h" -typedef struct complex_s { - sample_t real; - sample_t imag; -} complex_t; - static uint8_t fftorder[] = { 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, @@ -56,22 +51,8 @@ 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 }; -/* Root values for IFFT */ -static sample_t roots16[3]; -static sample_t roots32[7]; -static sample_t roots64[15]; -static sample_t roots128[31]; - -/* Twiddle factors for IMDCT */ -static complex_t pre1[128]; -static complex_t post1[64]; -static complex_t pre2[64]; -static complex_t post2[32]; - -static sample_t a52_imdct_window[256]; - -static void (* ifft128) (complex_t * buf); -static void (* 
ifft64) (complex_t * buf); +static void (* ifft128) (a52_state_t * state, complex_t * buf); +static void (* ifft64) (a52_state_t * state, complex_t * buf); static inline void ifft2 (complex_t * buf) { @@ -167,7 +148,7 @@ a1.imag += tmp4; \ } while (0) -static inline void ifft8 (complex_t * buf) +static inline void ifft8 (a52_state_t * state, complex_t * buf) { double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; @@ -175,7 +156,7 @@ ifft2 (buf + 4); ifft2 (buf + 6); BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); - BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); + BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], state->roots16[1]); } static void ifft_pass (complex_t * buf, sample_t * weight, int n) @@ -205,66 +186,66 @@ } while (--i); } -static void ifft16 (complex_t * buf) +static void ifft16 (a52_state_t * state, complex_t * buf) { - ifft8 (buf); + ifft8 (state, buf); ifft4 (buf + 8); ifft4 (buf + 12); - ifft_pass (buf, roots16 - 4, 4); + ifft_pass (buf, state->roots16 - 4, 4); } -static void ifft32 (complex_t * buf) +static void ifft32 (a52_state_t * state, complex_t * buf) { - ifft16 (buf); - ifft8 (buf + 16); - ifft8 (buf + 24); - ifft_pass (buf, roots32 - 8, 8); + ifft16 (state, buf); + ifft8 (state, buf + 16); + ifft8 (state, buf + 24); + ifft_pass (buf, state->roots32 - 8, 8); } -static void ifft64_c (complex_t * buf) +static void ifft64_c (a52_state_t * state, complex_t * buf) { - ifft32 (buf); - ifft16 (buf + 32); - ifft16 (buf + 48); - ifft_pass (buf, roots64 - 16, 16); + ifft32 (state, buf); + ifft16 (state, buf + 32); + ifft16 (state, buf + 48); + ifft_pass (buf, state->roots64 - 16, 16); } -static void ifft128_c (complex_t * buf) +static void ifft128_c (a52_state_t * state, complex_t * buf) { - ifft32 (buf); - ifft16 (buf + 32); - ifft16 (buf + 48); - ifft_pass (buf, roots64 - 16, 16); + ifft32 (state, buf); + ifft16 (state, buf + 32); + ifft16 (state, buf + 48); + ifft_pass (buf, state->roots64 - 16, 16); - ifft32 (buf + 64); - ifft32 
(buf + 96); - ifft_pass (buf, roots128 - 32, 32); + ifft32 (state, buf + 64); + ifft32 (state, buf + 96); + ifft_pass (buf, state->roots128 - 32, 32); } -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) +void a52_imdct_512 (a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias) { int i, k; sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; - const sample_t * window = a52_imdct_window; + const sample_t * window = state->a52_imdct_window; complex_t buf[128]; for (i = 0; i < 128; i++) { k = fftorder[i]; - t_r = pre1[i].real; - t_i = pre1[i].imag; + t_r = state->pre1[i].real; + t_i = state->pre1[i].imag; buf[i].real = t_i * data[255-k] + t_r * data[k]; buf[i].imag = t_r * data[255-k] - t_i * data[k]; } - ifft128 (buf); + ifft128 (state, buf); /* Post IFFT complex multiply plus IFFT complex conjugate*/ /* Window and convert to real valued signal */ for (i = 0; i < 64; i++) { /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ - t_r = post1[i].real; - t_i = post1[i].imag; + t_r = state->post1[i].real; + t_i = state->post1[i].imag; a_r = t_r * buf[i].real + t_i * buf[i].imag; a_i = t_i * buf[i].real - t_r * buf[i].imag; @@ -285,18 +266,18 @@ } } -void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) +void a52_imdct_256(a52_state_t * state, sample_t * data, sample_t * delay, sample_t bias) { int i, k; sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; - const sample_t * window = a52_imdct_window; + const sample_t * window = state->a52_imdct_window; complex_t buf1[64], buf2[64]; /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ for (i = 0; i < 64; i++) { k = fftorder[i]; - t_r = pre2[i].real; - t_i = pre2[i].imag; + t_r = state->pre2[i].real; + t_i = state->pre2[i].imag; buf1[i].real = t_i * data[254-k] + t_r * data[k]; buf1[i].imag = t_r * data[254-k] - t_i * data[k]; @@ -305,15 +286,15 @@ buf2[i].imag = t_r * data[255-k] - t_i * data[k+1]; } - ifft64 (buf1); - ifft64 (buf2); + ifft64 (state, buf1); + 
ifft64 (state, buf2); /* Post IFFT complex multiply */ /* Window and convert to real valued signal */ for (i = 0; i < 32; i++) { /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ - t_r = post2[i].real; - t_i = post2[i].imag; + t_r = state->post2[i].real; + t_i = state->post2[i].imag; a_r = t_r * buf1[i].real + t_i * buf1[i].imag; a_i = t_i * buf1[i].real - t_r * buf1[i].imag; @@ -362,7 +343,7 @@ return bessel; } -void a52_imdct_init (uint32_t mm_accel) +void a52_imdct_init (a52_state_t * state, uint32_t mm_accel) { int i, k; double sum; @@ -371,50 +352,50 @@ sum = 0; for (i = 0; i < 256; i++) { sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); - a52_imdct_window[i] = sum; + state->a52_imdct_window[i] = sum; } sum++; for (i = 0; i < 256; i++) - a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum); + state->a52_imdct_window[i] = sqrt (state->a52_imdct_window[i] / sum); for (i = 0; i < 3; i++) - roots16[i] = cos ((M_PI / 8) * (i + 1)); + state->roots16[i] = cos ((M_PI / 8) * (i + 1)); for (i = 0; i < 7; i++) - roots32[i] = cos ((M_PI / 16) * (i + 1)); + state->roots32[i] = cos ((M_PI / 16) * (i + 1)); for (i = 0; i < 15; i++) - roots64[i] = cos ((M_PI / 32) * (i + 1)); + state->roots64[i] = cos ((M_PI / 32) * (i + 1)); for (i = 0; i < 31; i++) - roots128[i] = cos ((M_PI / 64) * (i + 1)); + state->roots128[i] = cos ((M_PI / 64) * (i + 1)); for (i = 0; i < 64; i++) { k = fftorder[i] / 2 + 64; - pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); - pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); + state->pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); + state->pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); } for (i = 64; i < 128; i++) { k = fftorder[i] / 2 + 64; - pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); - pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); + state->pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); + state->pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); } for (i = 0; i < 64; i++) { - post1[i].real = cos ((M_PI / 256) * (i + 
0.5)); - post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); + state->post1[i].real = cos ((M_PI / 256) * (i + 0.5)); + state->post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); } for (i = 0; i < 64; i++) { k = fftorder[i] / 4; - pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); - pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); + state->pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); + state->pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); } for (i = 0; i < 32; i++) { - post2[i].real = cos ((M_PI / 128) * (i + 0.5)); - post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); + state->post2[i].real = cos ((M_PI / 128) * (i + 0.5)); + state->post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); } #ifdef LIBA52_DJBFFT @@ -425,7 +406,7 @@ } else #endif { - fprintf (stderr, "No accelerated IMDCT transform found\n"); + // fprintf (stderr, "No accelerated IMDCT transform found\n"); ifft128 = ifft128_c; ifft64 = ifft64_c; } --- a52dec-0.7.4.old/liba52/parse.c +++ a52dec-0.7.4/liba52/parse.c @@ -56,16 +56,53 @@ a52_state_t * state; int i; - state = malloc (sizeof (a52_state_t)); + state = calloc (1, sizeof (a52_state_t)); if (state == NULL) return NULL; state->samples = memalign (16, 256 * 12 * sizeof (sample_t)); if (state->samples == NULL) { - free (state); - return NULL; + goto fail; } + /* Root values for IFFT */ + state->roots16 = memalign (16, 3 * sizeof (sample_t)); + if (state->roots16 == NULL) + goto fail; + + state->roots32 = memalign (16, 7 * sizeof (sample_t)); + if (state->roots32 == NULL) + goto fail; + + state->roots64 = memalign (16, 15 * sizeof (sample_t)); + if (state->roots64 == NULL) + goto fail; + + state->roots128 = memalign (16, 31 * sizeof (sample_t)); + if (state->roots128 == NULL) + goto fail; + + /* Twiddle factors for IMDCT */ + state->pre1 = memalign (16, 128 * sizeof (complex_t)); + if (state->pre1 == NULL) + goto fail; + + state->post1 = memalign (16, 64 * sizeof (complex_t)); + if (state->post1 == NULL) + goto fail; + + state->pre2 = memalign (16, 64 * sizeof 
(complex_t)); + if (state->pre2 == NULL) + goto fail; + + state->post2 = memalign (16, 32 * sizeof (complex_t)); + if (state->post2 == NULL) + goto fail; + + state->a52_imdct_window = memalign (16, 256 * sizeof (sample_t)); + if (state->a52_imdct_window == NULL) + goto fail; + for (i = 0; i < 256 * 12; i++) state->samples[i] = 0; @@ -73,9 +110,27 @@ state->lfsr_state = 1; - a52_imdct_init (mm_accel); + a52_imdct_init (state, mm_accel); return state; + +fail: + if ( state ) + { + free (state->a52_imdct_window); + free (state->post2); + free (state->pre2); + free (state->post1); + free (state->pre1); + free (state->roots128); + free (state->roots64); + free (state->roots32); + free (state->roots16); + free (state->samples); + free (state); + } + return NULL; + } sample_t * a52_samples (a52_state_t * state) @@ -825,7 +880,7 @@ state->dynrng, 0, 7); for (i = 7; i < 256; i++) (samples-256)[i] = 0; - a52_imdct_512 (samples - 256, samples + 1536 - 256, state->bias); + a52_imdct_512 (state, samples - 256, samples + 1536 - 256, state->bias); } else { /* just skip the LFE coefficients */ coeff_get (state, samples + 1280, &state->lfe_expbap, &quantizer, @@ -854,10 +909,10 @@ if (coeff[i]) { if (blksw[i]) - a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, + a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i, bias); else - a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, + a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i, bias); } else { int j; @@ -883,11 +938,11 @@ if (blksw[0]) for (i = 0; i < nfchans; i++) - a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, + a52_imdct_256 (state, samples + 256 * i, samples + 1536 + 256 * i, state->bias); else for (i = 0; i < nfchans; i++) - a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, + a52_imdct_512 (state, samples + 256 * i, samples + 1536 + 256 * i, state->bias); } @@ -896,6 +951,15 @@ void a52_free (a52_state_t * state) { + free (state->a52_imdct_window); + 
free (state->post2); + free (state->pre2); + free (state->post1); + free (state->pre1); + free (state->roots128); + free (state->roots64); + free (state->roots32); + free (state->roots16); free (state->samples); free (state); } --- a52dec-0.7.4.old/src/Makefile.am +++ a52dec-0.7.4/src/Makefile.am @@ -1,9 +1,9 @@ -CFLAGS = @A52DEC_CFLAGS@ +AM_CFLAGS = @A52DEC_CFLAGS@ bin_PROGRAMS = a52dec extract_a52 a52dec_SOURCES = a52dec.c getopt.c gettimeofday.c a52dec_LDADD = $(top_builddir)/liba52/liba52.la \ - $(top_builddir)/libao/libao.a @LIBAO_LIBS@ + $(top_builddir)/libao/libao.a @LIBAO_LIBS@ -lm extract_a52_SOURCES = extract_a52.c getopt.c man_MANS = a52dec.1 extract_a52.1