This is a hand-tuned version of the code with impossible parts of the FASTDIV function omitted. 2-5% faster overall on Cortex-A8. Signed-off-by: Mans Rullgard <mans@mansr.com> tags/n0.9
| @@ -23,6 +23,61 @@ | |||
| #include <stdint.h> | |||
| #include "config.h" | |||
| #include "libavutil/intmath.h" | |||
| #if HAVE_ARMV6 && HAVE_INLINE_ASM | |||
| #define decode_blockcodes decode_blockcodes | |||
| static inline int decode_blockcodes(int code1, int code2, int levels, /* ARMv6 inline-asm decoder: code1 is split into values[0..3] and code2 into values[4..7], as two interleaved instruction chains */ | |||
| int *values) /* returns code1 OR code2 as left by the asm (the fourth high-word product of each chain); the caller in this file treats a non-zero result as a failed look-up */ | |||
| { /* asm operands: %0-%7 = values[0..7], %8-%13 = v0..v5, %14/%15 = code1/code2, %16 = levels - 1, %17 = -levels, %18 = ff_inverse[levels] (presumably the FASTDIV reciprocal of levels, which would make each smmul a division by levels -- confirm in libavutil/intmath.h) */ | |||
| int v0, v1, v2, v3, v4, v5; /* scratch registers: v0-v2 serve the code1 chain, v3-v5 the code2 chain */ | |||
| __asm__ ("smmul %8, %14, %18 \n" /* v0 = high 32 bits of code1 * %18 */ | |||
| "smmul %11, %15, %18 \n" /* v3 = high 32 bits of code2 * %18 */ | |||
| "smlabb %14, %8, %17, %14 \n" /* code1 += low16(v0) * low16(-levels); smlabb multiplies only the low halfwords, so v0 is assumed to fit in 16 bits */ | |||
| "smlabb %15, %11, %17, %15 \n" /* code2 += low16(v3) * low16(-levels) */ | |||
| "smmul %9, %8, %18 \n" /* v1 = high 32 bits of v0 * %18 */ | |||
| "smmul %12, %11, %18 \n" /* v4 = high 32 bits of v3 * %18 */ | |||
| "sub %14, %14, %16, lsr #1 \n" /* code1 -= (levels - 1) >> 1 */ | |||
| "sub %15, %15, %16, lsr #1 \n" /* code2 -= (levels - 1) >> 1 */ | |||
| "smlabb %8, %9, %17, %8 \n" /* v0 += low16(v1) * low16(-levels) */ | |||
| "smlabb %11, %12, %17, %11 \n" /* v3 += low16(v4) * low16(-levels) */ | |||
| "smmul %10, %9, %18 \n" /* v2 = high 32 bits of v1 * %18 */ | |||
| "smmul %13, %12, %18 \n" /* v5 = high 32 bits of v4 * %18 */ | |||
| "str %14, %0 \n" /* values[0] = code1 */ | |||
| "str %15, %4 \n" /* values[4] = code2 */ | |||
| "sub %8, %8, %16, lsr #1 \n" /* v0 -= (levels - 1) >> 1 */ | |||
| "sub %11, %11, %16, lsr #1 \n" /* v3 -= (levels - 1) >> 1 */ | |||
| "smlabb %9, %10, %17, %9 \n" /* v1 += low16(v2) * low16(-levels) */ | |||
| "smlabb %12, %13, %17, %12 \n" /* v4 += low16(v5) * low16(-levels) */ | |||
| "smmul %14, %10, %18 \n" /* code1 register reused: high 32 bits of v2 * %18; this is the value returned for chain 1 */ | |||
| "smmul %15, %13, %18 \n" /* code2 register reused: high 32 bits of v5 * %18; this is the value returned for chain 2 */ | |||
| "str %8, %1 \n" /* values[1] = v0 */ | |||
| "str %11, %5 \n" /* values[5] = v3 */ | |||
| "sub %9, %9, %16, lsr #1 \n" /* v1 -= (levels - 1) >> 1 */ | |||
| "sub %12, %12, %16, lsr #1 \n" /* v4 -= (levels - 1) >> 1 */ | |||
| "smlabb %10, %14, %17, %10 \n" /* v2 += low16(code1) * low16(-levels) */ | |||
| "smlabb %13, %15, %17, %13 \n" /* v5 += low16(code2) * low16(-levels) */ | |||
| "str %9, %2 \n" /* values[2] = v1 */ | |||
| "str %12, %6 \n" /* values[6] = v4 */ | |||
| "sub %10, %10, %16, lsr #1 \n" /* v2 -= (levels - 1) >> 1 */ | |||
| "sub %13, %13, %16, lsr #1 \n" /* v5 -= (levels - 1) >> 1 */ | |||
| "str %10, %3 \n" /* values[3] = v2 */ | |||
| "str %13, %7 \n" /* values[7] = v5 */ | |||
| : "=m"(values[0]), "=m"(values[1]), /* outputs %0-%7: the eight memory slots written by the str instructions */ | |||
| "=m"(values[2]), "=m"(values[3]), | |||
| "=m"(values[4]), "=m"(values[5]), | |||
| "=m"(values[6]), "=m"(values[7]), | |||
| "=&r"(v0), "=&r"(v1), "=&r"(v2), /* %8-%13: write-only scratch; '&' (early clobber) because they are written before all inputs have been read */ | |||
| "=&r"(v3), "=&r"(v4), "=&r"(v5), | |||
| "+&r"(code1), "+&r"(code2) /* %14/%15: read-write, early clobber; hold the input codes on entry and the returned values on exit */ | |||
| : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels])); /* inputs %16, %17, %18 */ | |||
| return code1 | code2; /* zero only if both chains ended with a zero in their code register */ | |||
| } | |||
| #endif | |||
| #if HAVE_NEON && HAVE_INLINE_ASM && HAVE_ASM_MOD_Y | |||
| @@ -1038,6 +1038,7 @@ static void dca_downmix(float *samples, int srcfmt, | |||
| } | |||
| #ifndef decode_blockcodes | |||
| /* Very compact version of the block code decoder that does not use table | |||
| * look-up but is slightly slower */ | |||
| static int decode_blockcode(int code, int levels, int *values) | |||
| @@ -1051,13 +1052,15 @@ static int decode_blockcode(int code, int levels, int *values) | |||
| code = div; | |||
| } | |||
| if (code == 0) | |||
| return 0; | |||
| else { | |||
| av_log(NULL, AV_LOG_ERROR, "ERROR: block code look-up failed\n"); | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| return code; | |||
| } | |||
| static int decode_blockcodes(int code1, int code2, int levels, int *values) /* generic C version; compiled only when no decode_blockcodes macro is defined (enclosing #ifndef) */ | |||
| { | |||
| return decode_blockcode(code1, levels, values) | /* first code decoded into the entries starting at values[0] */ | |||
| decode_blockcode(code2, levels, values + 4); /* second code decoded starting at values[4]; the results are ORed, so the return is zero only if both calls returned zero */ | |||
| } | |||
| #endif | |||
| static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 }; | |||
| static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 }; | |||
| @@ -1125,16 +1128,20 @@ static int dca_subsubframe(DCAContext * s, int base_channel, int block_index) | |||
| if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table){ | |||
| if (abits <= 7){ | |||
| /* Block code */ | |||
| int block_code1, block_code2, size, levels; | |||
| int block_code1, block_code2, size, levels, err; | |||
| size = abits_sizes[abits-1]; | |||
| levels = abits_levels[abits-1]; | |||
| block_code1 = get_bits(&s->gb, size); | |||
| /* FIXME Should test return value */ | |||
| decode_blockcode(block_code1, levels, block); | |||
| block_code2 = get_bits(&s->gb, size); | |||
| decode_blockcode(block_code2, levels, &block[4]); | |||
| err = decode_blockcodes(block_code1, block_code2, | |||
| levels, block); | |||
| if (err) { | |||
| av_log(s->avctx, AV_LOG_ERROR, | |||
| "ERROR: block code look-up failed\n"); | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| }else{ | |||
| /* no coding */ | |||
| for (m = 0; m < 8; m++) | |||