The reason is that this is easier for PIC code (in particular on Darwin, ...). Keep the old names as pointers (static in cabac_functions.h, so that gcc knows these are just immediate offsets) so that the C code can stay the same (alternatively, the offsets could be used directly in the functions needing the tables). This should produce the same code as before for non-PIC builds and better code (confirmed) for PIC builds. The assembly uses the new table but still won't work in the PIC case. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> tags/n0.11
| @@ -31,6 +31,29 @@ | |||
| #include "cabac.h" | |||
| #include "cabac_functions.h" | |||
| uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = { | |||
| 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, | |||
| 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, | |||
| 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
| 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| }; | |||
| static const uint8_t lps_range[64][4]= { | |||
| {128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205}, | |||
| {116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166}, | |||
| @@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= { | |||
| { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}, | |||
| }; | |||
| uint8_t ff_h264_mlps_state[4*64]; | |||
| uint8_t ff_h264_lps_range[4*2*64]; | |||
| static uint8_t h264_lps_state[2*64]; | |||
| static uint8_t h264_mps_state[2*64]; | |||
| @@ -77,27 +98,11 @@ static const uint8_t lps_state[64]= { | |||
| 36,36,37,37,37,38,38,63, | |||
| }; | |||
| const uint8_t ff_h264_norm_shift[512]= { | |||
| 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, | |||
| 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, | |||
| 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
| 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
| static const uint8_t last_coeff_flag_offset_8x8[63] = { | |||
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |||
| 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |||
| 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |||
| 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | |||
| }; | |||
| /** | |||
| @@ -158,6 +163,9 @@ void ff_init_cabac_states(CABACContext *c){ | |||
| ff_h264_mlps_state[128-2*i-2]= 0; | |||
| } | |||
| } | |||
| for(i=0; i< 63; i++){ | |||
| ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i]; | |||
| } | |||
| } | |||
| #ifdef TEST | |||
| @@ -31,6 +31,11 @@ | |||
| #include "put_bits.h" | |||
| #define H264_NORM_SHIFT_OFFSET 0 | |||
| #define H264_LPS_RANGE_OFFSET 512 | |||
| #define H264_MLPS_STATE_OFFSET 1024 | |||
| #define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280 | |||
| #define CABAC_BITS 16 | |||
| #define CABAC_MASK ((1<<CABAC_BITS)-1) | |||
| @@ -36,9 +36,11 @@ | |||
| # include "x86/cabac.h" | |||
| #endif | |||
| extern const uint8_t ff_h264_norm_shift[512]; | |||
| extern uint8_t ff_h264_mlps_state[4*64]; | |||
| extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS | |||
| extern uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63]; | |||
| static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET; | |||
| static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET; | |||
| static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET; | |||
| static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET; | |||
| static void refill(CABACContext *c){ | |||
| #if CABAC_BITS == 16 | |||
| @@ -1561,13 +1561,6 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, | |||
| return base_ctx[cat] + ctx; | |||
| } | |||
| DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { | |||
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |||
| 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |||
| 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |||
| 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | |||
| }; | |||
| static av_always_inline void | |||
| decode_cabac_residual_internal(H264Context *h, DCTELEM *block, | |||
| int cat, int n, const uint8_t *scantable, | |||
| @@ -1670,7 +1663,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block, | |||
| last_coeff_ctx_base-significant_coeff_ctx_base); | |||
| } | |||
| #else | |||
| DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); | |||
| DECODE_SIGNIFICANCE( 63, sig_off[last], ff_h264_last_coeff_flag_offset_8x8[last] ); | |||
| } else { | |||
| if (is_dc && chroma422) { // dc 422 | |||
| DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | |||
| @@ -24,6 +24,7 @@ | |||
| #include "libavcodec/cabac.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/x86_cpu.h" | |||
| #include "libavutil/internal.h" | |||
| #include "config.h" | |||
| #if HAVE_FAST_CMOV | |||
| @@ -51,16 +52,16 @@ | |||
| "xor "tmp" , "ret" \n\t" | |||
| #endif /* HAVE_FAST_CMOV */ | |||
| #define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end) \ | |||
| #define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off) \ | |||
| "movzbl "statep" , "ret" \n\t"\ | |||
| "mov "range" , "tmp" \n\t"\ | |||
| "and $0xC0 , "range" \n\t"\ | |||
| "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ | |||
| "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\ | |||
| "sub "range" , "tmp" \n\t"\ | |||
| BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \ | |||
| "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ | |||
| "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\ | |||
| "shl %%cl , "range" \n\t"\ | |||
| "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ | |||
| "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\ | |||
| "shl %%cl , "low" \n\t"\ | |||
| "mov "tmpbyte" , "statep" \n\t"\ | |||
| "test "lowword" , "lowword" \n\t"\ | |||
| @@ -73,7 +74,7 @@ | |||
| "shr $15 , %%ecx \n\t"\ | |||
| "bswap "tmp" \n\t"\ | |||
| "shr $15 , "tmp" \n\t"\ | |||
| "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ | |||
| "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\ | |||
| "sub $0xFFFF , "tmp" \n\t"\ | |||
| "neg %%ecx \n\t"\ | |||
| "add $7 , %%ecx \n\t"\ | |||
| @@ -93,11 +94,14 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, | |||
| __asm__ volatile( | |||
| BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1", | |||
| "%2", "%3", "%b3", | |||
| "%a6(%5)", "%a7(%5)") | |||
| "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10") | |||
| : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp) | |||
| : "r"(state), "r"(c), | |||
| "i"(offsetof(CABACContext, bytestream)), | |||
| "i"(offsetof(CABACContext, bytestream_end)) | |||
| "i"(offsetof(CABACContext, bytestream_end)), | |||
| "i"(H264_NORM_SHIFT_OFFSET), | |||
| "i"(H264_LPS_RANGE_OFFSET), | |||
| "i"(H264_MLPS_STATE_OFFSET) | |||
| : "%"REG_c, "memory" | |||
| ); | |||
| return bit & 1; | |||
| @@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
| int minusindex= 4-(intptr_t)index; | |||
| int bit; | |||
| x86_reg coeff_count; | |||
| __asm__ volatile( | |||
| "3: \n\t" | |||
| BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", | |||
| "%5", "%k0", "%b0", | |||
| "%a11(%6)", "%a12(%6)") | |||
| "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") | |||
| "test $1, %4 \n\t" | |||
| " jz 4f \n\t" | |||
| @@ -58,7 +59,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
| BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", | |||
| "%5", "%k0", "%b0", | |||
| "%a11(%6)", "%a12(%6)") | |||
| "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") | |||
| "sub %10, %1 \n\t" | |||
| "mov %2, %0 \n\t" | |||
| @@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
| "+&r"(c->low), "=&r"(bit), "+&r"(c->range) | |||
| : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), | |||
| "i"(offsetof(CABACContext, bytestream)), | |||
| "i"(offsetof(CABACContext, bytestream_end)) | |||
| "i"(offsetof(CABACContext, bytestream_end)), | |||
| "i"(H264_NORM_SHIFT_OFFSET), | |||
| "i"(H264_LPS_RANGE_OFFSET), | |||
| "i"(H264_MLPS_STATE_OFFSET) | |||
| : "%"REG_c, "memory" | |||
| ); | |||
| return coeff_count; | |||
| @@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
| x86_reg coeff_count; | |||
| x86_reg last=0; | |||
| x86_reg state; | |||
| __asm__ volatile( | |||
| "mov %1, %6 \n\t" | |||
| "3: \n\t" | |||
| @@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
| BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", | |||
| "%5", "%k0", "%b0", | |||
| "%a12(%7)", "%a13(%7)") | |||
| "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") | |||
| "mov %1, %k6 \n\t" | |||
| "test $1, %4 \n\t" | |||
| " jz 4f \n\t" | |||
| "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" | |||
| "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t" | |||
| "add %11, %6 \n\t" | |||
| BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", | |||
| "%5", "%k0", "%b0", | |||
| "%a12(%7)", "%a13(%7)") | |||
| "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") | |||
| "mov %2, %0 \n\t" | |||
| "mov %1, %k6 \n\t" | |||
| @@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
| : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), | |||
| "m"(sig_off), "m"(last_coeff_ctx_base), | |||
| "i"(offsetof(CABACContext, bytestream)), | |||
| "i"(offsetof(CABACContext, bytestream_end)) | |||
| "i"(offsetof(CABACContext, bytestream_end)), | |||
| "i"(H264_NORM_SHIFT_OFFSET), | |||
| "i"(H264_LPS_RANGE_OFFSET), | |||
| "i"(H264_MLPS_STATE_OFFSET), | |||
| "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) | |||
| : "%"REG_c, "memory" | |||
| ); | |||
| return coeff_count; | |||