The reason is that this is easier for PIC code (in particular on Darwin). Keep the old names as pointers (static in cabac_functions.h, so that gcc knows these are just immediate offsets) so that the C code can stay the same (alternatively, the offsets could be used directly in the functions needing the tables). This should produce the same code as before for non-PIC builds and better code (confirmed) for PIC builds. The assembly uses the new table but still won't work in the PIC case. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> tags/n0.11
@@ -31,6 +31,29 @@ | |||
#include "cabac.h" | |||
#include "cabac_functions.h" | |||
/**
 * Single combined lookup table for the CABAC decoder.
 *
 * The array size is the sum of four sub-table sizes:
 *   512     bytes - norm-shift values (H264_NORM_SHIFT_OFFSET)
 *   4*2*64  bytes - LPS range table   (H264_LPS_RANGE_OFFSET)
 *   4*64    bytes - MLPS state table  (H264_MLPS_STATE_OFFSET)
 *   63      bytes - last_coeff_flag_offset_8x8
 *                   (H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET)
 *
 * Only the first 512 bytes (the norm-shift values) have explicit
 * initializers here; the remaining bytes start out as zero. The table is
 * not const because parts of it are written at runtime, e.g. the
 * last_coeff_flag_offset_8x8 region is filled in ff_init_cabac_states().
 */
uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = { | |||
9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, | |||
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, | |||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
}; | |||
static const uint8_t lps_range[64][4]= { | |||
{128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205}, | |||
{116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166}, | |||
@@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= { | |||
{ 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}, | |||
}; | |||
uint8_t ff_h264_mlps_state[4*64]; | |||
uint8_t ff_h264_lps_range[4*2*64]; | |||
static uint8_t h264_lps_state[2*64]; | |||
static uint8_t h264_mps_state[2*64]; | |||
@@ -77,27 +98,11 @@ static const uint8_t lps_state[64]= { | |||
36,36,37,37,37,38,38,63, | |||
}; | |||
const uint8_t ff_h264_norm_shift[512]= { | |||
9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, | |||
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, | |||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |||
/**
 * Constant source values for the 63-entry last_coeff_flag_offset_8x8 table.
 * ff_init_cabac_states() copies these bytes into
 * ff_h264_last_coeff_flag_offset_8x8, which is the copy other code reads.
 */
static const uint8_t last_coeff_flag_offset_8x8[63] = { | |||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |||
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |||
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | |||
}; | |||
/** | |||
@@ -158,6 +163,9 @@ void ff_init_cabac_states(CABACContext *c){ | |||
ff_h264_mlps_state[128-2*i-2]= 0; | |||
} | |||
} | |||
for(i=0; i< 63; i++){ | |||
ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i]; | |||
} | |||
} | |||
#ifdef TEST | |||
@@ -31,6 +31,11 @@ | |||
#include "put_bits.h" | |||
/*
 * Byte offsets of the individual sub-tables inside ff_h264_cabac_tables.
 * Each offset is the previous offset plus the previous sub-table's size:
 *   norm shift                 :    0 (512 bytes)
 *   LPS range                  :  512 (4*2*64 = 512 bytes)
 *   MLPS state                 : 1024 (4*64 = 256 bytes)
 *   last_coeff_flag_offset_8x8 : 1280 (63 bytes)
 * The x86 inline assembly passes these as immediate operands, so they must
 * stay plain integer constants.
 */
#define H264_NORM_SHIFT_OFFSET 0 | |||
#define H264_LPS_RANGE_OFFSET 512 | |||
#define H264_MLPS_STATE_OFFSET 1024 | |||
#define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280 | |||
#define CABAC_BITS 16 | |||
#define CABAC_MASK ((1<<CABAC_BITS)-1) | |||
@@ -36,9 +36,11 @@ | |||
# include "x86/cabac.h" | |||
#endif | |||
extern const uint8_t ff_h264_norm_shift[512]; | |||
extern uint8_t ff_h264_mlps_state[4*64]; | |||
extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS | |||
/* Combined CABAC table; the size is the sum of the four sub-table sizes. */
extern uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63]; | |||
/*
 * The former per-table names are kept as constant pointers into the combined
 * table so that existing C code indexing them is unchanged. They are static
 * in this header so the compiler can see each one is the table address plus
 * a constant offset.
 */
static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET; | |||
static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET; | |||
static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET; | |||
static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET; | |||
static void refill(CABACContext *c){ | |||
#if CABAC_BITS == 16 | |||
@@ -1561,13 +1561,6 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, | |||
return base_ctx[cat] + ctx; | |||
} | |||
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { | |||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |||
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |||
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | |||
}; | |||
static av_always_inline void | |||
decode_cabac_residual_internal(H264Context *h, DCTELEM *block, | |||
int cat, int n, const uint8_t *scantable, | |||
@@ -1670,7 +1663,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block, | |||
last_coeff_ctx_base-significant_coeff_ctx_base); | |||
} | |||
#else | |||
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); | |||
DECODE_SIGNIFICANCE( 63, sig_off[last], ff_h264_last_coeff_flag_offset_8x8[last] ); | |||
} else { | |||
if (is_dc && chroma422) { // dc 422 | |||
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | |||
@@ -24,6 +24,7 @@ | |||
#include "libavcodec/cabac.h" | |||
#include "libavutil/attributes.h" | |||
#include "libavutil/x86_cpu.h" | |||
#include "libavutil/internal.h" | |||
#include "config.h" | |||
#if HAVE_FAST_CMOV | |||
@@ -51,16 +52,16 @@ | |||
"xor "tmp" , "ret" \n\t" | |||
#endif /* HAVE_FAST_CMOV */ | |||
#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end) \ | |||
#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off) \ | |||
"movzbl "statep" , "ret" \n\t"\ | |||
"mov "range" , "tmp" \n\t"\ | |||
"and $0xC0 , "range" \n\t"\ | |||
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ | |||
"movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\ | |||
"sub "range" , "tmp" \n\t"\ | |||
BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \ | |||
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ | |||
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\ | |||
"shl %%cl , "range" \n\t"\ | |||
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ | |||
"movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\ | |||
"shl %%cl , "low" \n\t"\ | |||
"mov "tmpbyte" , "statep" \n\t"\ | |||
"test "lowword" , "lowword" \n\t"\ | |||
@@ -73,7 +74,7 @@ | |||
"shr $15 , %%ecx \n\t"\ | |||
"bswap "tmp" \n\t"\ | |||
"shr $15 , "tmp" \n\t"\ | |||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ | |||
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\ | |||
"sub $0xFFFF , "tmp" \n\t"\ | |||
"neg %%ecx \n\t"\ | |||
"add $7 , %%ecx \n\t"\ | |||
@@ -93,11 +94,14 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, | |||
__asm__ volatile( | |||
BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1", | |||
"%2", "%3", "%b3", | |||
"%a6(%5)", "%a7(%5)") | |||
"%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10") | |||
: "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp) | |||
: "r"(state), "r"(c), | |||
"i"(offsetof(CABACContext, bytestream)), | |||
"i"(offsetof(CABACContext, bytestream_end)) | |||
"i"(offsetof(CABACContext, bytestream_end)), | |||
"i"(H264_NORM_SHIFT_OFFSET), | |||
"i"(H264_LPS_RANGE_OFFSET), | |||
"i"(H264_MLPS_STATE_OFFSET) | |||
: "%"REG_c, "memory" | |||
); | |||
return bit & 1; | |||
@@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
int minusindex= 4-(intptr_t)index; | |||
int bit; | |||
x86_reg coeff_count; | |||
__asm__ volatile( | |||
"3: \n\t" | |||
BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", | |||
"%5", "%k0", "%b0", | |||
"%a11(%6)", "%a12(%6)") | |||
"%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") | |||
"test $1, %4 \n\t" | |||
" jz 4f \n\t" | |||
@@ -58,7 +59,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", | |||
"%5", "%k0", "%b0", | |||
"%a11(%6)", "%a12(%6)") | |||
"%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") | |||
"sub %10, %1 \n\t" | |||
"mov %2, %0 \n\t" | |||
@@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||
"+&r"(c->low), "=&r"(bit), "+&r"(c->range) | |||
: "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), | |||
"i"(offsetof(CABACContext, bytestream)), | |||
"i"(offsetof(CABACContext, bytestream_end)) | |||
"i"(offsetof(CABACContext, bytestream_end)), | |||
"i"(H264_NORM_SHIFT_OFFSET), | |||
"i"(H264_LPS_RANGE_OFFSET), | |||
"i"(H264_MLPS_STATE_OFFSET) | |||
: "%"REG_c, "memory" | |||
); | |||
return coeff_count; | |||
@@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
x86_reg coeff_count; | |||
x86_reg last=0; | |||
x86_reg state; | |||
__asm__ volatile( | |||
"mov %1, %6 \n\t" | |||
"3: \n\t" | |||
@@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", | |||
"%5", "%k0", "%b0", | |||
"%a12(%7)", "%a13(%7)") | |||
"%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") | |||
"mov %1, %k6 \n\t" | |||
"test $1, %4 \n\t" | |||
" jz 4f \n\t" | |||
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" | |||
"movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t" | |||
"add %11, %6 \n\t" | |||
BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", | |||
"%5", "%k0", "%b0", | |||
"%a12(%7)", "%a13(%7)") | |||
"%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") | |||
"mov %2, %0 \n\t" | |||
"mov %1, %k6 \n\t" | |||
@@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||
: "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), | |||
"m"(sig_off), "m"(last_coeff_ctx_base), | |||
"i"(offsetof(CABACContext, bytestream)), | |||
"i"(offsetof(CABACContext, bytestream_end)) | |||
"i"(offsetof(CABACContext, bytestream_end)), | |||
"i"(H264_NORM_SHIFT_OFFSET), | |||
"i"(H264_LPS_RANGE_OFFSET), | |||
"i"(H264_MLPS_STATE_OFFSET), | |||
"i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) | |||
: "%"REG_c, "memory" | |||
); | |||
return coeff_count; | |||