| @@ -1649,7 +1649,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
| const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; | const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; | ||||
| #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) | #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) | ||||
| coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, | coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, | ||||
| last_coeff_ctx_base-significant_coeff_ctx_base, sig_off); | |||||
| last_coeff_ctx_base, sig_off); | |||||
| } else { | } else { | ||||
| coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, | coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, | ||||
| last_coeff_ctx_base-significant_coeff_ctx_base); | last_coeff_ctx_base-significant_coeff_ctx_base); | ||||
| @@ -34,8 +34,8 @@ | |||||
| "cmova %%ecx , "range" \n\t"\ | "cmova %%ecx , "range" \n\t"\ | ||||
| "sbb %%ecx , %%ecx \n\t"\ | "sbb %%ecx , %%ecx \n\t"\ | ||||
| "and %%ecx , "tmp" \n\t"\ | "and %%ecx , "tmp" \n\t"\ | ||||
| "sub "tmp" , "low" \n\t"\ | |||||
| "xor %%ecx , "ret" \n\t" | |||||
| "xor %%ecx , "ret" \n\t"\ | |||||
| "sub "tmp" , "low" \n\t" | |||||
| #else /* HAVE_FAST_CMOV */ | #else /* HAVE_FAST_CMOV */ | ||||
| #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ | #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\ | ||||
| "mov "tmp" , %%ecx \n\t"\ | "mov "tmp" , %%ecx \n\t"\ | ||||
| @@ -62,21 +62,20 @@ | |||||
| "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ | "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ | ||||
| "shl %%cl , "range" \n\t"\ | "shl %%cl , "range" \n\t"\ | ||||
| "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ | "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ | ||||
| "mov "tmpbyte" , "statep" \n\t"\ | |||||
| "shl %%cl , "low" \n\t"\ | "shl %%cl , "low" \n\t"\ | ||||
| "mov "tmpbyte" , "statep" \n\t"\ | |||||
| "test "lowword" , "lowword" \n\t"\ | "test "lowword" , "lowword" \n\t"\ | ||||
| " jnz 1f \n\t"\ | " jnz 1f \n\t"\ | ||||
| "mov "byte"("cabac"), %%"REG_c" \n\t"\ | "mov "byte"("cabac"), %%"REG_c" \n\t"\ | ||||
| "add $2 , "byte "("cabac") \n\t"\ | |||||
| "movzwl (%%"REG_c") , "tmp" \n\t"\ | "movzwl (%%"REG_c") , "tmp" \n\t"\ | ||||
| "bswap "tmp" \n\t"\ | |||||
| "shr $15 , "tmp" \n\t"\ | |||||
| "sub $0xFFFF , "tmp" \n\t"\ | |||||
| "add $2 , %%"REG_c" \n\t"\ | |||||
| "mov %%"REG_c" , "byte "("cabac") \n\t"\ | |||||
| "lea -1("low") , %%ecx \n\t"\ | "lea -1("low") , %%ecx \n\t"\ | ||||
| "xor "low" , %%ecx \n\t"\ | "xor "low" , %%ecx \n\t"\ | ||||
| "shr $15 , %%ecx \n\t"\ | "shr $15 , %%ecx \n\t"\ | ||||
| "bswap "tmp" \n\t"\ | |||||
| "shr $15 , "tmp" \n\t"\ | |||||
| "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ | "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ | ||||
| "sub $0xFFFF , "tmp" \n\t"\ | |||||
| "neg %%ecx \n\t"\ | "neg %%ecx \n\t"\ | ||||
| "add $7 , %%ecx \n\t"\ | "add $7 , %%ecx \n\t"\ | ||||
| "shl %%cl , "tmp" \n\t"\ | "shl %%cl , "tmp" \n\t"\ | ||||
| @@ -72,8 +72,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||||
| "test $1, %4 \n\t" | "test $1, %4 \n\t" | ||||
| " jnz 4f \n\t" | " jnz 4f \n\t" | ||||
| "add $4, %0 \n\t" | |||||
| "mov %0, %2 \n\t" | |||||
| "add $4, %2 \n\t" | |||||
| "3: \n\t" | "3: \n\t" | ||||
| "add $1, %1 \n\t" | "add $1, %1 \n\t" | ||||
| @@ -101,7 +100,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, | |||||
| static int decode_significance_8x8_x86(CABACContext *c, | static int decode_significance_8x8_x86(CABACContext *c, | ||||
| uint8_t *significant_coeff_ctx_base, | uint8_t *significant_coeff_ctx_base, | ||||
| int *index, x86_reg last_off, const uint8_t *sig_off){ | |||||
| int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){ | |||||
| int minusindex= 4-(intptr_t)index; | int minusindex= 4-(intptr_t)index; | ||||
| int bit; | int bit; | ||||
| x86_reg coeff_count; | x86_reg coeff_count; | ||||
| @@ -128,7 +127,6 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||||
| " jz 3f \n\t" | " jz 3f \n\t" | ||||
| "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" | "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" | ||||
| "add %9, %6 \n\t" | |||||
| "add %11, %6 \n\t" | "add %11, %6 \n\t" | ||||
| BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3", | BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3", | ||||
| @@ -141,8 +139,7 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||||
| "test $1, %4 \n\t" | "test $1, %4 \n\t" | ||||
| " jnz 4f \n\t" | " jnz 4f \n\t" | ||||
| "add $4, %0 \n\t" | |||||
| "mov %0, %2 \n\t" | |||||
| "add $4, %2 \n\t" | |||||
| "3: \n\t" | "3: \n\t" | ||||
| "addl $1, %k6 \n\t" | "addl $1, %k6 \n\t" | ||||
| @@ -159,7 +156,7 @@ static int decode_significance_8x8_x86(CABACContext *c, | |||||
| "movl %3, %a13(%7) \n\t" | "movl %3, %a13(%7) \n\t" | ||||
| :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit), | :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit), | ||||
| "=&r"(range), "=&r"(state) | "=&r"(range), "=&r"(state) | ||||
| :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off), | |||||
| :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base), | |||||
| "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), | "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)), | ||||
| "i"(offsetof(CABACContext, bytestream)) | "i"(offsetof(CABACContext, bytestream)) | ||||
| : "%"REG_c, "memory" | : "%"REG_c, "memory" | ||||