Writing the cmov sequence directly in x86 inline assembly is a lot more reliable than trying to trick gcc into generating it, and it is worthwhile since it is 2% faster overall. Patch by Eli Friedman <eli.friedman at gmail>. Originally committed as revision 24471 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/n0.8)
| @@ -208,23 +208,25 @@ static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c) | |||||
| return code_word; | return code_word; | ||||
| } | } | ||||
| #if ARCH_X86 | |||||
| #include "x86/vp56_arith.h" | |||||
| #endif | |||||
| #ifndef vp56_rac_get_prob | |||||
| #define vp56_rac_get_prob vp56_rac_get_prob | |||||
/**
 * Decode one bit from the range coder using an 8-bit probability.
 *
 * After renormalizing, the current range (c->high) is split at
 * low = 1 + (((c->high - 1) * prob) >> 8) and the code word is compared
 * against that split point shifted left by 8.
 *
 * @param c    range coder state; c->high and c->code_word are updated
 * @param prob 8-bit weight that positions the split point
 * @return 1 if code_word >= (low << 8), otherwise 0
 */
static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
{
    /* Don't put c->high in a local variable; if we do that, gcc gets
     * the stupids and turns the code below into a branch again. */
    unsigned int code_word = vp56_rac_renorm(c);
    unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
    unsigned int low_shift = low << 8;
    int bit = code_word >= low_shift;

    /* Incantation to convince GCC to turn these into conditional moves
     * instead of branches -- faster, as this branch is basically
     * unpredictable. */
    /* bit set: keep the part of the range above the split;
     * bit clear: the new range is the part below it. */
    c->high = bit ? c->high - low : low;
    /* The code word is only reduced when the bit is set. */
    c->code_word = bit ? code_word - low_shift : code_word;
    return bit;
}
| #endif | |||||
| // branchy variant, to be used where there's a branch based on the bit decoded | // branchy variant, to be used where there's a branch based on the bit decoded | ||||
| static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) | static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) | ||||
| @@ -0,0 +1,54 @@ | |||||
| /** | |||||
| * VP5 and VP6 compatible video decoder (arith decoder) | |||||
| * | |||||
| * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> | |||||
| * Copyright (C) 2010 Eli Friedman | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_X86_VP56_ARITH_H | |||||
| #define AVCODEC_X86_VP56_ARITH_H | |||||
| #if HAVE_FAST_CMOV | |||||
| #define vp56_rac_get_prob vp56_rac_get_prob | |||||
| static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) | |||||
| { | |||||
| unsigned int code_word = vp56_rac_renorm(c); | |||||
| unsigned int high = c->high; | |||||
| unsigned int low = 1 + (((high - 1) * prob) >> 8); | |||||
| unsigned int low_shift = low << 8; | |||||
| int bit = 0; | |||||
| __asm__( | |||||
| "subl %4, %1 \n\t" | |||||
| "subl %3, %2 \n\t" | |||||
| "leal (%2, %3), %3 \n\t" | |||||
| "setae %b0 \n\t" | |||||
| "cmovb %4, %1 \n\t" | |||||
| "cmovb %3, %2 \n\t" | |||||
| : "+q"(bit), "+r"(high), "+r"(code_word), "+r"(low_shift) | |||||
| : "r"(low) | |||||
| ); | |||||
| c->high = high; | |||||
| c->code_word = code_word; | |||||
| return bit; | |||||
| } | |||||
| #endif | |||||
| #endif /* AVCODEC_X86_VP56_ARITH_H */ | |||||