You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

851 lines
28KB

  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  3. * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file cabac.h
  24. * Context Adaptive Binary Arithmetic Coder.
  25. */
  26. //#undef NDEBUG
  27. #include <assert.h>
/* Number of bitstream bits consumed by one decoder refill (refill() advances
 * the read pointer by CABAC_BITS/8 bytes). */
#define CABAC_BITS 16
/* Mask covering the low CABAC_BITS bits of CABACContext.low; the decoder
 * refills when all of these bits are zero. */
#define CABAC_MASK ((1<<CABAC_BITS)-1)
/* When defined, get_cabac_inline() uses its branchless code paths (the checks
 * are #ifndef/#ifdef, so the value itself is not inspected). */
#define BRANCHLESS_CABAC_DECODER 1
/* Left commented out: defining it would select the inline-asm variants in
 * renorm_cabac_decoder_once(). */
//#define ARCH_X86_DISABLED 1
  32. typedef struct CABACContext{
  33. int low;
  34. int range;
  35. int outstanding_count;
  36. #ifdef STRICT_LIMITS
  37. int symCount;
  38. #endif
  39. const uint8_t *bytestream_start;
  40. const uint8_t *bytestream;
  41. const uint8_t *bytestream_end;
  42. PutBitContext pb;
  43. }CABACContext;
/*
 * Lookup tables and init functions defined outside this header.
 * NOTE(review): the descriptions of the init functions are inferred from
 * their names and signatures only — confirm against the implementation.
 */
/* Combined state transition table; the branchless decoder indexes it at an
 * offset of +128 with a state value that may have been bitwise inverted. */
extern uint8_t ff_h264_mlps_state[4*64];
/* Indexed by 2*(range&0xC0) + state. */
extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS
/* Next state after coding the more probable symbol, indexed by state. */
extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS
/* Next state after coding the less probable symbol, indexed by state. */
extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS
/* Renormalization shift counts; the decoder indexes it with a range value
 * and uses the result as a left-shift amount. */
extern const uint8_t ff_h264_norm_shift[512];
/* Presumably prepares c for encoding into buf (buf_size bytes). */
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
/* Presumably prepares c for decoding from buf (buf_size bytes). */
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
/* Presumably fills the state tables declared above. */
void ff_init_cabac_states(CABACContext *c);
  52. static inline void put_cabac_bit(CABACContext *c, int b){
  53. put_bits(&c->pb, 1, b);
  54. for(;c->outstanding_count; c->outstanding_count--){
  55. put_bits(&c->pb, 1, 1-b);
  56. }
  57. }
  58. static inline void renorm_cabac_encoder(CABACContext *c){
  59. while(c->range < 0x100){
  60. //FIXME optimize
  61. if(c->low<0x100){
  62. put_cabac_bit(c, 0);
  63. }else if(c->low<0x200){
  64. c->outstanding_count++;
  65. c->low -= 0x100;
  66. }else{
  67. put_cabac_bit(c, 1);
  68. c->low -= 0x200;
  69. }
  70. c->range+= c->range;
  71. c->low += c->low;
  72. }
  73. }
  74. static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
  75. int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];
  76. if(bit == ((*state)&1)){
  77. c->range -= RangeLPS;
  78. *state= ff_h264_mps_state[*state];
  79. }else{
  80. c->low += c->range - RangeLPS;
  81. c->range = RangeLPS;
  82. *state= ff_h264_lps_state[*state];
  83. }
  84. renorm_cabac_encoder(c);
  85. #ifdef STRICT_LIMITS
  86. c->symCount++;
  87. #endif
  88. }
  89. static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
  90. assert(c->range > RangeLPS);
  91. if(!bit){
  92. c->range -= RangeLPS;
  93. }else{
  94. c->low += c->range - RangeLPS;
  95. c->range = RangeLPS;
  96. }
  97. renorm_cabac_encoder(c);
  98. #ifdef STRICT_LIMITS
  99. c->symCount++;
  100. #endif
  101. }
  102. /**
  103. * @param bit 0 -> write zero bit, !=0 write one bit
  104. */
  105. static void put_cabac_bypass(CABACContext *c, int bit){
  106. c->low += c->low;
  107. if(bit){
  108. c->low += c->range;
  109. }
  110. //FIXME optimize
  111. if(c->low<0x200){
  112. put_cabac_bit(c, 0);
  113. }else if(c->low<0x400){
  114. c->outstanding_count++;
  115. c->low -= 0x200;
  116. }else{
  117. put_cabac_bit(c, 1);
  118. c->low -= 0x400;
  119. }
  120. #ifdef STRICT_LIMITS
  121. c->symCount++;
  122. #endif
  123. }
  124. /**
  125. *
  126. * @return the number of bytes written
  127. */
  128. static int put_cabac_terminate(CABACContext *c, int bit){
  129. c->range -= 2;
  130. if(!bit){
  131. renorm_cabac_encoder(c);
  132. }else{
  133. c->low += c->range;
  134. c->range= 2;
  135. renorm_cabac_encoder(c);
  136. assert(c->low <= 0x1FF);
  137. put_cabac_bit(c, c->low>>9);
  138. put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
  139. flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
  140. }
  141. #ifdef STRICT_LIMITS
  142. c->symCount++;
  143. #endif
  144. return (put_bits_count(&c->pb)+7)>>3;
  145. }
  146. /**
  147. * put (truncated) unary binarization.
  148. */
  149. static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
  150. int i;
  151. assert(v <= max);
  152. #if 1
  153. for(i=0; i<v; i++){
  154. put_cabac(c, state, 1);
  155. if(i < max_index) state++;
  156. }
  157. if(truncated==0 || v<max)
  158. put_cabac(c, state, 0);
  159. #else
  160. if(v <= max_index){
  161. for(i=0; i<v; i++){
  162. put_cabac(c, state+i, 1);
  163. }
  164. if(truncated==0 || v<max)
  165. put_cabac(c, state+i, 0);
  166. }else{
  167. for(i=0; i<=max_index; i++){
  168. put_cabac(c, state+i, 1);
  169. }
  170. for(; i<v; i++){
  171. put_cabac(c, state+max_index, 1);
  172. }
  173. if(truncated==0 || v<max)
  174. put_cabac(c, state+max_index, 0);
  175. }
  176. #endif
  177. }
  178. /**
  179. * put unary exp golomb k-th order binarization.
  180. */
  181. static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
  182. int i;
  183. if(v==0)
  184. put_cabac(c, state, 0);
  185. else{
  186. const int sign= v < 0;
  187. if(is_signed) v= FFABS(v);
  188. if(v<max){
  189. for(i=0; i<v; i++){
  190. put_cabac(c, state, 1);
  191. if(i < max_index) state++;
  192. }
  193. put_cabac(c, state, 0);
  194. }else{
  195. int m= 1<<k;
  196. for(i=0; i<max; i++){
  197. put_cabac(c, state, 1);
  198. if(i < max_index) state++;
  199. }
  200. v -= max;
  201. while(v >= m){ //FIXME optimize
  202. put_cabac_bypass(c, 1);
  203. v-= m;
  204. m+= m;
  205. }
  206. put_cabac_bypass(c, 0);
  207. while(m>>=1){
  208. put_cabac_bypass(c, v&m);
  209. }
  210. }
  211. if(is_signed)
  212. put_cabac_bypass(c, sign);
  213. }
  214. }
  215. static void refill(CABACContext *c){
  216. #if CABAC_BITS == 16
  217. c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  218. #else
  219. c->low+= c->bytestream[0]<<1;
  220. #endif
  221. c->low -= CABAC_MASK;
  222. c->bytestream+= CABAC_BITS/8;
  223. }
  224. static void refill2(CABACContext *c){
  225. int i, x;
  226. x= c->low ^ (c->low-1);
  227. i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
  228. x= -CABAC_MASK;
  229. #if CABAC_BITS == 16
  230. x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  231. #else
  232. x+= c->bytestream[0]<<1;
  233. #endif
  234. c->low += x<<i;
  235. c->bytestream+= CABAC_BITS/8;
  236. }
  237. static inline void renorm_cabac_decoder(CABACContext *c){
  238. while(c->range < 0x100){
  239. c->range+= c->range;
  240. c->low+= c->low;
  241. if(!(c->low & CABAC_MASK))
  242. refill(c);
  243. }
  244. }
/**
 * Renormalize the decoder by at most one bit: if range is below 0x100,
 * range and low are doubled once. Afterwards low is refilled if its
 * CABAC_MASK bits are all zero.
 *
 * The inline-asm alternatives are compiled only when ARCH_X86_DISABLED is
 * defined; among them the "#elif 1" variant is the one selected. The
 * "P3:/athlon:" figures next to each variant are presumably benchmark
 * timings — TODO confirm.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
#ifdef ARCH_X86_DISABLED
int temp;
#if 0
//P3:683 athlon:475
asm(
"lea -0x100(%0), %2 \n\t"
"shr $31, %2 \n\t" //FIXME 31->63 for x86-64
"shl %%cl, %0 \n\t"
"shl %%cl, %1 \n\t"
: "+r"(c->range), "+r"(c->low), "+c"(temp)
);
#elif 0
//P3:680 athlon:474
asm(
"cmp $0x100, %0 \n\t"
"setb %%cl \n\t" //FIXME 31->63 for x86-64
"shl %%cl, %0 \n\t"
"shl %%cl, %1 \n\t"
: "+r"(c->range), "+r"(c->low), "+c"(temp)
);
#elif 1
int temp2;
//P3:665 athlon:517
// edx becomes an all-ones mask when range < 0x100; range and low are then
// each increased by themselves ANDed with that mask (i.e. doubled or unchanged)
asm(
"lea -0x100(%0), %%eax \n\t"
"cdq \n\t"
"mov %0, %%eax \n\t"
"and %%edx, %0 \n\t"
"and %1, %%edx \n\t"
"add %%eax, %0 \n\t"
"add %%edx, %1 \n\t"
: "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
);
#elif 0
int temp2;
//P3:673 athlon:509
asm(
"cmp $0x100, %0 \n\t"
"sbb %%edx, %%edx \n\t"
"mov %0, %%eax \n\t"
"and %%edx, %0 \n\t"
"and %1, %%edx \n\t"
"add %%eax, %0 \n\t"
"add %%edx, %1 \n\t"
: "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
);
#else
int temp2;
//P3:677 athlon:511
asm(
"cmp $0x100, %0 \n\t"
"lea (%0, %0), %%eax \n\t"
"lea (%1, %1), %%edx \n\t"
"cmovb %%eax, %0 \n\t"
"cmovb %%edx, %1 \n\t"
: "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
);
#endif
#else
//P3:675 athlon:476
// shift is 1 when range < 0x100 (the subtraction goes negative, so bit 31 of
// the unsigned value is set) and 0 otherwise
int shift= (uint32_t)(c->range - 0x100)>>31;
c->range<<= shift;
c->low <<= shift;
#endif
if(!(c->low & CABAC_MASK))
refill(c);
}
/**
 * Decode one context-coded bin.
 *
 * Three implementations are selected at compile time:
 *  - x86 without (PIC && GCC), BRANCHLESS_CABAC_DECODER undefined: branching inline asm
 *  - x86 without (PIC && GCC), BRANCHLESS_CABAC_DECODER defined:   branchless inline asm
 *  - everything else: portable C, again in a branching and a branchless form
 *
 * This function also defines the LOW/RANGE/BYTE* offset strings and the
 * BRANCHLESS_GET_CABAC* asm macros at file scope; other asm code later in
 * this file uses them.
 *
 * @param c     decoder context; low, range and bytestream are updated
 * @param state context state, updated in place
 * @return the decoded bit (0 or 1)
 */
static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
//FIXME gcc generates duplicate load/stores for c->low and c->range
// Byte offsets of CABACContext members, used as displacements in the asm.
// They are hard-coded and must match the struct layout (4-byte int and
// pointer members assumed).
// NOTE(review): they stop matching if a member is laid out before the
// bytestream pointers.
#define LOW "0"
#define RANGE "4"
#define BYTESTART "12"
#define BYTE "16"
#define BYTEEND "20"
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
int bit;
#ifndef BRANCHLESS_CABAC_DECODER
asm volatile(
"movzbl (%1), %0 \n\t"
"movl "RANGE "(%2), %%ebx \n\t"
"movl "RANGE "(%2), %%edx \n\t"
"andl $0xC0, %%ebx \n\t"
"movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
"movl "LOW "(%2), %%ebx \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
"subl %%esi, %%edx \n\t"
"movl %%edx, %%ecx \n\t"
"shll $17, %%ecx \n\t"
"cmpl %%ecx, %%ebx \n\t"
" ja 1f \n\t"
#if 1
//athlon:4067 P3:4110
"lea -0x100(%%edx), %%ecx \n\t"
"shr $31, %%ecx \n\t"
"shl %%cl, %%edx \n\t"
"shl %%cl, %%ebx \n\t"
#else
//athlon:4057 P3:4130
"cmp $0x100, %%edx \n\t" //FIXME avoidable
"setb %%cl \n\t"
"shl %%cl, %%edx \n\t"
"shl %%cl, %%ebx \n\t"
#endif
"movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx \n\t"
"movb %%cl, (%1) \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
"test %%bx, %%bx \n\t"
" jnz 2f \n\t"
"movl "BYTE "(%2), %%esi \n\t"
"subl $0xFFFF, %%ebx \n\t"
"movzwl (%%esi), %%ecx \n\t"
"bswap %%ecx \n\t"
"shrl $15, %%ecx \n\t"
"addl $2, %%esi \n\t"
"addl %%ecx, %%ebx \n\t"
"movl %%esi, "BYTE "(%2) \n\t"
"jmp 2f \n\t"
"1: \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
"subl %%ecx, %%ebx \n\t"
"movl %%esi, %%edx \n\t"
"movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
"shll %%cl, %%ebx \n\t"
"shll %%cl, %%edx \n\t"
"movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
"movb %%cl, (%1) \n\t"
"addl $1, %0 \n\t"
"test %%bx, %%bx \n\t"
" jnz 2f \n\t"
"movl "BYTE "(%2), %%ecx \n\t"
"movzwl (%%ecx), %%esi \n\t"
"bswap %%esi \n\t"
"shrl $15, %%esi \n\t"
"subl $0xFFFF, %%esi \n\t"
"addl $2, %%ecx \n\t"
"movl %%ecx, "BYTE "(%2) \n\t"
"leal -1(%%ebx), %%ecx \n\t"
"xorl %%ebx, %%ecx \n\t"
"shrl $15, %%ecx \n\t"
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
"neg %%ecx \n\t"
"add $7, %%ecx \n\t"
"shll %%cl , %%esi \n\t"
"addl %%esi, %%ebx \n\t"
"2: \n\t"
"movl %%edx, "RANGE "(%2) \n\t"
"movl %%ebx, "LOW "(%2) \n\t"
:"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
:"r"(state), "r"(c)
: "%ecx", "%ebx", "%edx", "%esi", "memory"
);
// only the lowest bit of the register holds the decoded symbol
bit&=1;
#else /* BRANCHLESS_CABAC_DECODER */
// BRANCHLESS_GET_CABAC_UPDATE: asm fragment that selects the MPS or LPS path
// without a jump; it adjusts the range and low operands and XORs a mask into
// ret. Two variants: with cmov, and with a sar-generated mask.
// All macro arguments are strings naming asm operands or registers;
// %ecx is used as scratch.
#if defined CMOV_IS_FAST
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"cmp "low" , "tmp" \n\t"\
"cmova %%ecx , "range" \n\t"\
"sbb %%ecx , %%ecx \n\t"\
"and %%ecx , "tmp" \n\t"\
"sub "tmp" , "low" \n\t"\
"xor %%ecx , "ret" \n\t"
#else /* CMOV_IS_FAST */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"sub "low" , "tmp" \n\t"\
"sar $31 , "tmp" \n\t" /*lps_mask*/\
"sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
"and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
"add %%ecx , "range" \n\t" /*new range*/\
"shl $17 , %%ecx \n\t"\
"and "tmp" , %%ecx \n\t"\
"sub %%ecx , "low" \n\t"\
"xor "tmp" , "ret" \n\t"
#endif /* CMOV_IS_FAST */
// BRANCHLESS_GET_CABAC: complete asm sequence decoding one bin. It loads the
// state byte from statep into ret, performs the update above, renormalizes
// via ff_h264_norm_shift, stores the new state from ff_h264_mlps_state+128
// and refills low from the bytestream when lowword is zero.
// The decoded bit is the lowest bit of ret. Uses %ecx and local label 1.
#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
"movzbl "statep" , "ret" \n\t"\
"mov "range" , "tmp" \n\t"\
"and $0xC0 , "range" \n\t"\
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
"sub "range" , "tmp" \n\t"\
BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
"shl %%cl , "range" \n\t"\
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
"mov "tmpbyte" , "statep" \n\t"\
"shl %%cl , "low" \n\t"\
"test "lowword" , "lowword" \n\t"\
" jnz 1f \n\t"\
"mov "BYTE"("cabac"), %%ecx \n\t"\
"movzwl (%%ecx) , "tmp" \n\t"\
"bswap "tmp" \n\t"\
"shr $15 , "tmp" \n\t"\
"sub $0xFFFF , "tmp" \n\t"\
"add $2 , %%ecx \n\t"\
"mov %%ecx , "BYTE "("cabac") \n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
"neg %%ecx \n\t"\
"add $7 , %%ecx \n\t"\
"shl %%cl , "tmp" \n\t"\
"add "tmp" , "low" \n\t"\
"1: \n\t"
// range is kept in esi and low in ebx across the macro, then written back
asm volatile(
"movl "RANGE "(%2), %%esi \n\t"
"movl "LOW "(%2), %%ebx \n\t"
BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
"movl %%esi, "RANGE "(%2) \n\t"
"movl %%ebx, "LOW "(%2) \n\t"
:"=&a"(bit)
:"r"(state), "r"(c)
: "%ecx", "%ebx", "%edx", "%esi", "memory"
);
bit&=1;
#endif /* BRANCHLESS_CABAC_DECODER */
#else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
// portable C implementation
int s = *state;
int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
int bit, lps_mask attribute_unused;
// range now holds the size of the MPS sub-interval
c->range -= RangeLPS;
#ifndef BRANCHLESS_CABAC_DECODER
if(c->low < (c->range<<17)){
// more probable symbol
bit= s&1;
*state= ff_h264_mps_state[s];
renorm_cabac_decoder_once(c);
}else{
// less probable symbol; bit temporarily holds the renormalization shift
bit= ff_h264_norm_shift[RangeLPS];
c->low -= (c->range<<17);
*state= ff_h264_lps_state[s];
c->range = RangeLPS<<bit;
c->low <<= bit;
bit= (s&1)^1;
if(!(c->low & 0xFFFF)){
refill2(c);
}
}
#else /* BRANCHLESS_CABAC_DECODER */
// lps_mask is all ones when low exceeds range<<17 (LPS), else 0; this relies
// on >> of a negative int being an arithmetic shift
lps_mask= ((c->range<<17) - c->low)>>31;
c->low -= (c->range<<17) & lps_mask;
c->range += (RangeLPS - c->range) & lps_mask;
// on the LPS path s is bitwise inverted, which both flips the returned bit
// and makes the table index negative relative to the +128 midpoint
s^=lps_mask;
*state= (ff_h264_mlps_state+128)[s];
bit= s&1;
// lps_mask is reused as the renormalization shift count
lps_mask= ff_h264_norm_shift[c->range];
c->range<<= lps_mask;
c->low <<= lps_mask;
if(!(c->low & CABAC_MASK))
refill2(c);
#endif /* BRANCHLESS_CABAC_DECODER */
#endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
return bit;
}
/**
 * Out-of-line wrapper around get_cabac_inline(): the noinline attribute
 * asks the compiler to keep this function as a real call.
 * @return the decoded bit
 */
static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
return get_cabac_inline(c,state);
}
/**
 * Decode one context-coded bin; forwards to get_cabac_inline() and leaves
 * the inlining decision to the compiler.
 * @param state context state, updated in place
 * @return the decoded bit
 */
static int get_cabac(CABACContext *c, uint8_t * const state){
return get_cabac_inline(c,state);
}
  508. static int get_cabac_bypass(CABACContext *c){
  509. #if 0 //not faster
  510. int bit;
  511. asm volatile(
  512. "movl "RANGE "(%1), %%ebx \n\t"
  513. "movl "LOW "(%1), %%eax \n\t"
  514. "shl $17, %%ebx \n\t"
  515. "add %%eax, %%eax \n\t"
  516. "sub %%ebx, %%eax \n\t"
  517. "cdq \n\t"
  518. "and %%edx, %%ebx \n\t"
  519. "add %%ebx, %%eax \n\t"
  520. "test %%ax, %%ax \n\t"
  521. " jnz 1f \n\t"
  522. "movl "BYTE "(%1), %%ebx \n\t"
  523. "subl $0xFFFF, %%eax \n\t"
  524. "movzwl (%%ebx), %%ecx \n\t"
  525. "bswap %%ecx \n\t"
  526. "shrl $15, %%ecx \n\t"
  527. "addl $2, %%ebx \n\t"
  528. "addl %%ecx, %%eax \n\t"
  529. "movl %%ebx, "BYTE "(%1) \n\t"
  530. "1: \n\t"
  531. "movl %%eax, "LOW "(%1) \n\t"
  532. :"=&d"(bit)
  533. :"r"(c)
  534. : "%eax", "%ebx", "%ecx", "memory"
  535. );
  536. return bit+1;
  537. #else
  538. int range;
  539. c->low += c->low;
  540. if(!(c->low & CABAC_MASK))
  541. refill(c);
  542. range= c->range<<17;
  543. if(c->low < range){
  544. return 0;
  545. }else{
  546. c->low -= range;
  547. return 1;
  548. }
  549. #endif
  550. }
  551. static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
  552. #ifdef ARCH_X86
  553. asm volatile(
  554. "movl "RANGE "(%1), %%ebx \n\t"
  555. "movl "LOW "(%1), %%eax \n\t"
  556. "shl $17, %%ebx \n\t"
  557. "add %%eax, %%eax \n\t"
  558. "sub %%ebx, %%eax \n\t"
  559. "cdq \n\t"
  560. "and %%edx, %%ebx \n\t"
  561. "add %%ebx, %%eax \n\t"
  562. "xor %%edx, %%ecx \n\t"
  563. "sub %%edx, %%ecx \n\t"
  564. "test %%ax, %%ax \n\t"
  565. " jnz 1f \n\t"
  566. "movl "BYTE "(%1), %%ebx \n\t"
  567. "subl $0xFFFF, %%eax \n\t"
  568. "movzwl (%%ebx), %%edx \n\t"
  569. "bswap %%edx \n\t"
  570. "shrl $15, %%edx \n\t"
  571. "addl $2, %%ebx \n\t"
  572. "addl %%edx, %%eax \n\t"
  573. "movl %%ebx, "BYTE "(%1) \n\t"
  574. "1: \n\t"
  575. "movl %%eax, "LOW "(%1) \n\t"
  576. :"+c"(val)
  577. :"r"(c)
  578. : "%eax", "%ebx", "%edx", "memory"
  579. );
  580. return val;
  581. #else
  582. int range, mask;
  583. c->low += c->low;
  584. if(!(c->low & CABAC_MASK))
  585. refill(c);
  586. range= c->range<<17;
  587. c->low -= range;
  588. mask= c->low >> 31;
  589. range &= mask;
  590. c->low += range;
  591. return (val^mask)-mask;
  592. #endif
  593. }
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
//FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
  596. #ifdef ARCH_X86
  597. static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
  598. void *end= significant_coeff_ctx_base + max_coeff - 1;
  599. int minusstart= -(int)significant_coeff_ctx_base;
  600. int minusindex= 4-(int)index;
  601. int coeff_count;
  602. asm volatile(
  603. "movl "RANGE "(%3), %%esi \n\t"
  604. "movl "LOW "(%3), %%ebx \n\t"
  605. "2: \n\t"
  606. BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
  607. "test $1, %%edx \n\t"
  608. " jz 3f \n\t"
  609. BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
  610. "movl %2, %%eax \n\t"
  611. "movl %4, %%ecx \n\t"
  612. "addl %1, %%ecx \n\t"
  613. "movl %%ecx, (%%eax) \n\t"
  614. "test $1, %%edx \n\t"
  615. " jnz 4f \n\t"
  616. "addl $4, %%eax \n\t"
  617. "movl %%eax, %2 \n\t"
  618. "3: \n\t"
  619. "addl $1, %1 \n\t"
  620. "cmpl %5, %1 \n\t"
  621. " jb 2b \n\t"
  622. "movl %2, %%eax \n\t"
  623. "movl %4, %%ecx \n\t"
  624. "addl %1, %%ecx \n\t"
  625. "movl %%ecx, (%%eax) \n\t"
  626. "4: \n\t"
  627. "addl %6, %%eax \n\t"
  628. "shr $2, %%eax \n\t"
  629. "movl %%esi, "RANGE "(%3) \n\t"
  630. "movl %%ebx, "LOW "(%3) \n\t"
  631. :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
  632. :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
  633. : "%ecx", "%ebx", "%edx", "%esi", "memory"\
  634. );
  635. return coeff_count;
  636. }
  637. static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
  638. int minusindex= 4-(int)index;
  639. int coeff_count;
  640. int last=0;
  641. asm volatile(
  642. "movl "RANGE "(%3), %%esi \n\t"
  643. "movl "LOW "(%3), %%ebx \n\t"
  644. "mov %1, %%edi \n\t"
  645. "2: \n\t"
  646. "mov %6, %%eax \n\t"
  647. "movzbl (%%eax, %%edi), %%edi \n\t"
  648. "add %5, %%edi \n\t"
  649. BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
  650. "mov %1, %%edi \n\t"
  651. "test $1, %%edx \n\t"
  652. " jz 3f \n\t"
  653. "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
  654. "add %5, %%edi \n\t"
  655. BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
  656. "movl %2, %%eax \n\t"
  657. "mov %1, %%edi \n\t"
  658. "movl %%edi, (%%eax) \n\t"
  659. "test $1, %%edx \n\t"
  660. " jnz 4f \n\t"
  661. "addl $4, %%eax \n\t"
  662. "movl %%eax, %2 \n\t"
  663. "3: \n\t"
  664. "addl $1, %%edi \n\t"
  665. "mov %%edi, %1 \n\t"
  666. "cmpl $63, %%edi \n\t"
  667. " jb 2b \n\t"
  668. "movl %2, %%eax \n\t"
  669. "movl %%edi, (%%eax) \n\t"
  670. "4: \n\t"
  671. "addl %4, %%eax \n\t"
  672. "shr $2, %%eax \n\t"
  673. "movl %%esi, "RANGE "(%3) \n\t"
  674. "movl %%ebx, "LOW "(%3) \n\t"
  675. :"=&a"(coeff_count),"+m"(last), "+m"(index)\
  676. :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
  677. : "%ecx", "%ebx", "%edx", "%esi", "%edi", "memory"\
  678. );
  679. return coeff_count;
  680. }
  681. #endif
  682. /**
  683. *
  684. * @return the number of bytes read or 0 if no end
  685. */
  686. static int get_cabac_terminate(CABACContext *c){
  687. c->range -= 2;
  688. if(c->low < c->range<<17){
  689. renorm_cabac_decoder_once(c);
  690. return 0;
  691. }else{
  692. return c->bytestream - c->bytestream_start;
  693. }
  694. }
  695. /**
  696. * get (truncated) unnary binarization.
  697. */
  698. static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
  699. int i;
  700. for(i=0; i<max; i++){
  701. if(get_cabac(c, state)==0)
  702. return i;
  703. if(i< max_index) state++;
  704. }
  705. return truncated ? max : -1;
  706. }
  707. /**
  708. * get unary exp golomb k-th order binarization.
  709. */
  710. static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
  711. int i, v;
  712. int m= 1<<k;
  713. if(get_cabac(c, state)==0)
  714. return 0;
  715. if(0 < max_index) state++;
  716. for(i=1; i<max; i++){
  717. if(get_cabac(c, state)==0){
  718. if(is_signed && get_cabac_bypass(c)){
  719. return -i;
  720. }else
  721. return i;
  722. }
  723. if(i < max_index) state++;
  724. }
  725. while(get_cabac_bypass(c)){
  726. i+= m;
  727. m+= m;
  728. }
  729. v=0;
  730. while(m>>=1){
  731. v+= v + get_cabac_bypass(c);
  732. }
  733. i += v;
  734. if(is_signed && get_cabac_bypass(c)){
  735. return -i;
  736. }else
  737. return i;
  738. }