You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

661 lines
19KB

  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  3. * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file cabac.h
  24. * Context Adaptive Binary Arithmetic Coder.
  25. */
  26. //#undef NDEBUG
  27. #include <assert.h>
  /* Number of bitstream bits pulled in per decoder refill; refill() and
   * refill2() advance the read pointer by CABAC_BITS/8 bytes. */
  28. #define CABAC_BITS 16
  /* Mask of the low CABAC_BITS bits of CABACContext.low; the decoder refills
   * whenever (low & CABAC_MASK) is zero. */
  29. #define CABAC_MASK ((1<<CABAC_BITS)-1)
  /* Only tested with #ifndef below, so being defined at all (not its value)
   * selects the branchless variants of get_cabac_inline(). */
  30. #define BRANCHLESS_CABAC_DECODER 1
  /* Enables the cmov-based asm variant in get_cabac_inline(), but only when
   * __CPU__ >= 686 also holds. */
  31. #define CMOV_IS_FAST 1
  /**
   * State used by the CABAC encoder and decoder routines in this header.
   *
   * The x86 inline assembly in get_cabac_inline() addresses low, range,
   * bytestream_start, bytestream and bytestream_end through hard-coded byte
   * offsets (0, 4, 12, 16, 20), so the order of these members must not change.
   * NOTE(review): those offsets leave no room for symCount, so they look valid
   * only when STRICT_LIMITS is undefined (and with 4-byte int and pointers) --
   * confirm before enabling STRICT_LIMITS on x86.
   */
  32. typedef struct CABACContext{
  33. int low; ///< coder "low" register; the decoder compares it against range<<17
  34. int range; ///< current interval width; renormalized while below 0x100
  35. int outstanding_count; ///< encoder: bits deferred until the next resolved bit (see put_cabac_bit())
  36. #ifdef STRICT_LIMITS
  37. int symCount; ///< incremented once per encoded symbol
  38. #endif
  39. const uint8_t *bytestream_start; ///< decoder: base used by get_cabac_terminate() to report bytes consumed
  40. const uint8_t *bytestream; ///< decoder: next input byte, advanced by refill()/refill2()
  41. const uint8_t *bytestream_end; ///< not read by the C code in this header
  42. PutBitContext pb; ///< encoder: destination bit writer
  43. }CABACContext;
  /* Combined state-transition table. get_cabac_inline() indexes it as
   * (ff_h264_mlps_state+128)[s], where s may have been XORed with an all-ones
   * mask, i.e. it is addressed with both negative and non-negative offsets
   * around its midpoint. */
  44. extern uint8_t ff_h264_mlps_state[4*64];
  /* Declared two-dimensional, but the decoder reads it with a flat offset
   * ([0][2*(range&0xC0) + state]), which selects row (range&0xC0)>>6. */
  45. extern uint8_t ff_h264_lps_range[4][2*64]; ///< rangeTabLPS
  46. extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS
  47. extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS
  /* Left-shift amounts used for renormalization; indexed by the current range
   * and, in refill2(), by a mask derived from low. */
  48. extern const uint8_t ff_h264_norm_shift[512];
  /* Initialization entry points; their definitions are not part of this header. */
  49. void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
  50. void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
  51. void ff_init_cabac_states(CABACContext *c);
  52. static inline void put_cabac_bit(CABACContext *c, int b){
  53. put_bits(&c->pb, 1, b);
  54. for(;c->outstanding_count; c->outstanding_count--){
  55. put_bits(&c->pb, 1, 1-b);
  56. }
  57. }
  58. static inline void renorm_cabac_encoder(CABACContext *c){
  59. while(c->range < 0x100){
  60. //FIXME optimize
  61. if(c->low<0x100){
  62. put_cabac_bit(c, 0);
  63. }else if(c->low<0x200){
  64. c->outstanding_count++;
  65. c->low -= 0x100;
  66. }else{
  67. put_cabac_bit(c, 1);
  68. c->low -= 0x200;
  69. }
  70. c->range+= c->range;
  71. c->low += c->low;
  72. }
  73. }
  74. static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
  75. int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];
  76. if(bit == ((*state)&1)){
  77. c->range -= RangeLPS;
  78. *state= ff_h264_mps_state[*state];
  79. }else{
  80. c->low += c->range - RangeLPS;
  81. c->range = RangeLPS;
  82. *state= ff_h264_lps_state[*state];
  83. }
  84. renorm_cabac_encoder(c);
  85. #ifdef STRICT_LIMITS
  86. c->symCount++;
  87. #endif
  88. }
  89. static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
  90. assert(c->range > RangeLPS);
  91. if(!bit){
  92. c->range -= RangeLPS;
  93. }else{
  94. c->low += c->range - RangeLPS;
  95. c->range = RangeLPS;
  96. }
  97. renorm_cabac_encoder(c);
  98. #ifdef STRICT_LIMITS
  99. c->symCount++;
  100. #endif
  101. }
  102. /**
  103. * @param bit 0 -> write zero bit, !=0 write one bit
  104. */
  105. static void put_cabac_bypass(CABACContext *c, int bit){
  106. c->low += c->low;
  107. if(bit){
  108. c->low += c->range;
  109. }
  110. //FIXME optimize
  111. if(c->low<0x200){
  112. put_cabac_bit(c, 0);
  113. }else if(c->low<0x400){
  114. c->outstanding_count++;
  115. c->low -= 0x200;
  116. }else{
  117. put_cabac_bit(c, 1);
  118. c->low -= 0x400;
  119. }
  120. #ifdef STRICT_LIMITS
  121. c->symCount++;
  122. #endif
  123. }
  124. /**
  125. *
  126. * @return the number of bytes written
  127. */
  128. static int put_cabac_terminate(CABACContext *c, int bit){
  129. c->range -= 2;
  130. if(!bit){
  131. renorm_cabac_encoder(c);
  132. }else{
  133. c->low += c->range;
  134. c->range= 2;
  135. renorm_cabac_encoder(c);
  136. assert(c->low <= 0x1FF);
  137. put_cabac_bit(c, c->low>>9);
  138. put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
  139. flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
  140. }
  141. #ifdef STRICT_LIMITS
  142. c->symCount++;
  143. #endif
  144. return (put_bits_count(&c->pb)+7)>>3;
  145. }
  146. /**
  147. * put (truncated) unary binarization.
  148. */
  149. static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
  150. int i;
  151. assert(v <= max);
  152. #if 1
  153. for(i=0; i<v; i++){
  154. put_cabac(c, state, 1);
  155. if(i < max_index) state++;
  156. }
  157. if(truncated==0 || v<max)
  158. put_cabac(c, state, 0);
  159. #else
  160. if(v <= max_index){
  161. for(i=0; i<v; i++){
  162. put_cabac(c, state+i, 1);
  163. }
  164. if(truncated==0 || v<max)
  165. put_cabac(c, state+i, 0);
  166. }else{
  167. for(i=0; i<=max_index; i++){
  168. put_cabac(c, state+i, 1);
  169. }
  170. for(; i<v; i++){
  171. put_cabac(c, state+max_index, 1);
  172. }
  173. if(truncated==0 || v<max)
  174. put_cabac(c, state+max_index, 0);
  175. }
  176. #endif
  177. }
  178. /**
  179. * put unary exp golomb k-th order binarization.
  180. */
  181. static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
  182. int i;
  183. if(v==0)
  184. put_cabac(c, state, 0);
  185. else{
  186. const int sign= v < 0;
  187. if(is_signed) v= FFABS(v);
  188. if(v<max){
  189. for(i=0; i<v; i++){
  190. put_cabac(c, state, 1);
  191. if(i < max_index) state++;
  192. }
  193. put_cabac(c, state, 0);
  194. }else{
  195. int m= 1<<k;
  196. for(i=0; i<max; i++){
  197. put_cabac(c, state, 1);
  198. if(i < max_index) state++;
  199. }
  200. v -= max;
  201. while(v >= m){ //FIXME optimize
  202. put_cabac_bypass(c, 1);
  203. v-= m;
  204. m+= m;
  205. }
  206. put_cabac_bypass(c, 0);
  207. while(m>>=1){
  208. put_cabac_bypass(c, v&m);
  209. }
  210. }
  211. if(is_signed)
  212. put_cabac_bypass(c, sign);
  213. }
  214. }
  215. static void refill(CABACContext *c){
  216. #if CABAC_BITS == 16
  217. c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  218. #else
  219. c->low+= c->bytestream[0]<<1;
  220. #endif
  221. c->low -= CABAC_MASK;
  222. c->bytestream+= CABAC_BITS/8;
  223. }
  224. static void refill2(CABACContext *c){
  225. int i, x;
  226. x= c->low ^ (c->low-1);
  227. i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
  228. x= -CABAC_MASK;
  229. #if CABAC_BITS == 16
  230. x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  231. #else
  232. x+= c->bytestream[0]<<1;
  233. #endif
  234. c->low += x<<i;
  235. c->bytestream+= CABAC_BITS/8;
  236. }
  237. static inline void renorm_cabac_decoder(CABACContext *c){
  238. while(c->range < 0x100){
  239. c->range+= c->range;
  240. c->low+= c->low;
  241. if(!(c->low & CABAC_MASK))
  242. refill(c);
  243. }
  244. }
  /**
   * Perform at most one renormalization step: if range < 0x100, range and low
   * are doubled; otherwise both are left unchanged. Afterwards the bytestream
   * is refilled if the low CABAC_BITS bits of low are zero.
   *
   * The x86 asm variants are alternative implementations of the same
   * conditional doubling and are compiled only when ARCH_X86_DISABLED is
   * defined; the comments on them are the original author's timing notes.
   */
  245. static inline void renorm_cabac_decoder_once(CABACContext *c){
  246. #ifdef ARCH_X86_DISABLED
  247. int temp;
  248. #if 0
  249. //P3:683 athlon:475
  250. asm(
  251. "lea -0x100(%0), %2 \n\t"
  252. "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
  253. "shl %%cl, %0 \n\t"
  254. "shl %%cl, %1 \n\t"
  255. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  256. );
  257. #elif 0
  258. //P3:680 athlon:474
  259. asm(
  260. "cmp $0x100, %0 \n\t"
  261. "setb %%cl \n\t" //FIXME 31->63 for x86-64
  262. "shl %%cl, %0 \n\t"
  263. "shl %%cl, %1 \n\t"
  264. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  265. );
  266. #elif 1
  267. int temp2;
  268. //P3:665 athlon:517
  269. asm(
  270. "lea -0x100(%0), %%eax \n\t"
  271. "cdq \n\t"
  272. "mov %0, %%eax \n\t"
  273. "and %%edx, %0 \n\t"
  274. "and %1, %%edx \n\t"
  275. "add %%eax, %0 \n\t"
  276. "add %%edx, %1 \n\t"
  277. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  278. );
  279. #elif 0
  280. int temp2;
  281. //P3:673 athlon:509
  282. asm(
  283. "cmp $0x100, %0 \n\t"
  284. "sbb %%edx, %%edx \n\t"
  285. "mov %0, %%eax \n\t"
  286. "and %%edx, %0 \n\t"
  287. "and %1, %%edx \n\t"
  288. "add %%eax, %0 \n\t"
  289. "add %%edx, %1 \n\t"
  290. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  291. );
  292. #else
  293. int temp2;
  294. //P3:677 athlon:511
  295. asm(
  296. "cmp $0x100, %0 \n\t"
  297. "lea (%0, %0), %%eax \n\t"
  298. "lea (%1, %1), %%edx \n\t"
  299. "cmovb %%eax, %0 \n\t"
  300. "cmovb %%edx, %1 \n\t"
  301. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  302. );
  303. #endif
  304. #else
  305. //P3:675 athlon:476
  /* range - 0x100 is negative exactly when range < 0x100; viewed as uint32_t
   * its top bit is then set, so shift is 1 in that case and 0 otherwise. */
  306. int shift= (uint32_t)(c->range - 0x100)>>31;
  307. c->range<<= shift;
  308. c->low <<= shift;
  309. #endif
  /* refill once the low CABAC_BITS bits of low are zero */
  310. if(!(c->low & CABAC_MASK))
  311. refill(c);
  312. }
  /**
   * Decode one context-coded bin.
   *
   * RangeLPS is looked up from ff_h264_lps_range using (range & 0xC0) and the
   * context state, and is subtracted from range. low is then compared with
   * range<<17: below that the most-probable symbol (low bit of *state) was
   * coded, otherwise the least-probable one. *state is advanced through the
   * transition tables, range/low are renormalized and the bytestream is
   * refilled when the low 16 bits of low become zero.
   *
   * Three implementations are selected by the preprocessor:
   *  - x86 inline asm, when ARCH_X86 is defined and not (PIC && __GNUC__),
   *    in a branching and a branchless (BRANCHLESS_CABAC_DECODER) flavour;
   *  - portable C, again branching or branchless.
   *
   * @param c     decoder context
   * @param state context state to use; updated in place
   * @return the decoded bin (0 or 1)
   */
  313. static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
  314. //FIXME gcc generates duplicate load/stores for c->low and c->range
  315. #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
  316. int bit;
  /* Byte offsets of CABACContext members, spliced into the asm strings below.
   * They must match the struct layout; they are not #undef'd in this function. */
  317. #define LOW "0"
  318. #define RANGE "4"
  319. #define BYTESTART "12"
  320. #define BYTE "16"
  321. #define BYTEEND "20"
  322. #ifndef BRANCHLESS_CABAC_DECODER
  323. asm volatile(
  324. "movzbl (%1), %0 \n\t"
  325. "movl "RANGE "(%2), %%ebx \n\t"
  326. "movl "RANGE "(%2), %%edx \n\t"
  327. "andl $0xC0, %%ebx \n\t"
  328. "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
  329. "movl "LOW "(%2), %%ebx \n\t"
  330. //eax:state ebx:low, edx:range, esi:RangeLPS
  331. "subl %%esi, %%edx \n\t"
  332. "movl %%edx, %%ecx \n\t"
  333. "shll $17, %%ecx \n\t"
  334. "cmpl %%ecx, %%ebx \n\t"
  335. " ja 1f \n\t"
  336. #if 1
  337. //athlon:4067 P3:4110
  338. "lea -0x100(%%edx), %%ecx \n\t"
  339. "shr $31, %%ecx \n\t"
  340. "shl %%cl, %%edx \n\t"
  341. "shl %%cl, %%ebx \n\t"
  342. #else
  343. //athlon:4057 P3:4130
  344. "cmp $0x100, %%edx \n\t" //FIXME avoidable
  345. "setb %%cl \n\t"
  346. "shl %%cl, %%edx \n\t"
  347. "shl %%cl, %%ebx \n\t"
  348. #endif
  349. "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx \n\t"
  350. "movb %%cl, (%1) \n\t"
  351. //eax:state ebx:low, edx:range, esi:RangeLPS
  352. "test %%bx, %%bx \n\t"
  353. " jnz 2f \n\t"
  354. "movl "BYTE "(%2), %%esi \n\t"
  355. "subl $0xFFFF, %%ebx \n\t"
  356. "movzwl (%%esi), %%ecx \n\t"
  357. "bswap %%ecx \n\t"
  358. "shrl $15, %%ecx \n\t"
  359. "addl $2, %%esi \n\t"
  360. "addl %%ecx, %%ebx \n\t"
  361. "movl %%esi, "BYTE "(%2) \n\t"
  362. "jmp 2f \n\t"
  363. "1: \n\t"
  364. //eax:state ebx:low, edx:range, esi:RangeLPS
  365. "subl %%ecx, %%ebx \n\t"
  366. "movl %%esi, %%edx \n\t"
  367. "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
  368. "shll %%cl, %%ebx \n\t"
  369. "shll %%cl, %%edx \n\t"
  370. "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
  371. "movb %%cl, (%1) \n\t"
  372. "addl $1, %0 \n\t"
  373. "test %%bx, %%bx \n\t"
  374. " jnz 2f \n\t"
  375. "movl "BYTE "(%2), %%ecx \n\t"
  376. "movzwl (%%ecx), %%esi \n\t"
  377. "bswap %%esi \n\t"
  378. "shrl $15, %%esi \n\t"
  379. "subl $0xFFFF, %%esi \n\t"
  380. "addl $2, %%ecx \n\t"
  381. "movl %%ecx, "BYTE "(%2) \n\t"
  382. "leal -1(%%ebx), %%ecx \n\t"
  383. "xorl %%ebx, %%ecx \n\t"
  384. "shrl $15, %%ecx \n\t"
  385. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
  386. "neg %%ecx \n\t"
  387. "add $7, %%ecx \n\t"
  388. "shll %%cl , %%esi \n\t"
  389. "addl %%esi, %%ebx \n\t"
  390. "2: \n\t"
  391. "movl %%edx, "RANGE "(%2) \n\t"
  392. "movl %%ebx, "LOW "(%2) \n\t"
  393. :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
  394. :"r"(state), "r"(c)
  395. : "%ecx", "%ebx", "%edx", "%esi", "memory"
  396. );
  /* eax holds the (possibly incremented) state; only its low bit is the bin */
  397. bit&=1;
  398. #else /* BRANCHLESS_CABAC_DECODER */
  399. asm volatile(
  400. "movzbl (%1), %0 \n\t"
  401. "movl "RANGE "(%2), %%ebx \n\t"
  402. "movl "RANGE "(%2), %%edx \n\t"
  403. "andl $0xC0, %%ebx \n\t"
  404. "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
  405. "movl "LOW "(%2), %%ebx \n\t"
  406. //eax:state ebx:low, edx:range, esi:RangeLPS
  407. "subl %%esi, %%edx \n\t"
  408. #if (defined CMOV_IS_FAST && __CPU__ >= 686)
  409. "movl %%edx, %%ecx \n\t"
  410. "shl $17, %%edx \n\t"
  411. "cmpl %%ebx, %%edx \n\t"
  412. "cmova %%ecx, %%esi \n\t"
  413. "sbbl %%ecx, %%ecx \n\t"
  414. "andl %%ecx, %%edx \n\t"
  415. "subl %%edx, %%ebx \n\t"
  416. "xorl %%ecx, %0 \n\t"
  417. #else /* CMOV_IS_FAST */
  418. "movl %%edx, %%ecx \n\t"
  419. "shl $17, %%edx \n\t"
  420. "subl %%ebx, %%edx \n\t"
  421. "sarl $31, %%edx \n\t" //lps_mask
  422. "subl %%ecx, %%esi \n\t" //RangeLPS - range
  423. "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
  424. "addl %%ecx, %%esi \n\t" //new range
  425. "shl $17, %%ecx \n\t"
  426. "andl %%edx, %%ecx \n\t"
  427. "subl %%ecx, %%ebx \n\t"
  428. "xorl %%edx, %0 \n\t"
  429. #endif /* CMOV_IS_FAST */
  430. //eax:state ebx:low edx:mask esi:range
  431. //eax:bit ebx:low esi:range
  432. "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
  433. "shll %%cl, %%esi \n\t"
  434. "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
  435. "movb %%dl, (%1) \n\t"
  436. "movl %%esi, "RANGE "(%2) \n\t"
  437. "shll %%cl, %%ebx \n\t"
  438. "movl %%ebx, "LOW "(%2) \n\t"
  439. "test %%bx, %%bx \n\t"
  440. " jnz 1f \n\t"
  441. "movl "BYTE "(%2), %%ecx \n\t"
  442. "movzwl (%%ecx), %%esi \n\t"
  443. "bswap %%esi \n\t"
  444. "shrl $15, %%esi \n\t"
  445. "subl $0xFFFF, %%esi \n\t"
  446. "addl $2, %%ecx \n\t"
  447. "movl %%ecx, "BYTE "(%2) \n\t"
  448. "leal -1(%%ebx), %%ecx \n\t"
  449. "xorl %%ebx, %%ecx \n\t"
  450. "shrl $15, %%ecx \n\t"
  451. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
  452. "neg %%ecx \n\t"
  453. "add $7, %%ecx \n\t"
  454. "shll %%cl , %%esi \n\t"
  455. "addl %%esi, %%ebx \n\t"
  456. "movl %%ebx, "LOW "(%2) \n\t"
  457. "1: \n\t"
  458. :"=&a"(bit)
  459. :"r"(state), "r"(c)
  460. : "%ecx", "%ebx", "%edx", "%esi", "memory"
  461. );
  462. bit&=1;
  463. #endif /* BRANCHLESS_CABAC_DECODER */
  464. #else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
  /* Portable C implementation. */
  465. int s = *state;
  /* flat offset 2*(range&0xC0) + s selects row (range&0xC0)>>6, column s */
  466. int RangeLPS= ff_h264_lps_range[0][2*(c->range&0xC0) + s];
  467. int bit, lps_mask attribute_unused;
  468. c->range -= RangeLPS;
  469. #ifndef BRANCHLESS_CABAC_DECODER
  470. if(c->low < (c->range<<17)){
  /* MPS path: at most one renormalization step is needed */
  471. bit= s&1;
  472. *state= ff_h264_mps_state[s];
  473. renorm_cabac_decoder_once(c);
  474. }else{
  /* LPS path: bit temporarily holds the renormalization shift */
  475. bit= ff_h264_norm_shift[RangeLPS];
  476. c->low -= (c->range<<17);
  477. *state= ff_h264_lps_state[s];
  478. c->range = RangeLPS<<bit;
  479. c->low <<= bit;
  480. bit= (s&1)^1;
  /* NOTE(review): literal 0xFFFF here, CABAC_MASK in the branchless path;
   * they are equal only while CABAC_BITS == 16. */
  481. if(!(c->low & 0xFFFF)){
  482. refill2(c);
  483. }
  484. }
  485. #else /* BRANCHLESS_CABAC_DECODER */
  /* lps_mask is all ones when low > range<<17 and 0 otherwise; this relies on
   * >> of a negative int being an arithmetic shift.
   * NOTE(review): for low == range<<17 this picks the MPS path while the
   * branching variant above picks LPS -- presumably that case cannot occur;
   * confirm. */
  486. lps_mask= ((c->range<<17) - c->low)>>31;
  487. c->low -= (c->range<<17) & lps_mask;
  488. c->range += (RangeLPS - c->range) & lps_mask;
  /* on the LPS path s becomes ~s, i.e. a negative index into the table below */
  489. s^=lps_mask;
  490. *state= (ff_h264_mlps_state+128)[s];
  491. bit= s&1;
  /* lps_mask is reused as the renormalization shift from here on */
  492. lps_mask= ff_h264_norm_shift[c->range];
  493. c->range<<= lps_mask;
  494. c->low <<= lps_mask;
  495. if(!(c->low & CABAC_MASK))
  496. refill2(c);
  497. #endif /* BRANCHLESS_CABAC_DECODER */
  498. #endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
  499. return bit;
  500. }
  /**
   * Out-of-line wrapper around get_cabac_inline(): the noinline attribute
   * keeps this function itself from being inlined into its callers.
   *
   * @return the decoded bin, as returned by get_cabac_inline()
   */
  501. static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
  502. return get_cabac_inline(c,state);
  503. }
  /**
   * Decode one context-coded bin; forwards to get_cabac_inline() without
   * forcing or forbidding inlining.
   *
   * @param c     decoder context
   * @param state context state to use; updated in place
   * @return the decoded bin, as returned by get_cabac_inline()
   */
  504. static int get_cabac(CABACContext *c, uint8_t * const state){
  505. return get_cabac_inline(c,state);
  506. }
  507. static int get_cabac_bypass(CABACContext *c){
  508. int range;
  509. c->low += c->low;
  510. if(!(c->low & CABAC_MASK))
  511. refill(c);
  512. range= c->range<<17;
  513. if(c->low < range){
  514. return 0;
  515. }else{
  516. c->low -= range;
  517. return 1;
  518. }
  519. }
  520. /**
  521. *
  522. * @return the number of bytes read or 0 if no end
  523. */
  524. static int get_cabac_terminate(CABACContext *c){
  525. c->range -= 2;
  526. if(c->low < c->range<<17){
  527. renorm_cabac_decoder_once(c);
  528. return 0;
  529. }else{
  530. return c->bytestream - c->bytestream_start;
  531. }
  532. }
  533. /**
  534. * get (truncated) unnary binarization.
  535. */
  536. static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
  537. int i;
  538. for(i=0; i<max; i++){
  539. if(get_cabac(c, state)==0)
  540. return i;
  541. if(i< max_index) state++;
  542. }
  543. return truncated ? max : -1;
  544. }
  545. /**
  546. * get unary exp golomb k-th order binarization.
  547. */
  548. static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
  549. int i, v;
  550. int m= 1<<k;
  551. if(get_cabac(c, state)==0)
  552. return 0;
  553. if(0 < max_index) state++;
  554. for(i=1; i<max; i++){
  555. if(get_cabac(c, state)==0){
  556. if(is_signed && get_cabac_bypass(c)){
  557. return -i;
  558. }else
  559. return i;
  560. }
  561. if(i < max_index) state++;
  562. }
  563. while(get_cabac_bypass(c)){
  564. i+= m;
  565. m+= m;
  566. }
  567. v=0;
  568. while(m>>=1){
  569. v+= v + get_cabac_bypass(c);
  570. }
  571. i += v;
  572. if(is_signed && get_cabac_bypass(c)){
  573. return -i;
  574. }else
  575. return i;
  576. }