You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

658 lines
19KB

  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  3. * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file cabac.h
  24. * Context Adaptive Binary Arithmetic Coder.
  25. */
  26. //#undef NDEBUG
  27. #include <assert.h>
/* Number of bitstream bits the decoder pulls in per refill; refill() and
 * refill2() advance the read pointer by CABAC_BITS/8 bytes. */
#define CABAC_BITS 16
/* Mask covering the low CABAC_BITS bits; the decoder refills when these bits
 * of "low" are all zero. */
#define CABAC_MASK ((1<<CABAC_BITS)-1)
/* When defined, get_cabac_inline() compiles its branchless variants. */
#define BRANCHLESS_CABAC_DECODER 1
/* When defined, the branchless x86 path of get_cabac_inline() uses its
 * cmov-based instruction sequence. */
#define CMOV_IS_FAST 1
/**
 * State used by both the CABAC encoder and decoder routines in this file.
 *
 * NOTE: the x86 assembly in get_cabac_inline() addresses low, range and the
 * bytestream pointers through hard-coded byte offsets (0, 4, 12, 16, 20), so
 * the member order must not be changed, and those offsets only hold when
 * STRICT_LIMITS is not defined.
 */
typedef struct CABACContext{
    int low;                 ///< arithmetic coder "low" value; compared against range by the decoder
    int range;               ///< width of the current coding interval
    int outstanding_count;   ///< pending bits; put_cabac_bit() emits them as the inverse of the next written bit
#ifdef STRICT_LIMITS
    int symCount;            ///< incremented once per symbol by the put_cabac*() functions
#endif
    const uint8_t *bytestream_start; ///< reference point for the byte count returned by get_cabac_terminate()
    const uint8_t *bytestream;       ///< next input bytes consumed by refill()/refill2()
    const uint8_t *bytestream_end;   ///< not used by the inline functions here; presumably the end of the input buffer (TODO confirm)
    PutBitContext pb;                ///< bit writer the encoder functions emit into
}CABACContext;
/* Tables and init functions below are only declared here; their definitions
 * are not part of this header. */

/* Combined state transition table; get_cabac_inline() indexes it at offset
 * +128 with the context state, or with its bitwise complement when the less
 * probable symbol was decoded. */
extern uint8_t ff_h264_mlps_state[4*64];
/* Indexed [context state][quantized range] by put_cabac()/get_cabac_inline(). */
extern uint8_t ff_h264_lps_range[2*65][4]; ///< rangeTabLPS
extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS
extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS
/* Left-shift amounts used by the decoder to renormalize range and low. */
extern const uint8_t ff_h264_norm_shift[128];
/* NOTE(review): presumably prepares c for writing into buf -- confirm against the definition. */
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
/* NOTE(review): presumably prepares c for reading from buf -- confirm against the definition. */
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
/* NOTE(review): presumably fills the state/range tables above -- confirm against the definition. */
void ff_init_cabac_states(CABACContext *c);
  52. static inline void put_cabac_bit(CABACContext *c, int b){
  53. put_bits(&c->pb, 1, b);
  54. for(;c->outstanding_count; c->outstanding_count--){
  55. put_bits(&c->pb, 1, 1-b);
  56. }
  57. }
  58. static inline void renorm_cabac_encoder(CABACContext *c){
  59. while(c->range < 0x100){
  60. //FIXME optimize
  61. if(c->low<0x100){
  62. put_cabac_bit(c, 0);
  63. }else if(c->low<0x200){
  64. c->outstanding_count++;
  65. c->low -= 0x100;
  66. }else{
  67. put_cabac_bit(c, 1);
  68. c->low -= 0x200;
  69. }
  70. c->range+= c->range;
  71. c->low += c->low;
  72. }
  73. }
  74. static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
  75. int RangeLPS= ff_h264_lps_range[*state][c->range>>6];
  76. if(bit == ((*state)&1)){
  77. c->range -= RangeLPS;
  78. *state= ff_h264_mps_state[*state];
  79. }else{
  80. c->low += c->range - RangeLPS;
  81. c->range = RangeLPS;
  82. *state= ff_h264_lps_state[*state];
  83. }
  84. renorm_cabac_encoder(c);
  85. #ifdef STRICT_LIMITS
  86. c->symCount++;
  87. #endif
  88. }
  89. static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
  90. assert(c->range > RangeLPS);
  91. if(!bit){
  92. c->range -= RangeLPS;
  93. }else{
  94. c->low += c->range - RangeLPS;
  95. c->range = RangeLPS;
  96. }
  97. renorm_cabac_encoder(c);
  98. #ifdef STRICT_LIMITS
  99. c->symCount++;
  100. #endif
  101. }
  102. /**
  103. * @param bit 0 -> write zero bit, !=0 write one bit
  104. */
  105. static void put_cabac_bypass(CABACContext *c, int bit){
  106. c->low += c->low;
  107. if(bit){
  108. c->low += c->range;
  109. }
  110. //FIXME optimize
  111. if(c->low<0x200){
  112. put_cabac_bit(c, 0);
  113. }else if(c->low<0x400){
  114. c->outstanding_count++;
  115. c->low -= 0x200;
  116. }else{
  117. put_cabac_bit(c, 1);
  118. c->low -= 0x400;
  119. }
  120. #ifdef STRICT_LIMITS
  121. c->symCount++;
  122. #endif
  123. }
  124. /**
  125. *
  126. * @return the number of bytes written
  127. */
  128. static int put_cabac_terminate(CABACContext *c, int bit){
  129. c->range -= 2;
  130. if(!bit){
  131. renorm_cabac_encoder(c);
  132. }else{
  133. c->low += c->range;
  134. c->range= 2;
  135. renorm_cabac_encoder(c);
  136. assert(c->low <= 0x1FF);
  137. put_cabac_bit(c, c->low>>9);
  138. put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
  139. flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
  140. }
  141. #ifdef STRICT_LIMITS
  142. c->symCount++;
  143. #endif
  144. return (put_bits_count(&c->pb)+7)>>3;
  145. }
  146. /**
  147. * put (truncated) unary binarization.
  148. */
  149. static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
  150. int i;
  151. assert(v <= max);
  152. #if 1
  153. for(i=0; i<v; i++){
  154. put_cabac(c, state, 1);
  155. if(i < max_index) state++;
  156. }
  157. if(truncated==0 || v<max)
  158. put_cabac(c, state, 0);
  159. #else
  160. if(v <= max_index){
  161. for(i=0; i<v; i++){
  162. put_cabac(c, state+i, 1);
  163. }
  164. if(truncated==0 || v<max)
  165. put_cabac(c, state+i, 0);
  166. }else{
  167. for(i=0; i<=max_index; i++){
  168. put_cabac(c, state+i, 1);
  169. }
  170. for(; i<v; i++){
  171. put_cabac(c, state+max_index, 1);
  172. }
  173. if(truncated==0 || v<max)
  174. put_cabac(c, state+max_index, 0);
  175. }
  176. #endif
  177. }
  178. /**
  179. * put unary exp golomb k-th order binarization.
  180. */
  181. static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
  182. int i;
  183. if(v==0)
  184. put_cabac(c, state, 0);
  185. else{
  186. const int sign= v < 0;
  187. if(is_signed) v= FFABS(v);
  188. if(v<max){
  189. for(i=0; i<v; i++){
  190. put_cabac(c, state, 1);
  191. if(i < max_index) state++;
  192. }
  193. put_cabac(c, state, 0);
  194. }else{
  195. int m= 1<<k;
  196. for(i=0; i<max; i++){
  197. put_cabac(c, state, 1);
  198. if(i < max_index) state++;
  199. }
  200. v -= max;
  201. while(v >= m){ //FIXME optimize
  202. put_cabac_bypass(c, 1);
  203. v-= m;
  204. m+= m;
  205. }
  206. put_cabac_bypass(c, 0);
  207. while(m>>=1){
  208. put_cabac_bypass(c, v&m);
  209. }
  210. }
  211. if(is_signed)
  212. put_cabac_bypass(c, sign);
  213. }
  214. }
  215. static void refill(CABACContext *c){
  216. #if CABAC_BITS == 16
  217. c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  218. #else
  219. c->low+= c->bytestream[0]<<1;
  220. #endif
  221. c->low -= CABAC_MASK;
  222. c->bytestream+= CABAC_BITS/8;
  223. }
  224. static void refill2(CABACContext *c){
  225. int i, x;
  226. x= c->low ^ (c->low-1);
  227. i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS+1)];
  228. x= -CABAC_MASK;
  229. #if CABAC_BITS == 16
  230. x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  231. #else
  232. x+= c->bytestream[0]<<1;
  233. #endif
  234. c->low += x<<i;
  235. c->bytestream+= CABAC_BITS/8;
  236. }
  237. static inline void renorm_cabac_decoder(CABACContext *c){
  238. while(c->range < (0x200 << CABAC_BITS)){
  239. c->range+= c->range;
  240. c->low+= c->low;
  241. if(!(c->low & CABAC_MASK))
  242. refill(c);
  243. }
  244. }
  245. static inline void renorm_cabac_decoder_once(CABACContext *c){
  246. #ifdef ARCH_X86_DISABLED
  247. int temp;
  248. #if 0
  249. //P3:683 athlon:475
  250. asm(
  251. "lea -0x2000000(%0), %2 \n\t"
  252. "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
  253. "shl %%cl, %0 \n\t"
  254. "shl %%cl, %1 \n\t"
  255. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  256. );
  257. #elif 0
  258. //P3:680 athlon:474
  259. asm(
  260. "cmp $0x2000000, %0 \n\t"
  261. "setb %%cl \n\t" //FIXME 31->63 for x86-64
  262. "shl %%cl, %0 \n\t"
  263. "shl %%cl, %1 \n\t"
  264. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  265. );
  266. #elif 1
  267. int temp2;
  268. //P3:665 athlon:517
  269. asm(
  270. "lea -0x2000000(%0), %%eax \n\t"
  271. "cdq \n\t"
  272. "mov %0, %%eax \n\t"
  273. "and %%edx, %0 \n\t"
  274. "and %1, %%edx \n\t"
  275. "add %%eax, %0 \n\t"
  276. "add %%edx, %1 \n\t"
  277. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  278. );
  279. #elif 0
  280. int temp2;
  281. //P3:673 athlon:509
  282. asm(
  283. "cmp $0x2000000, %0 \n\t"
  284. "sbb %%edx, %%edx \n\t"
  285. "mov %0, %%eax \n\t"
  286. "and %%edx, %0 \n\t"
  287. "and %1, %%edx \n\t"
  288. "add %%eax, %0 \n\t"
  289. "add %%edx, %1 \n\t"
  290. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  291. );
  292. #else
  293. int temp2;
  294. //P3:677 athlon:511
  295. asm(
  296. "cmp $0x2000000, %0 \n\t"
  297. "lea (%0, %0), %%eax \n\t"
  298. "lea (%1, %1), %%edx \n\t"
  299. "cmovb %%eax, %0 \n\t"
  300. "cmovb %%edx, %1 \n\t"
  301. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  302. );
  303. #endif
  304. #else
  305. //P3:675 athlon:476
  306. int shift= (uint32_t)(c->range - (0x200 << CABAC_BITS))>>31;
  307. c->range<<= shift;
  308. c->low <<= shift;
  309. #endif
  310. if(!(c->low & CABAC_MASK))
  311. refill(c);
  312. }
  313. static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
  314. //FIXME gcc generates duplicate load/stores for c->low and c->range
  315. #ifdef ARCH_X86
  316. int bit;
  317. #define LOW "0"
  318. #define RANGE "4"
  319. #define BYTESTART "12"
  320. #define BYTE "16"
  321. #define BYTEEND "20"
  322. #ifndef BRANCHLESS_CABAC_DECODER
  323. asm volatile(
  324. "movzbl (%1), %%eax \n\t"
  325. "movl "RANGE "(%2), %%ebx \n\t"
  326. "movl "RANGE "(%2), %%edx \n\t"
  327. "shrl $23, %%ebx \n\t"
  328. "movzbl "MANGLE(ff_h264_lps_range)"(%%ebx, %%eax, 4), %%esi\n\t"
  329. "shll $17, %%esi \n\t"
  330. "movl "LOW "(%2), %%ebx \n\t"
  331. //eax:state ebx:low, edx:range, esi:RangeLPS
  332. "subl %%esi, %%edx \n\t"
  333. "cmpl %%edx, %%ebx \n\t"
  334. " ja 1f \n\t"
  335. #if 1
  336. //athlon:4067 P3:4110
  337. "lea -0x2000000(%%edx), %%ecx \n\t"
  338. "shr $31, %%ecx \n\t"
  339. "shl %%cl, %%edx \n\t"
  340. "shl %%cl, %%ebx \n\t"
  341. #else
  342. //athlon:4057 P3:4130
  343. "cmp $0x2000000, %%edx \n\t" //FIXME avoidable
  344. "setb %%cl \n\t"
  345. "shl %%cl, %%edx \n\t"
  346. "shl %%cl, %%ebx \n\t"
  347. #endif
  348. "movzbl "MANGLE(ff_h264_mps_state)"(%%eax), %%ecx \n\t"
  349. "movb %%cl, (%1) \n\t"
  350. //eax:state ebx:low, edx:range, esi:RangeLPS
  351. "test %%bx, %%bx \n\t"
  352. " jnz 2f \n\t"
  353. "movl "BYTE "(%2), %%esi \n\t"
  354. "subl $0xFFFF, %%ebx \n\t"
  355. "movzwl (%%esi), %%ecx \n\t"
  356. "bswap %%ecx \n\t"
  357. "shrl $15, %%ecx \n\t"
  358. "addl $2, %%esi \n\t"
  359. "addl %%ecx, %%ebx \n\t"
  360. "movl %%esi, "BYTE "(%2) \n\t"
  361. "jmp 2f \n\t"
  362. "1: \n\t"
  363. //eax:state ebx:low, edx:range, esi:RangeLPS
  364. "subl %%edx, %%ebx \n\t"
  365. "movl %%esi, %%edx \n\t"
  366. "shr $19, %%esi \n\t"
  367. "movzbl "MANGLE(ff_h264_lps_state)"(%%eax), %%ecx \n\t"
  368. "movb %%cl, (%1) \n\t"
  369. "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
  370. "shll %%cl, %%ebx \n\t"
  371. "shll %%cl, %%edx \n\t"
  372. "addl $1, %%eax \n\t"
  373. "test %%bx, %%bx \n\t"
  374. " jnz 2f \n\t"
  375. "movl "BYTE "(%2), %%ecx \n\t"
  376. "movzwl (%%ecx), %%esi \n\t"
  377. "bswap %%esi \n\t"
  378. "shrl $15, %%esi \n\t"
  379. "subl $0xFFFF, %%esi \n\t"
  380. "addl $2, %%ecx \n\t"
  381. "movl %%ecx, "BYTE "(%2) \n\t"
  382. "leal -1(%%ebx), %%ecx \n\t"
  383. "xorl %%ebx, %%ecx \n\t"
  384. "shrl $17, %%ecx \n\t"
  385. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
  386. "neg %%ecx \n\t"
  387. "add $7, %%ecx \n\t"
  388. "shll %%cl , %%esi \n\t"
  389. "addl %%esi, %%ebx \n\t"
  390. "2: \n\t"
  391. "movl %%edx, "RANGE "(%2) \n\t"
  392. "movl %%ebx, "LOW "(%2) \n\t"
  393. :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
  394. :"r"(state), "r"(c)
  395. : "%ecx", "%ebx", "%edx", "%esi", "memory"
  396. );
  397. bit&=1;
  398. #else /* BRANCHLESS_CABAC_DECODER */
  399. asm volatile(
  400. "movzbl (%1), %%eax \n\t"
  401. "movl "RANGE "(%2), %%ebx \n\t"
  402. "movl "RANGE "(%2), %%edx \n\t"
  403. "shrl $23, %%ebx \n\t"
  404. "movzbl "MANGLE(ff_h264_lps_range)"(%%ebx, %%eax, 4), %%esi\n\t"
  405. "shll $17, %%esi \n\t"
  406. "movl "LOW "(%2), %%ebx \n\t"
  407. //eax:state ebx:low, edx:range, esi:RangeLPS
  408. "subl %%esi, %%edx \n\t"
  409. #ifdef CMOV_IS_FAST //FIXME actually define this somewhere
  410. "cmpl %%ebx, %%edx \n\t"
  411. "cmova %%edx, %%esi \n\t"
  412. "sbbl %%ecx, %%ecx \n\t"
  413. "andl %%ecx, %%edx \n\t"
  414. "subl %%edx, %%ebx \n\t"
  415. "xorl %%ecx, %%eax \n\t"
  416. #else /* CMOV_IS_FAST */
  417. "movl %%edx, %%ecx \n\t"
  418. "subl %%ebx, %%edx \n\t"
  419. "sarl $31, %%edx \n\t" //lps_mask
  420. "subl %%ecx, %%esi \n\t" //RangeLPS - range
  421. "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
  422. "addl %%ecx, %%esi \n\t" //new range
  423. "andl %%edx, %%ecx \n\t"
  424. "subl %%ecx, %%ebx \n\t"
  425. "xorl %%edx, %%eax \n\t"
  426. #endif /* CMOV_IS_FAST */
  427. //eax:state ebx:low edx:mask esi:range
  428. "movzbl "MANGLE(ff_h264_mlps_state)"+128(%%eax), %%ecx \n\t"
  429. "movb %%cl, (%1) \n\t"
  430. "movl %%esi, %%edx \n\t"
  431. //eax:bit ebx:low edx:range esi:range
  432. "shr $19, %%esi \n\t"
  433. "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
  434. "shll %%cl, %%edx \n\t"
  435. "movl %%edx, "RANGE "(%2) \n\t"
  436. "shll %%cl, %%ebx \n\t"
  437. "movl %%ebx, "LOW "(%2) \n\t"
  438. "test %%bx, %%bx \n\t"
  439. " jnz 1f \n\t"
  440. "movl "BYTE "(%2), %%ecx \n\t"
  441. "movzwl (%%ecx), %%esi \n\t"
  442. "bswap %%esi \n\t"
  443. "shrl $15, %%esi \n\t"
  444. "subl $0xFFFF, %%esi \n\t"
  445. "addl $2, %%ecx \n\t"
  446. "movl %%ecx, "BYTE "(%2) \n\t"
  447. "leal -1(%%ebx), %%ecx \n\t"
  448. "xorl %%ebx, %%ecx \n\t"
  449. "shrl $17, %%ecx \n\t"
  450. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
  451. "neg %%ecx \n\t"
  452. "add $7, %%ecx \n\t"
  453. "shll %%cl , %%esi \n\t"
  454. "addl %%esi, %%ebx \n\t"
  455. "movl %%ebx, "LOW "(%2) \n\t"
  456. "1: \n\t"
  457. :"=&a"(bit)
  458. :"r"(state), "r"(c)
  459. : "%ecx", "%ebx", "%edx", "%esi", "memory"
  460. );
  461. bit&=1;
  462. #endif /* BRANCHLESS_CABAC_DECODER */
  463. #else /* ARCH_X86 */
  464. int s = *state;
  465. int RangeLPS= ff_h264_lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
  466. int bit, lps_mask attribute_unused;
  467. c->range -= RangeLPS;
  468. #ifndef BRANCHLESS_CABAC_DECODER
  469. if(c->low < c->range){
  470. bit= s&1;
  471. *state= ff_h264_mps_state[s];
  472. renorm_cabac_decoder_once(c);
  473. }else{
  474. bit= ff_h264_norm_shift[RangeLPS>>19];
  475. c->low -= c->range;
  476. *state= ff_h264_lps_state[s];
  477. c->range = RangeLPS<<bit;
  478. c->low <<= bit;
  479. bit= (s&1)^1;
  480. if(!(c->low & 0xFFFF)){
  481. refill2(c);
  482. }
  483. }
  484. #else /* BRANCHLESS_CABAC_DECODER */
  485. lps_mask= (c->range - c->low)>>31;
  486. c->low -= c->range & lps_mask;
  487. c->range += (RangeLPS - c->range) & lps_mask;
  488. s^=lps_mask;
  489. *state= (ff_h264_mlps_state+128)[s];
  490. bit= s&1;
  491. lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+3)];
  492. c->range<<= lps_mask;
  493. c->low <<= lps_mask;
  494. if(!(c->low & CABAC_MASK))
  495. refill2(c);
  496. #endif /* BRANCHLESS_CABAC_DECODER */
  497. #endif /* ARCH_X86 */
  498. return bit;
  499. }
  500. static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
  501. return get_cabac_inline(c,state);
  502. }
  503. static int get_cabac(CABACContext *c, uint8_t * const state){
  504. return get_cabac_inline(c,state);
  505. }
  506. static int get_cabac_bypass(CABACContext *c){
  507. c->low += c->low;
  508. if(!(c->low & CABAC_MASK))
  509. refill(c);
  510. if(c->low < c->range){
  511. return 0;
  512. }else{
  513. c->low -= c->range;
  514. return 1;
  515. }
  516. }
  517. /**
  518. *
  519. * @return the number of bytes read or 0 if no end
  520. */
  521. static int get_cabac_terminate(CABACContext *c){
  522. c->range -= 4<<CABAC_BITS;
  523. if(c->low < c->range){
  524. renorm_cabac_decoder_once(c);
  525. return 0;
  526. }else{
  527. return c->bytestream - c->bytestream_start;
  528. }
  529. }
  530. /**
  531. * get (truncated) unnary binarization.
  532. */
  533. static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
  534. int i;
  535. for(i=0; i<max; i++){
  536. if(get_cabac(c, state)==0)
  537. return i;
  538. if(i< max_index) state++;
  539. }
  540. return truncated ? max : -1;
  541. }
  542. /**
  543. * get unary exp golomb k-th order binarization.
  544. */
  545. static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
  546. int i, v;
  547. int m= 1<<k;
  548. if(get_cabac(c, state)==0)
  549. return 0;
  550. if(0 < max_index) state++;
  551. for(i=1; i<max; i++){
  552. if(get_cabac(c, state)==0){
  553. if(is_signed && get_cabac_bypass(c)){
  554. return -i;
  555. }else
  556. return i;
  557. }
  558. if(i < max_index) state++;
  559. }
  560. while(get_cabac_bypass(c)){
  561. i+= m;
  562. m+= m;
  563. }
  564. v=0;
  565. while(m>>=1){
  566. v+= v + get_cabac_bypass(c);
  567. }
  568. i += v;
  569. if(is_signed && get_cabac_bypass(c)){
  570. return -i;
  571. }else
  572. return i;
  573. }