You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

646 lines
18KB

  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  3. * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. /**
  23. * @file cabac.h
  24. * Context Adaptive Binary Arithmetic Coder.
  25. */
  26. //#undef NDEBUG
  27. #include <assert.h>
  /* Number of bitstream bits the decoder pulls in per refill; refill() and
   * refill2() advance the read pointer by CABAC_BITS/8 bytes. */
  28. #define CABAC_BITS 16
  /* Mask covering the low CABAC_BITS bits of the decoder's `low` register;
   * the decoder refills when (low & CABAC_MASK) is zero. */
  29. #define CABAC_MASK ((1<<CABAC_BITS)-1)
  /* Tested with #ifndef in get_cabac(): being defined at all (the value is
   * not inspected) selects the branchless decoding paths. */
  30. #define BRANCHLESS_CABAC_DECODER 1
  /**
   * State shared by the CABAC encoder and decoder in this header.
   *
   * NOTE(review): the x86 inline assembly in get_cabac() addresses these
   * fields through hard-coded byte offsets (low at 0, range at 4, lps_range
   * at 12, ...). Those offsets only line up when `int` and pointers are
   * 4 bytes and STRICT_LIMITS is not defined, so do not reorder or resize
   * fields without updating them.
   */
  31. typedef struct CABACContext{
  /* Arithmetic coder interval: offset (low) and width (range). */
  32. int low;
  33. int range;
  /* Encoder only: number of pending bits that put_cabac_bit() emits as the
   * inverse of the next resolved bit. */
  34. int outstanding_count;
  35. #ifdef STRICT_LIMITS
  /* Incremented once per symbol written by the put_cabac*() functions. */
  36. int symCount;
  37. #endif
  /* Per-context lookup tables, indexed by the context state byte. */
  38. uint8_t lps_range[2*65][4]; ///< rangeTabLPS
  39. uint8_t lps_state[2*64]; ///< transIdxLPS
  40. uint8_t mps_state[2*64]; ///< transIdxMPS
  /* Decoder input: start of the buffer, current read position (advanced by
   * refill()/refill2()), and presumably one past the last byte - TODO
   * confirm against ff_init_cabac_decoder(). */
  41. const uint8_t *bytestream_start;
  42. const uint8_t *bytestream;
  43. const uint8_t *bytestream_end;
  /* Encoder output bit writer. */
  44. PutBitContext pb;
  45. }CABACContext;
  /* Constant tables defined outside this header. ff_h264_norm_shift is used
   * here to look up the renormalization shift count for a given range. */
  46. extern const uint8_t ff_h264_lps_range[64][4];
  47. extern const uint8_t ff_h264_mps_state[64];
  48. extern const uint8_t ff_h264_lps_state[64];
  49. extern const uint8_t ff_h264_norm_shift[128];
  /* Initialisation entry points, implemented elsewhere. Judging by the
   * signatures they attach an output buffer (encoder), an input buffer
   * (decoder), and fill the per-context tables of CABACContext from the
   * given source tables - TODO confirm against the implementation. */
  50. void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
  51. void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
  52. void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4],
  53. uint8_t const *mps_state, uint8_t const *lps_state, int state_count);
  54. static inline void put_cabac_bit(CABACContext *c, int b){
  55. put_bits(&c->pb, 1, b);
  56. for(;c->outstanding_count; c->outstanding_count--){
  57. put_bits(&c->pb, 1, 1-b);
  58. }
  59. }
  60. static inline void renorm_cabac_encoder(CABACContext *c){
  61. while(c->range < 0x100){
  62. //FIXME optimize
  63. if(c->low<0x100){
  64. put_cabac_bit(c, 0);
  65. }else if(c->low<0x200){
  66. c->outstanding_count++;
  67. c->low -= 0x100;
  68. }else{
  69. put_cabac_bit(c, 1);
  70. c->low -= 0x200;
  71. }
  72. c->range+= c->range;
  73. c->low += c->low;
  74. }
  75. }
  76. static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
  77. int RangeLPS= c->lps_range[*state][c->range>>6];
  78. if(bit == ((*state)&1)){
  79. c->range -= RangeLPS;
  80. *state= c->mps_state[*state];
  81. }else{
  82. c->low += c->range - RangeLPS;
  83. c->range = RangeLPS;
  84. *state= c->lps_state[*state];
  85. }
  86. renorm_cabac_encoder(c);
  87. #ifdef STRICT_LIMITS
  88. c->symCount++;
  89. #endif
  90. }
  91. static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
  92. assert(c->range > RangeLPS);
  93. if(!bit){
  94. c->range -= RangeLPS;
  95. }else{
  96. c->low += c->range - RangeLPS;
  97. c->range = RangeLPS;
  98. }
  99. renorm_cabac_encoder(c);
  100. #ifdef STRICT_LIMITS
  101. c->symCount++;
  102. #endif
  103. }
  104. /**
  105. * @param bit 0 -> write zero bit, !=0 write one bit
  106. */
  107. static void put_cabac_bypass(CABACContext *c, int bit){
  108. c->low += c->low;
  109. if(bit){
  110. c->low += c->range;
  111. }
  112. //FIXME optimize
  113. if(c->low<0x200){
  114. put_cabac_bit(c, 0);
  115. }else if(c->low<0x400){
  116. c->outstanding_count++;
  117. c->low -= 0x200;
  118. }else{
  119. put_cabac_bit(c, 1);
  120. c->low -= 0x400;
  121. }
  122. #ifdef STRICT_LIMITS
  123. c->symCount++;
  124. #endif
  125. }
  126. /**
  127. *
  128. * @return the number of bytes written
  129. */
  130. static int put_cabac_terminate(CABACContext *c, int bit){
  131. c->range -= 2;
  132. if(!bit){
  133. renorm_cabac_encoder(c);
  134. }else{
  135. c->low += c->range;
  136. c->range= 2;
  137. renorm_cabac_encoder(c);
  138. assert(c->low <= 0x1FF);
  139. put_cabac_bit(c, c->low>>9);
  140. put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
  141. flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
  142. }
  143. #ifdef STRICT_LIMITS
  144. c->symCount++;
  145. #endif
  146. return (put_bits_count(&c->pb)+7)>>3;
  147. }
  148. /**
  149. * put (truncated) unary binarization.
  150. */
  151. static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
  152. int i;
  153. assert(v <= max);
  154. #if 1
  155. for(i=0; i<v; i++){
  156. put_cabac(c, state, 1);
  157. if(i < max_index) state++;
  158. }
  159. if(truncated==0 || v<max)
  160. put_cabac(c, state, 0);
  161. #else
  162. if(v <= max_index){
  163. for(i=0; i<v; i++){
  164. put_cabac(c, state+i, 1);
  165. }
  166. if(truncated==0 || v<max)
  167. put_cabac(c, state+i, 0);
  168. }else{
  169. for(i=0; i<=max_index; i++){
  170. put_cabac(c, state+i, 1);
  171. }
  172. for(; i<v; i++){
  173. put_cabac(c, state+max_index, 1);
  174. }
  175. if(truncated==0 || v<max)
  176. put_cabac(c, state+max_index, 0);
  177. }
  178. #endif
  179. }
  180. /**
  181. * put unary exp golomb k-th order binarization.
  182. */
  183. static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
  184. int i;
  185. if(v==0)
  186. put_cabac(c, state, 0);
  187. else{
  188. const int sign= v < 0;
  189. if(is_signed) v= ABS(v);
  190. if(v<max){
  191. for(i=0; i<v; i++){
  192. put_cabac(c, state, 1);
  193. if(i < max_index) state++;
  194. }
  195. put_cabac(c, state, 0);
  196. }else{
  197. int m= 1<<k;
  198. for(i=0; i<max; i++){
  199. put_cabac(c, state, 1);
  200. if(i < max_index) state++;
  201. }
  202. v -= max;
  203. while(v >= m){ //FIXME optimize
  204. put_cabac_bypass(c, 1);
  205. v-= m;
  206. m+= m;
  207. }
  208. put_cabac_bypass(c, 0);
  209. while(m>>=1){
  210. put_cabac_bypass(c, v&m);
  211. }
  212. }
  213. if(is_signed)
  214. put_cabac_bypass(c, sign);
  215. }
  216. }
  217. static void refill(CABACContext *c){
  218. #if CABAC_BITS == 16
  219. c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  220. #else
  221. c->low+= c->bytestream[0]<<1;
  222. #endif
  223. c->low -= CABAC_MASK;
  224. c->bytestream+= CABAC_BITS/8;
  225. }
  226. static void refill2(CABACContext *c){
  227. int i, x;
  228. x= c->low ^ (c->low-1);
  229. i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS+1)];
  230. x= -CABAC_MASK;
  231. #if CABAC_BITS == 16
  232. x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
  233. #else
  234. x+= c->bytestream[0]<<1;
  235. #endif
  236. c->low += x<<i;
  237. c->bytestream+= CABAC_BITS/8;
  238. }
  239. static inline void renorm_cabac_decoder(CABACContext *c){
  240. while(c->range < (0x200 << CABAC_BITS)){
  241. c->range+= c->range;
  242. c->low+= c->low;
  243. if(!(c->low & CABAC_MASK))
  244. refill(c);
  245. }
  246. }
  /**
   * Renormalize the decoder by at most one bit, without branching on range:
   * range and low are doubled only when range is below 0x200 << CABAC_BITS.
   * Afterwards low is refilled if its low CABAC_BITS bits are zero.
   *
   * The inline-assembly variants are compiled only when ARCH_X86_DISABLED is
   * defined, in which case the `#elif 1` variant is the one selected. They
   * hard-code 0x2000000, which equals 0x200 << 16, i.e. they match
   * CABAC_BITS == 16 only. The comments of the form "P3:... athlon:..." look
   * like benchmark figures for each variant - TODO confirm units.
   */
  247. static inline void renorm_cabac_decoder_once(CABACContext *c){
  248. #ifdef ARCH_X86_DISABLED
  249. int temp;
  250. #if 0
  251. //P3:683 athlon:475
  252. asm(
  253. "lea -0x2000000(%0), %2 \n\t"
  254. "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
  255. "shl %%cl, %0 \n\t"
  256. "shl %%cl, %1 \n\t"
  257. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  258. );
  259. #elif 0
  260. //P3:680 athlon:474
  261. asm(
  262. "cmp $0x2000000, %0 \n\t"
  263. "setb %%cl \n\t" //FIXME 31->63 for x86-64
  264. "shl %%cl, %0 \n\t"
  265. "shl %%cl, %1 \n\t"
  266. : "+r"(c->range), "+r"(c->low), "+c"(temp)
  267. );
  268. #elif 1
  269. int temp2;
  270. //P3:665 athlon:517
  /* cdq spreads the sign of (range - 0x2000000) into edx as a mask; the
   * masked copies of range and low are then added to themselves. */
  271. asm(
  272. "lea -0x2000000(%0), %%eax \n\t"
  273. "cdq \n\t"
  274. "mov %0, %%eax \n\t"
  275. "and %%edx, %0 \n\t"
  276. "and %1, %%edx \n\t"
  277. "add %%eax, %0 \n\t"
  278. "add %%edx, %1 \n\t"
  279. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  280. );
  281. #elif 0
  282. int temp2;
  283. //P3:673 athlon:509
  284. asm(
  285. "cmp $0x2000000, %0 \n\t"
  286. "sbb %%edx, %%edx \n\t"
  287. "mov %0, %%eax \n\t"
  288. "and %%edx, %0 \n\t"
  289. "and %1, %%edx \n\t"
  290. "add %%eax, %0 \n\t"
  291. "add %%edx, %1 \n\t"
  292. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  293. );
  294. #else
  295. int temp2;
  296. //P3:677 athlon:511
  297. asm(
  298. "cmp $0x2000000, %0 \n\t"
  299. "lea (%0, %0), %%eax \n\t"
  300. "lea (%1, %1), %%edx \n\t"
  301. "cmovb %%eax, %0 \n\t"
  302. "cmovb %%edx, %1 \n\t"
  303. : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
  304. );
  305. #endif
  306. #else
  307. //P3:675 athlon:476
  /* Portable path: the unsigned difference has its top bit set exactly when
   * range is below the threshold, so shift is 1 in that case and 0
   * otherwise. */
  308. int shift= (uint32_t)(c->range - (0x200 << CABAC_BITS))>>31;
  309. c->range<<= shift;
  310. c->low <<= shift;
  311. #endif
  /* A zero in the low CABAC_BITS bits of low means new input is needed. */
  312. if(!(c->low & CABAC_MASK))
  313. refill(c);
  314. }
  /**
   * Decode one context-coded bin.
   *
   * @param c     decoder context; low, range and the bytestream position
   *              are updated
   * @param state context state byte; read, then replaced by the next state
   *              from the transition tables
   * @return the decoded bin (0 or 1)
   *
   * Three implementations are selected at compile time:
   *  - ARCH_X86 without BRANCHLESS_CABAC_DECODER: branching inline asm
   *  - ARCH_X86 with BRANCHLESS_CABAC_DECODER:    branchless inline asm
   *  - otherwise:                                  portable C, again in a
   *    branching and a branchless flavour
   *
   * NOTE(review): the asm paths hard-code shift counts (17, 19, 23) and the
   * constant 0xFFFF, and the branching C path hard-codes 19 and 0xFFFF;
   * all of these correspond to CABAC_BITS == 16.
   */
  315. static int get_cabac(CABACContext *c, uint8_t * const state){
  316. //FIXME gcc generates duplicate load/stores for c->low and c->range
  317. #ifdef ARCH_X86
  318. int bit;
  /* Byte offsets of CABACContext members, pasted into the asm as
   * displacements from the context pointer (%2).
   * NOTE(review): they match the struct only with 4-byte int and pointers
   * and with STRICT_LIMITS undefined - confirm before changing the struct. */
  319. #define LOW "0"
  320. #define RANGE "4"
  321. #define LPS_RANGE "12"
  322. #define LPS_STATE "12+2*65*4"
  323. #define MPS_STATE "12+2*65*4+2*64"
  324. #define BYTESTART "12+2*65*4+4*64"
  325. #define BYTE "16+2*65*4+4*64"
  326. #define BYTEEND "20+2*65*4+4*64"
  327. #ifndef BRANCHLESS_CABAC_DECODER
  /* Branching variant: label 1 is the LPS path, label 2 the common exit
   * that stores range and low back into the context. */
  328. asm volatile(
  329. "movzbl (%1), %%eax \n\t"
  330. "movl "RANGE "(%2), %%ebx \n\t"
  331. "movl "RANGE "(%2), %%edx \n\t"
  332. "shrl $23, %%ebx \n\t"
  333. "leal "LPS_RANGE"(%2, %%eax, 4), %%esi \n\t"
  334. "movzbl (%%ebx, %%esi), %%esi \n\t"
  335. "shll $17, %%esi \n\t"
  336. "movl "LOW "(%2), %%ebx \n\t"
  337. //eax:state ebx:low, edx:range, esi:RangeLPS
  338. "subl %%esi, %%edx \n\t"
  339. "cmpl %%edx, %%ebx \n\t"
  340. " ja 1f \n\t"
  341. "cmp $0x2000000, %%edx \n\t" //FIXME avoidable
  342. "setb %%cl \n\t"
  343. "shl %%cl, %%edx \n\t"
  344. "shl %%cl, %%ebx \n\t"
  345. "movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t"
  346. "movb %%cl, (%1) \n\t"
  347. //eax:state ebx:low, edx:range, esi:RangeLPS
  348. "test %%bx, %%bx \n\t"
  349. " jnz 2f \n\t"
  350. "movl "BYTE "(%2), %%esi \n\t"
  351. "subl $0xFFFF, %%ebx \n\t"
  352. "movzwl (%%esi), %%ecx \n\t"
  353. "bswap %%ecx \n\t"
  354. "shrl $15, %%ecx \n\t"
  355. "addl $2, %%esi \n\t"
  356. "addl %%ecx, %%ebx \n\t"
  357. "movl %%esi, "BYTE "(%2) \n\t"
  358. "jmp 2f \n\t"
  359. "1: \n\t"
  360. //eax:state ebx:low, edx:range, esi:RangeLPS
  361. "subl %%edx, %%ebx \n\t"
  362. "movl %%esi, %%edx \n\t"
  363. "shr $19, %%esi \n\t"
  364. "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
  365. "shll %%cl, %%ebx \n\t"
  366. "shll %%cl, %%edx \n\t"
  367. "movzbl "LPS_STATE"(%2, %%eax), %%ecx \n\t"
  368. "movb %%cl, (%1) \n\t"
  369. "addl $1, %%eax \n\t"
  370. "test %%bx, %%bx \n\t"
  371. " jnz 2f \n\t"
  372. "movl "BYTE "(%2), %%ecx \n\t"
  373. "movzwl (%%ecx), %%esi \n\t"
  374. "bswap %%esi \n\t"
  375. "shrl $15, %%esi \n\t"
  376. "subl $0xFFFF, %%esi \n\t"
  377. "addl $2, %%ecx \n\t"
  378. "movl %%ecx, "BYTE "(%2) \n\t"
  379. "leal -1(%%ebx), %%ecx \n\t"
  380. "xorl %%ebx, %%ecx \n\t"
  381. "shrl $17, %%ecx \n\t"
  382. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
  383. "neg %%cl \n\t"
  384. "add $7, %%cl \n\t"
  385. "shll %%cl , %%esi \n\t"
  386. "addl %%esi, %%ebx \n\t"
  387. "2: \n\t"
  388. "movl %%edx, "RANGE "(%2) \n\t"
  389. "movl %%ebx, "LOW "(%2) \n\t"
  390. :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
  391. :"r"(state), "r"(c)
  392. : "%ecx", "%ebx", "%edx", "%esi"
  393. );
  /* eax holds the (possibly incremented) state; its lowest bit is the bin. */
  394. bit&=1;
  395. #else
  /* Branchless variant: a mask derived from comparing low with the MPS
   * range selects between the MPS and LPS outcome; label 1 skips the
   * refill when the low 16 bits of low are non-zero. */
  396. asm volatile(
  397. "movzbl (%1), %%eax \n\t"
  398. "movl "RANGE "(%2), %%ebx \n\t"
  399. "movl "RANGE "(%2), %%edx \n\t"
  400. "shrl $23, %%ebx \n\t"
  401. "leal "LPS_RANGE"(%2, %%eax, 4), %%esi \n\t"
  402. "movzbl (%%ebx, %%esi), %%esi \n\t"
  403. "shll $17, %%esi \n\t"
  404. "movl "LOW "(%2), %%ebx \n\t"
  405. //eax:state ebx:low, edx:range, esi:RangeLPS
  406. "subl %%esi, %%edx \n\t"
  407. #ifdef CMOV_IS_FAST //FIXME actually define this somewhere
  408. "cmpl %%ebx, %%edx \n\t"
  409. "cmova %%edx, %%esi \n\t"
  410. "sbbl %%ecx, %%ecx \n\t"
  411. "andl %%ecx, %%edx \n\t"
  412. "subl %%edx, %%ebx \n\t"
  413. "xorl %%ecx, %%eax \n\t"
  414. #else
  415. "movl %%edx, %%ecx \n\t"
  416. "subl %%ebx, %%edx \n\t"
  417. "sarl $31, %%edx \n\t" //lps_mask
  418. "subl %%ecx, %%esi \n\t" //RangeLPS - range
  419. "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
  420. "addl %%ecx, %%esi \n\t" //new range
  421. "andl %%edx, %%ecx \n\t"
  422. "subl %%ecx, %%ebx \n\t"
  423. "xorl %%edx, %%eax \n\t"
  424. #endif
  425. //eax:state ebx:low edx:mask esi:range
  426. "movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t"
  427. "movb %%cl, (%1) \n\t"
  428. "movl %%esi, %%edx \n\t"
  429. //eax:bit ebx:low edx:range esi:range
  430. "shr $19, %%esi \n\t"
  431. "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
  432. "shll %%cl, %%ebx \n\t"
  433. "shll %%cl, %%edx \n\t"
  434. "test %%bx, %%bx \n\t"
  435. " jnz 1f \n\t"
  436. "movl "BYTE "(%2), %%ecx \n\t"
  437. "movzwl (%%ecx), %%esi \n\t"
  438. "bswap %%esi \n\t"
  439. "shrl $15, %%esi \n\t"
  440. "subl $0xFFFF, %%esi \n\t"
  441. "addl $2, %%ecx \n\t"
  442. "movl %%ecx, "BYTE "(%2) \n\t"
  443. "leal -1(%%ebx), %%ecx \n\t"
  444. "xorl %%ebx, %%ecx \n\t"
  445. "shrl $17, %%ecx \n\t"
  446. "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
  447. "neg %%cl \n\t"
  448. "add $7, %%cl \n\t"
  449. "shll %%cl , %%esi \n\t"
  450. "addl %%esi, %%ebx \n\t"
  451. "1: \n\t"
  452. "movl %%edx, "RANGE "(%2) \n\t"
  453. "movl %%ebx, "LOW "(%2) \n\t"
  454. :"=&a"(bit)
  455. :"r"(state), "r"(c)
  456. : "%ecx", "%ebx", "%edx", "%esi"
  457. );
  458. bit&=1;
  459. #endif
  460. #else
  /* Portable C implementation. */
  461. int s = *state;
  /* LPS sub-range for this state, selected by the top bits of range and
   * scaled up to the decoder's shifted representation. */
  462. int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
  463. int bit, lps_mask attribute_unused;
  /* range now holds the MPS sub-range. */
  464. c->range -= RangeLPS;
  465. #ifndef BRANCHLESS_CABAC_DECODER
  466. if(c->low < c->range){
  /* MPS: the bin is the low bit of the state. */
  467. bit= s&1;
  468. *state= c->mps_state[s];
  469. renorm_cabac_decoder_once(c);
  470. }else{
  /* LPS: `bit` is first reused as the renormalization shift count. */
  471. bit= ff_h264_norm_shift[RangeLPS>>19];
  472. c->low -= c->range;
  473. *state= c->lps_state[s];
  474. c->range = RangeLPS<<bit;
  475. c->low <<= bit;
  476. bit= (s&1)^1;
  477. if(!(c->low & 0xFFFF)){
  478. refill2(c);
  479. }
  480. }
  481. #else
  /* lps_mask is all ones when low exceeds the MPS range (LPS), else zero.
   * NOTE(review): relies on >> of a negative int being an arithmetic
   * shift, which is implementation-defined in C. */
  482. lps_mask= (c->range - c->low)>>31;
  483. c->low -= c->range & lps_mask;
  484. c->range += (RangeLPS - c->range) & lps_mask;
  /* NOTE(review): on the LPS path s becomes ~s, i.e. negative, so the
   * lookup below indexes in front of mps_state - apparently relying on
   * lps_state being laid out directly before it and on how
   * ff_init_cabac_states() fills both tables; confirm. */
  485. s^=lps_mask;
  486. *state= c->mps_state[s];
  487. bit= s&1;
  /* lps_mask is reused as the renormalization shift count. */
  488. lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+3)];
  489. c->range<<= lps_mask;
  490. c->low <<= lps_mask;
  491. if(!(c->low & CABAC_MASK))
  492. refill2(c);
  493. #endif
  494. #endif
  495. return bit;
  496. }
  497. static int get_cabac_bypass(CABACContext *c){
  498. c->low += c->low;
  499. if(!(c->low & CABAC_MASK))
  500. refill(c);
  501. if(c->low < c->range){
  502. return 0;
  503. }else{
  504. c->low -= c->range;
  505. return 1;
  506. }
  507. }
  508. /**
  509. *
  510. * @return the number of bytes read or 0 if no end
  511. */
  512. static int get_cabac_terminate(CABACContext *c){
  513. c->range -= 4<<CABAC_BITS;
  514. if(c->low < c->range){
  515. renorm_cabac_decoder_once(c);
  516. return 0;
  517. }else{
  518. return c->bytestream - c->bytestream_start;
  519. }
  520. }
  521. /**
  522. * get (truncated) unnary binarization.
  523. */
  524. static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
  525. int i;
  526. for(i=0; i<max; i++){
  527. if(get_cabac(c, state)==0)
  528. return i;
  529. if(i< max_index) state++;
  530. }
  531. return truncated ? max : -1;
  532. }
  533. /**
  534. * get unary exp golomb k-th order binarization.
  535. */
  536. static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
  537. int i, v;
  538. int m= 1<<k;
  539. if(get_cabac(c, state)==0)
  540. return 0;
  541. if(0 < max_index) state++;
  542. for(i=1; i<max; i++){
  543. if(get_cabac(c, state)==0){
  544. if(is_signed && get_cabac_bypass(c)){
  545. return -i;
  546. }else
  547. return i;
  548. }
  549. if(i < max_index) state++;
  550. }
  551. while(get_cabac_bypass(c)){
  552. i+= m;
  553. m+= m;
  554. }
  555. v=0;
  556. while(m>>=1){
  557. v+= v + get_cabac_bypass(c);
  558. }
  559. i += v;
  560. if(is_signed && get_cabac_bypass(c)){
  561. return -i;
  562. }else
  563. return i;
  564. }