You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1179 lines
43KB

  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
  3. * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
  24. * @author Michael Niedermayer <michaelni@gmx.at>
  25. */
  26. #define CABAC(h) 0
  27. #define UNCHECKED_BITSTREAM_READER 1
  28. #include "internal.h"
  29. #include "avcodec.h"
  30. #include "h264.h"
  31. #include "h264data.h" // FIXME FIXME FIXME
  32. #include "h264_mvpred.h"
  33. #include "golomb.h"
  34. #include "mpegutils.h"
  35. #include "libavutil/avassert.h"
/* Map from Exp-Golomb code number to coded_block_pattern for gray-scale
 * (no chroma residual) macroblocks, inter and intra4x4 variants. */
static const uint8_t golomb_to_inter_cbp_gray[16]={
0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};

/* Chroma DC coeff_token VLC (4:2:0, max 4 coefficients).
 * Entries are indexed 4*total_coeff + trailing_ones, matching how
 * decode_residual() unpacks the decoded symbol (total = token>>2,
 * trailing = token&3).  A len of 0 marks an unused combination. */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};

/* Chroma DC coeff_token VLC for 4:2:2 (max 8 coefficients), same
 * 4*total_coeff + trailing_ones layout as above. */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,  0,  0,  0,
 15,  1,  0,  0,
 14, 13,  1,  0,
  7, 12, 11,  1,
  6,  5, 10,  1,
  7,  6,  4,  9,
  7,  6,  5,  8,
  7,  6,  5,  4,
  7,  5,  4,  4,
};
/* Luma / general coeff_token VLCs.  Four tables, selected via
 * coeff_token_table_index[] from the predicted non-zero count (nC) in
 * decode_residual(); the last table is a fixed 6-bit code.  Each row of
 * four entries is one total_coeff value, columns are trailing_ones 0..3. */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
/* total_zeros VLCs for 4x4 (and DC) blocks.  decode_residual() indexes
 * these as (total_zeros_vlc-1)[total_coeff], i.e. row k serves
 * total_coeff == k+1; row length shrinks as fewer zero counts remain
 * possible.  Trailing rows of the declared [16] extent are unused. */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};

/* total_zeros for chroma DC, 4:2:0 (max 4 coeffs): rows for
 * total_coeff 1..3 via the same (table-1)[total_coeff] trick. */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};

/* total_zeros for chroma DC, 4:2:2 (max 8 coeffs), total_coeff 1..7. */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};

/* run_before VLCs: rows 0..5 serve zeros_left 1..6 (again indexed as
 * (run_vlc-1)[zeros_left]); the last row is the shared table used for
 * zeros_left >= 7 (run7_vlc). */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
/* Runtime VLC lookup structures and their statically reserved backing
 * storage (INIT_VLC_USE_NEW_STATIC).  The *_size constants record how
 * many (code,bits) pairs each table may occupy; ff_h264_decode_init_vlc()
 * asserts the packed coeff_token sizes add up exactly. */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* Width of the bit window peeked when decoding level codes; also the
 * index width of cavlc_level_tab (built by init_cavlc_level_tab()). */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* get_vlc2() first-stage lookup depths for each table. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6
  252. /**
  253. * Get the predicted number of non-zero coefficients.
  254. * @param n block index
  255. */
  256. static inline int pred_non_zero_count(H264Context *h, H264SliceContext *sl, int n)
  257. {
  258. const int index8= scan8[n];
  259. const int left = sl->non_zero_count_cache[index8 - 1];
  260. const int top = sl->non_zero_count_cache[index8 - 8];
  261. int i= left + top;
  262. if(i<64) i= (i+1)>>1;
  263. tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
  264. return i&31;
  265. }
  266. static av_cold void init_cavlc_level_tab(void){
  267. int suffix_length;
  268. unsigned int i;
  269. for(suffix_length=0; suffix_length<7; suffix_length++){
  270. for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
  271. int prefix= LEVEL_TAB_BITS - av_log2(2*i);
  272. if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
  273. int level_code = (prefix << suffix_length) +
  274. (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
  275. int mask = -(level_code&1);
  276. level_code = (((2 + level_code) >> 1) ^ mask) - mask;
  277. cavlc_level_tab[suffix_length][i][0]= level_code;
  278. cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
  279. }else if(prefix + 1 <= LEVEL_TAB_BITS){
  280. cavlc_level_tab[suffix_length][i][0]= prefix+100;
  281. cavlc_level_tab[suffix_length][i][1]= prefix + 1;
  282. }else{
  283. cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
  284. cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
  285. }
  286. }
  287. }
  288. }
  289. av_cold void ff_h264_decode_init_vlc(void){
  290. static int done = 0;
  291. if (!done) {
  292. int i;
  293. int offset;
  294. done = 1;
  295. chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
  296. chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
  297. init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
  298. &chroma_dc_coeff_token_len [0], 1, 1,
  299. &chroma_dc_coeff_token_bits[0], 1, 1,
  300. INIT_VLC_USE_NEW_STATIC);
  301. chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
  302. chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
  303. init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
  304. &chroma422_dc_coeff_token_len [0], 1, 1,
  305. &chroma422_dc_coeff_token_bits[0], 1, 1,
  306. INIT_VLC_USE_NEW_STATIC);
  307. offset = 0;
  308. for(i=0; i<4; i++){
  309. coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
  310. coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
  311. init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
  312. &coeff_token_len [i][0], 1, 1,
  313. &coeff_token_bits[i][0], 1, 1,
  314. INIT_VLC_USE_NEW_STATIC);
  315. offset += coeff_token_vlc_tables_size[i];
  316. }
  317. /*
  318. * This is a one time safety check to make sure that
  319. * the packed static coeff_token_vlc table sizes
  320. * were initialized correctly.
  321. */
  322. av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
  323. for(i=0; i<3; i++){
  324. chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
  325. chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
  326. init_vlc(&chroma_dc_total_zeros_vlc[i],
  327. CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
  328. &chroma_dc_total_zeros_len [i][0], 1, 1,
  329. &chroma_dc_total_zeros_bits[i][0], 1, 1,
  330. INIT_VLC_USE_NEW_STATIC);
  331. }
  332. for(i=0; i<7; i++){
  333. chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
  334. chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
  335. init_vlc(&chroma422_dc_total_zeros_vlc[i],
  336. CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
  337. &chroma422_dc_total_zeros_len [i][0], 1, 1,
  338. &chroma422_dc_total_zeros_bits[i][0], 1, 1,
  339. INIT_VLC_USE_NEW_STATIC);
  340. }
  341. for(i=0; i<15; i++){
  342. total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
  343. total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
  344. init_vlc(&total_zeros_vlc[i],
  345. TOTAL_ZEROS_VLC_BITS, 16,
  346. &total_zeros_len [i][0], 1, 1,
  347. &total_zeros_bits[i][0], 1, 1,
  348. INIT_VLC_USE_NEW_STATIC);
  349. }
  350. for(i=0; i<6; i++){
  351. run_vlc[i].table = run_vlc_tables[i];
  352. run_vlc[i].table_allocated = run_vlc_tables_size;
  353. init_vlc(&run_vlc[i],
  354. RUN_VLC_BITS, 7,
  355. &run_len [i][0], 1, 1,
  356. &run_bits[i][0], 1, 1,
  357. INIT_VLC_USE_NEW_STATIC);
  358. }
  359. run7_vlc.table = run7_vlc_table,
  360. run7_vlc.table_allocated = run7_vlc_table_size;
  361. init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
  362. &run_len [6][0], 1, 1,
  363. &run_bits[6][0], 1, 1,
  364. INIT_VLC_USE_NEW_STATIC);
  365. init_cavlc_level_tab();
  366. }
  367. }
/**
 * Read the CAVLC level_prefix: a unary code, i.e. the number of leading
 * zero bits before the terminating '1' bit.
 * @return the number of leading zeros read (the prefix value)
 */
static inline int get_level_prefix(GetBitContext *gb){
    unsigned int buf;
    int log;

    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    buf=GET_CACHE(re, gb);

    /* position of the highest set bit gives prefix length + 1 */
    log= 32 - av_log2(buf);
#ifdef TRACE
    print_bin(buf>>(32-log), log);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
#endif

    /* consume the zeros and the stop bit */
    LAST_SKIP_BITS(re, gb, log);
    CLOSE_READER(re, gb);

    return log-1;
}
/**
 * Decode a residual block (coeff_token, trailing ones, levels,
 * total_zeros and run_before) and store the dequantized coefficients
 * into @p block in scan order.
 * @param n block index (>= LUMA_DC_BLOCK_INDEX selects DC handling)
 * @param scantable scantable
 * @param qmul dequant table, or NULL semantics for DC (levels stored raw)
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred
 */
static int decode_residual(H264Context *h, H264SliceContext *sl,
                           GetBitContext *gb, int16_t *block, int n,
                           const uint8_t *scantable, const uint32_t *qmul,
                           int max_coeff)
{
    /* maps predicted nnz (nC, 0..16) to one of the 4 coeff_token VLCs */
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
    int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;

    //FIXME put trailing_ones into the context

    /* --- coeff_token: gives total_coeff (>>2) and trailing_ones (&3) --- */
    if(max_coeff <= 8){
        /* chroma DC: dedicated tables, no neighbour prediction */
        if (max_coeff == 4)
            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        else
            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
        total_coeff= coeff_token>>2;
    }else{
        if(n >= LUMA_DC_BLOCK_INDEX){
            total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
            total_coeff= pred_non_zero_count(h, sl, n);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }
    }
    sl->non_zero_count_cache[scan8[n]] = total_coeff;

    //FIXME set last_non_zero?
    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
        return -1;
    }

    trailing_ones= coeff_token&3;
    tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
    av_assert2(total_coeff<=16);

    /* --- trailing ones: one sign bit each (0 -> +1, 1 -> -1); peek 3
     * bits then consume only trailing_ones of them --- */
    i = show_bits(gb, 3);
    skip_bits(gb, trailing_ones);
    level[0] = 1-((i&4)>>1);
    level[1] = 1-((i&2)   );
    level[2] = 1-((i&1)<<1);

    /* --- remaining levels --- */
    if(trailing_ones<total_coeff) {
        int mask, prefix;
        /* bitwise & on two 0/1 comparisons, equivalent to && here */
        int suffix_length = total_coeff > 10 & trailing_ones < 3;
        /* fast path: precomputed table decodes prefix+suffix in one peek;
         * values >= 100 are escape markers (prefix+100) for the slow path */
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];

        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
        if(level_code >= 100){
            prefix= level_code - 100;
            if(prefix == LEVEL_TAB_BITS)
                prefix += get_level_prefix(gb);

            //first coefficient has suffix_length equal to 0 or 1
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix; //part
            }else if(prefix==14){
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix + get_bits(gb, 4); //part
            }else{
                level_code= 30;
                if(prefix>=16){
                    if(prefix > 25+3){
                        av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
                        return -1;
                    }
                    level_code += (1<<(prefix-3))-4096;
                }
                level_code += get_bits(gb, prefix-3); //part
            }

            if(trailing_ones < 3) level_code += 2;

            suffix_length = 2;
            /* zig-zag unmap: even codes -> positive, odd -> negative */
            mask= -(level_code&1);
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
        }else{
            /* table already produced a signed level; bump magnitude by 1
             * (away from zero) when fewer than 3 trailing ones */
            level_code += ((level_code>>31)|1) & -(trailing_ones < 3);

            suffix_length = 1 + (level_code + 3U > 6U);
            level[trailing_ones]= level_code;
        }

        //remaining coefficients have suffix_length > 0
        for(i=trailing_ones+1;i<total_coeff;i++) {
            /* thresholds at which suffix_length grows */
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
            level_code= cavlc_level_tab[suffix_length][bitsi][0];

            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
            if(level_code >= 100){
                prefix= level_code - 100;
                if(prefix == LEVEL_TAB_BITS){
                    prefix += get_level_prefix(gb);
                }
                if(prefix<15){
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                }else{
                    level_code = 15<<suffix_length;
                    if (prefix>=16) {
                        if(prefix > 25+3){
                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
                            return AVERROR_INVALIDDATA;
                        }
                        level_code += (1<<(prefix-3))-4096;
                    }
                    level_code += get_bits(gb, prefix-3);
                }
                /* zig-zag unmap to signed */
                mask= -(level_code&1);
                level_code= (((2+level_code)>>1) ^ mask) - mask;
            }
            level[i]= level_code;
            suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
        }
    }

    /* --- total_zeros: number of zeros interleaved among the coeffs.
     * Tables are indexed (table-1)[total_coeff], i.e. row total_coeff-1. */
    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
        if (max_coeff <= 8) {
            if (max_coeff == 4)
                zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
            else
                zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
        } else {
            zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
        }
    }

    /* Walk the scan backwards from the last possible position, reading a
     * run_before VLC for each remaining coefficient while zeros remain.
     * DC blocks (n >= LUMA_DC_BLOCK_INDEX) store raw levels; AC blocks
     * dequantize with qmul.  The type parameter handles high bit depth
     * (int32_t) vs normal (int16_t) coefficient storage. */
#define STORE_BLOCK(type) \
    scantable += zeros_left + total_coeff - 1; \
    if(n >= LUMA_DC_BLOCK_INDEX){ \
        ((type*)block)[*scantable] = level[0]; \
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
            if(zeros_left < 7) \
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
            else \
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
            zeros_left -= run_before; \
            scantable -= 1 + run_before; \
            ((type*)block)[*scantable]= level[i]; \
        } \
        for(;i<total_coeff;i++) { \
            scantable--; \
            ((type*)block)[*scantable]= level[i]; \
        } \
    }else{ \
        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
            if(zeros_left < 7) \
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
            else \
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
            zeros_left -= run_before; \
            scantable -= 1 + run_before; \
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
        } \
        for(;i<total_coeff;i++) { \
            scantable--; \
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
        } \
    }

    if (h->pixel_shift) {
        STORE_BLOCK(int32_t)
    } else {
        STORE_BLOCK(int16_t)
    }

    if(zeros_left<0){
        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
        return -1;
    }

    return 0;
}
/**
 * Decode all luma residual blocks of one macroblock for plane @p p.
 * Handles intra16x16 (separate DC + 15-coeff AC blocks), 8x8 transform
 * and plain 4x4 layouts.
 * @param cbp coded block pattern (low 4 bits = luma 8x8 groups)
 * @param p plane index (0 = luma; 1/2 only in CAVLC 4:4:4)
 * @return the new luma 8x8 CBP (for deblocking nnz), or <0 on error
 */
static av_always_inline int decode_luma_residual(H264Context *h, H264SliceContext *sl, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
    int i4x4, i8x8;
    /* planes 1/2 are quantized with the chroma QP in 4:4:4 CAVLC */
    int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
    if(IS_INTRA16x16(mb_type)){
        /* clear the 16 DC coefficients, then decode the DC block */
        AV_ZERO128(h->mb_luma_dc[p]+0);
        AV_ZERO128(h->mb_luma_dc[p]+8);
        AV_ZERO128(h->mb_luma_dc[p]+16);
        AV_ZERO128(h->mb_luma_dc[p]+24);
        if( decode_residual(h, sl, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
            return -1; //FIXME continue if partitioned and other return -1 too
        }

        av_assert2((cbp&15) == 0 || (cbp&15) == 15);

        if(cbp&15){
            /* AC blocks: 15 coefficients each, scan starts past the DC */
            for(i8x8=0; i8x8<4; i8x8++){
                for(i4x4=0; i4x4<4; i4x4++){
                    const int index= i4x4 + 4*i8x8 + p*16;
                    if( decode_residual(h, sl, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
                                        index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
                        return -1;
                    }
                }
            }
            return 0xf;
        }else{
            /* no AC: zero the nnz cache for the whole 4x4 group */
            fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
            return 0;
        }
    }else{
        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
        int new_cbp = 0;
        for(i8x8=0; i8x8<4; i8x8++){
            if(cbp & (1<<i8x8)){
                if(IS_8x8DCT(mb_type)){
                    /* 8x8 transform: the four 4x4 reads interleave into one
                     * 8x8 coefficient buffer via the scan8x8 sub-scans */
                    int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
                    uint8_t *nnz;
                    for(i4x4=0; i4x4<4; i4x4++){
                        const int index= i4x4 + 4*i8x8 + p*16;
                        if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
                                            h->dequant8_coeff[cqm][qscale], 16) < 0 )
                            return -1;
                    }
                    /* collapse the 4 sub-block nnz values into one flag */
                    nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
                    new_cbp |= !!nnz[0] << i8x8;
                }else{
                    for(i4x4=0; i4x4<4; i4x4++){
                        const int index= i4x4 + 4*i8x8 + p*16;
                        if( decode_residual(h, sl, gb, h->mb + (16*index << pixel_shift), index,
                                            scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
                            return -1;
                        }
                        new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
                    }
                }
            }else{
                /* 8x8 group not coded: clear its nnz cache entries */
                uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
            }
        }
        return new_cbp;
    }
}
  628. int ff_h264_decode_mb_cavlc(H264Context *h, H264SliceContext *sl)
  629. {
  630. int mb_xy;
  631. int partition_count;
  632. unsigned int mb_type, cbp;
  633. int dct8x8_allowed= h->pps.transform_8x8_mode;
  634. int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
  635. const int pixel_shift = h->pixel_shift;
  636. unsigned local_ref_count[2];
  637. mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
  638. tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
  639. cbp = 0; /* avoid warning. FIXME: find a solution without slowing
  640. down the code */
  641. if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
  642. if(h->mb_skip_run==-1)
  643. h->mb_skip_run= get_ue_golomb_long(&h->gb);
  644. if (h->mb_skip_run--) {
  645. if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
  646. if(h->mb_skip_run==0)
  647. h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
  648. }
  649. decode_mb_skip(h, sl);
  650. return 0;
  651. }
  652. }
  653. if (FRAME_MBAFF(h)) {
  654. if( (h->mb_y&1) == 0 )
  655. h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
  656. }
  657. sl->prev_mb_skipped = 0;
  658. mb_type= get_ue_golomb(&h->gb);
  659. if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
  660. if(mb_type < 23){
  661. partition_count= b_mb_type_info[mb_type].partition_count;
  662. mb_type= b_mb_type_info[mb_type].type;
  663. }else{
  664. mb_type -= 23;
  665. goto decode_intra_mb;
  666. }
  667. } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
  668. if(mb_type < 5){
  669. partition_count= p_mb_type_info[mb_type].partition_count;
  670. mb_type= p_mb_type_info[mb_type].type;
  671. }else{
  672. mb_type -= 5;
  673. goto decode_intra_mb;
  674. }
  675. }else{
  676. av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
  677. if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
  678. mb_type--;
  679. decode_intra_mb:
  680. if(mb_type > 25){
  681. av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), h->mb_x, h->mb_y);
  682. return -1;
  683. }
  684. partition_count=0;
  685. cbp= i_mb_type_info[mb_type].cbp;
  686. sl->intra16x16_pred_mode = i_mb_type_info[mb_type].pred_mode;
  687. mb_type= i_mb_type_info[mb_type].type;
  688. }
  689. if(MB_FIELD(h))
  690. mb_type |= MB_TYPE_INTERLACED;
  691. h->slice_table[mb_xy] = sl->slice_num;
  692. if(IS_INTRA_PCM(mb_type)){
  693. const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
  694. h->sps.bit_depth_luma;
  695. // We assume these blocks are very rare so we do not optimize it.
  696. h->intra_pcm_ptr = align_get_bits(&h->gb);
  697. if (get_bits_left(&h->gb) < mb_size) {
  698. av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
  699. return AVERROR_INVALIDDATA;
  700. }
  701. skip_bits_long(&h->gb, mb_size);
  702. // In deblocking, the quantizer is 0
  703. h->cur_pic.qscale_table[mb_xy] = 0;
  704. // All coeffs are present
  705. memset(h->non_zero_count[mb_xy], 16, 48);
  706. h->cur_pic.mb_type[mb_xy] = mb_type;
  707. return 0;
  708. }
  709. local_ref_count[0] = h->ref_count[0] << MB_MBAFF(h);
  710. local_ref_count[1] = h->ref_count[1] << MB_MBAFF(h);
  711. fill_decode_neighbors(h, sl, mb_type);
  712. fill_decode_caches(h, sl, mb_type);
  713. //mb_pred
  714. if(IS_INTRA(mb_type)){
  715. int pred_mode;
  716. // init_top_left_availability(h);
  717. if(IS_INTRA4x4(mb_type)){
  718. int i;
  719. int di = 1;
  720. if(dct8x8_allowed && get_bits1(&h->gb)){
  721. mb_type |= MB_TYPE_8x8DCT;
  722. di = 4;
  723. }
  724. // fill_intra4x4_pred_table(h);
  725. for(i=0; i<16; i+=di){
  726. int mode = pred_intra_mode(h, sl, i);
  727. if(!get_bits1(&h->gb)){
  728. const int rem_mode= get_bits(&h->gb, 3);
  729. mode = rem_mode + (rem_mode >= mode);
  730. }
  731. if(di==4)
  732. fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
  733. else
  734. sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
  735. }
  736. write_back_intra_pred_mode(h, sl);
  737. if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
  738. return -1;
  739. }else{
  740. sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, sl->intra16x16_pred_mode, 0);
  741. if (sl->intra16x16_pred_mode < 0)
  742. return -1;
  743. }
  744. if(decode_chroma){
  745. pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&h->gb), 1);
  746. if(pred_mode < 0)
  747. return -1;
  748. sl->chroma_pred_mode = pred_mode;
  749. } else {
  750. sl->chroma_pred_mode = DC_128_PRED8x8;
  751. }
  752. }else if(partition_count==4){
  753. int i, j, sub_partition_count[4], list, ref[2][4];
  754. if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
  755. for(i=0; i<4; i++){
  756. h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
  757. if(h->sub_mb_type[i] >=13){
  758. av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
  759. return -1;
  760. }
  761. sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
  762. h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
  763. }
  764. if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
  765. ff_h264_pred_direct_motion(h, sl, &mb_type);
  766. sl->ref_cache[0][scan8[4]] =
  767. sl->ref_cache[1][scan8[4]] =
  768. sl->ref_cache[0][scan8[12]] =
  769. sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
  770. }
  771. }else{
  772. av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
  773. for(i=0; i<4; i++){
  774. h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
  775. if(h->sub_mb_type[i] >=4){
  776. av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
  777. return -1;
  778. }
  779. sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
  780. h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
  781. }
  782. }
  783. for(list=0; list<h->list_count; list++){
  784. int ref_count = IS_REF0(mb_type) ? 1 : local_ref_count[list];
            /* NOTE(review): this is the tail of the CAVLC macroblock decoder;
             * the function header, mb_type parsing and the head of the
             * enclosing per-list loop (which sets up ref_count) are above
             * this excerpt.
             *
             * Decode one reference index per 8x8 sub-partition for the
             * current reference list. */
            for(i=0; i<4; i++){
                if(IS_DIRECT(h->sub_mb_type[i])) continue; /* direct sub-MBs carry no ref_idx */
                if(IS_DIR(h->sub_mb_type[i], 0, list)){
                    unsigned int tmp;
                    if(ref_count == 1){
                        tmp= 0;                        /* single reference: index is implied, no bits read */
                    }else if(ref_count == 2){
                        tmp= get_bits1(&h->gb)^1;      /* te(v) with max 1: one bit, inverted */
                    }else{
                        tmp= get_ue_golomb_31(&h->gb); /* ue(v); must stay below ref_count */
                        if(tmp>=ref_count){
                            av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
                            return -1;
                        }
                    }
                    ref[list][i]= tmp;
                }else{
                    //FIXME
                    ref[list][i] = -1; /* sub-partition not predicted from this list */
                }
            }
        }

        /* 8x8 transform remains possible only if every sub-partition allows it
         * (checked by the helper now that all sub_mb_types are known). */
        if(dct8x8_allowed)
            dct8x8_allowed = get_dct8x8_allowed(h);

        /* Second pass: fill the ref/mv caches and read the motion vector
         * differences (mvd, se(v)) for every non-direct sub-partition. */
        for(list=0; list<h->list_count; list++){
            for(i=0; i<4; i++){
                if(IS_DIRECT(h->sub_mb_type[i])) {
                    /* Direct sub-MB: propagate the neighbouring cache entry so
                     * the top-left slot is consistent; the actual refs/MVs were
                     * presumably filled by direct prediction earlier — outside
                     * this excerpt, TODO confirm. */
                    sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
                    continue;
                }
                /* Replicate the decoded ref index into all four 4x4 cache
                 * cells covered by this 8x8 (cache stride is 8). */
                sl->ref_cache[list][ scan8[4*i] ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
                sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];

                if(IS_DIR(h->sub_mb_type[i], 0, list)){
                    const int sub_mb_type= h->sub_mb_type[i];
                    /* MB_TYPE_16x16/16x8 flags are reused at sub-MB scale:
                     * they mark 8x8/8x4 partitions, i.e. 2 cache cells wide. */
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                    for(j=0; j<sub_partition_count[i]; j++){
                        int mx, my;
                        const int index= 4*i + block_width*j;
                        int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
                        /* Predicted MV + coded difference (se(v)) gives the final MV. */
                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
                        mx += get_se_golomb(&h->gb);
                        my += get_se_golomb(&h->gb);
                        tprintf(h->avctx, "final mv:%d %d\n", mx, my);

                        /* Replicate the MV into every 4x4 cache cell the
                         * sub-partition covers (offsets: +1 right, +8 below,
                         * +9 below-right at cache stride 8). */
                        if(IS_SUB_8X8(sub_mb_type)){
                            mv_cache[ 1 ][0]=
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
                            mv_cache[ 1 ][1]=
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
                        }else if(IS_SUB_8X4(sub_mb_type)){
                            mv_cache[ 1 ][0]= mx;
                            mv_cache[ 1 ][1]= my;
                        }else if(IS_SUB_4X8(sub_mb_type)){
                            mv_cache[ 8 ][0]= mx;
                            mv_cache[ 8 ][1]= my;
                        }
                        mv_cache[ 0 ][0]= mx;
                        mv_cache[ 0 ][1]= my;
                    }
                }else{
                    /* List unused for this sub-partition: zero the 2x2 block of
                     * cache cells; each 32-bit store clears one packed
                     * (int16_t mx, int16_t my) pair. */
                    uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
                    p[0] = p[1]=
                    p[8] = p[9]= 0;
                }
            }
        }
    }else if(IS_DIRECT(mb_type)){
        /* Whole-MB direct mode: refs/MVs are inferred, nothing is read here. */
        ff_h264_pred_direct_motion(h, sl, &mb_type);
        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    }else{
        /* Non-direct, non-8x8 inter MB: 16x16, 16x8 or 8x16 partitions.
         * Pattern for each shape: first read all ref indices, then all MV
         * differences (that is the bitstream order). */
        int list, mx, my, i;
        //FIXME we should set ref_idx_l? to 0 if we use that later ...
        if(IS_16X16(mb_type)){
            for(list=0; list<h->list_count; list++){
                unsigned int val;
                if(IS_DIR(mb_type, 0, list)){
                    if(local_ref_count[list]==1){
                        val= 0;                        /* implied index */
                    } else if(local_ref_count[list]==2){
                        val= get_bits1(&h->gb)^1;      /* te(v), inverted bit */
                    }else{
                        val= get_ue_golomb_31(&h->gb);
                        if (val >= local_ref_count[list]){
                            av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                            return -1;
                        }
                    }
                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
                }
            }
            for(list=0; list<h->list_count; list++){
                if(IS_DIR(mb_type, 0, list)){
                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
                    mx += get_se_golomb(&h->gb);
                    my += get_se_golomb(&h->gb);
                    tprintf(h->avctx, "final mv:%d %d\n", mx, my);

                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
                }
            }
        }
        else if(IS_16X8(mb_type)){
            /* Two 16x8 partitions: i=0 top, i=1 bottom (cache offset 16*i). */
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        if(local_ref_count[list] == 1) {
                            val= 0;
                        } else if(local_ref_count[list] == 2) {
                            val= get_bits1(&h->gb)^1;
                        }else{
                            val= get_ue_golomb_31(&h->gb);
                            if (val >= local_ref_count[list]){
                                av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                return -1;
                            }
                        }
                    }else
                        val= LIST_NOT_USED&0xFF; /* marker for "no ref" in the byte-sized cache */
                    fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
                }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
                        mx += get_se_golomb(&h->gb);
                        my += get_se_golomb(&h->gb);
                        tprintf(h->avctx, "final mv:%d %d\n", mx, my);

                        val= pack16to32(mx,my);
                    }else
                        val=0; /* unused list: zero MV in cache */
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
                }
            }
        }else{
            /* Two 8x16 partitions: i=0 left, i=1 right (cache offset 2*i). */
            av_assert2(IS_8X16(mb_type));
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                        if(local_ref_count[list]==1){
                            val= 0;
                        } else if(local_ref_count[list]==2){
                            val= get_bits1(&h->gb)^1;
                        }else{
                            val= get_ue_golomb_31(&h->gb);
                            if (val >= local_ref_count[list]){
                                av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                return -1;
                            }
                        }
                    }else
                        val= LIST_NOT_USED&0xFF;
                    fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
                }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
                        mx += get_se_golomb(&h->gb);
                        my += get_se_golomb(&h->gb);
                        tprintf(h->avctx, "final mv:%d %d\n", mx, my);

                        val= pack16to32(mx,my);
                    }else
                        val=0;
                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
                }
            }
        }
    }

    /* Commit the decoded motion info from the caches to the frame arrays. */
    if(IS_INTER(mb_type))
        write_back_motion(h, sl, mb_type);

    if(!IS_INTRA16x16(mb_type)){
        /* coded_block_pattern: ue(v) code index, then mapped through the
         * spec's table (different mapping for intra4x4 vs inter, and a
         * reduced "gray" table when no chroma planes are decoded). */
        cbp= get_ue_golomb(&h->gb);

        if(decode_chroma){
            if(cbp > 47){
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
                return -1;
            }
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
            else cbp= golomb_to_inter_cbp [cbp];
        }else{
            if(cbp > 15){
                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
                return -1;
            }
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
            else cbp= golomb_to_inter_cbp_gray[cbp];
        }
    } else {
        /* Intra16x16: cbp is not coded here; it was presumably derived from
         * mb_type earlier in the function — outside this excerpt. A chroma
         * cbp on a monochrome stream is invalid. */
        if (!decode_chroma && cbp>15) {
            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
            return AVERROR_INVALIDDATA;
        }
    }

    /* transform_size_8x8_flag: only coded when there is luma residual and the
     * MB is inter (intra handling happens elsewhere). */
    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
        mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
    }
    h->cbp=
    h->cbp_table[mb_xy]= cbp;
    h->cur_pic.mb_type[mb_xy] = mb_type;

    if(cbp || IS_INTRA16x16(mb_type)){
        int i4x4, i8x8, chroma_idx;
        int dquant;
        int ret;
        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
        const uint8_t *scan, *scan8x8;
        const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

        /* Pick zigzag vs field scan tables; the _q0 variants are for qscale 0
         * (right-shifted coefficients, per table setup elsewhere — TODO
         * confirm against the init code). */
        if(IS_INTERLACED(mb_type)){
            scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
            scan = sl->qscale ? h->field_scan : h->field_scan_q0;
        }else{
            scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
            scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
        }

        /* mb_qp_delta: se(v); the decoded qscale wraps modulo (max_qp+1) per
         * the spec's modular arithmetic, so one out-of-range step is folded
         * back before being rejected. */
        dquant= get_se_golomb(&h->gb);

        sl->qscale += dquant;

        if (((unsigned)sl->qscale) > max_qp){
            if (sl->qscale < 0) sl->qscale += max_qp + 1;
            else sl->qscale -= max_qp+1;
            if (((unsigned)sl->qscale) > max_qp){
                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
                return -1;
            }
        }

        sl->chroma_qp[0] = get_chroma_qp(h, 0, sl->qscale);
        sl->chroma_qp[1] = get_chroma_qp(h, 1, sl->qscale);

        /* Luma residual (plane 0); extra per-plane cbp bits returned by the
         * helper are stored in the upper bits of cbp_table. */
        if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
            return -1;
        }
        h->cbp_table[mb_xy] |= ret << 12;

        if (CHROMA444(h)) {
            /* 4:4:4: chroma planes are coded like luma planes 1 and 2. */
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
                return -1;
            }
            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
                return -1;
            }
        } else {
            /* 4:2:0 -> 1 chroma 8x8 per plane, 4:2:2 -> 2 (chroma_format_idc). */
            const int num_c8x8 = h->sps.chroma_format_idc;

            /* Chroma DC coefficients are present whenever any chroma residual
             * is coded (cbp chroma part != 0). */
            if(cbp&0x30){
                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
                    if (decode_residual(h, sl, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
                                        CHROMA_DC_BLOCK_INDEX+chroma_idx,
                                        CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
                                        NULL, 4*num_c8x8) < 0) {
                        return -1;
                    }
            }

            /* Chroma AC coefficients only when the cbp says DC+AC (bit 0x20);
             * scan+1 / 15 coeffs skip the DC position. */
            if(cbp&0x20){
                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
                    int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
                        for (i4x4 = 0; i4x4 < 4; i4x4++) {
                            const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
                            if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
                                return -1;
                            mb += 16 << pixel_shift;
                        }
                    }
                }
            }else{
                /* No chroma AC: clear the chroma non-zero-count cache. */
                fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
                fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
            }
        }
    }else{
        /* Fully skipped residual: zero all non-zero-count caches. */
        fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
        fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
        fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
    }
    h->cur_pic.qscale_table[mb_xy] = sl->qscale;
    write_back_non_zero_count(h, sl);

    return 0;
}