/*
 * H.26L/H.264/AVC/JVT/14496-10/... motion vector prediction
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG-4 part 10 motion vector prediction.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
#ifndef AVCODEC_H264_MVPRED_H
#define AVCODEC_H264_MVPRED_H

#include "internal.h"
#include "avcodec.h"
#include "h264.h"

#include <assert.h>
static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C,
                                              int i, int list, int part_width)
{
    const int topright_ref = h->ref_cache[list][i - 8 + part_width];

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if (FRAME_MBAFF(h)) {
#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)                              \
        const int xy = XY, y4 = Y4;                                     \
        const int mb_type = mb_types[xy + (y4 >> 2) * h->mb_stride];    \
        if (!USES_LIST(mb_type, list))                                  \
            return LIST_NOT_USED;                                      \
        mv = h->cur_pic_ptr->motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \
        h->mv_cache[list][scan8[0] - 2][0] = mv[0];                     \
        h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP;               \
        return h->cur_pic_ptr->ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP;

        if (topright_ref == PART_NOT_AVAILABLE
            && i >= scan8[0] + 8 && (i & 7) == 4
            && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) {
            const uint32_t *mb_types = h->cur_pic_ptr->mb_type;
            const int16_t *mv;
            AV_ZERO32(h->mv_cache[list][scan8[0] - 2]);
            *C = h->mv_cache[list][scan8[0] - 2];

            if (!MB_FIELD(h) && IS_INTERLACED(h->left_type[0])) {
                SET_DIAG_MV(* 2, >> 1, h->left_mb_xy[0] + h->mb_stride,
                            (h->mb_y & 1) * 2 + (i >> 5));
            }
            if (MB_FIELD(h) && !IS_INTERLACED(h->left_type[0])) {
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/ 2, << 1, h->left_mb_xy[i >= 36], ((i >> 2)) & 3);
            }
        }
#undef SET_DIAG_MV
    }

    if (topright_ref != PART_NOT_AVAILABLE) {
        *C = h->mv_cache[list][i - 8 + part_width];
        return topright_ref;
    } else {
        tprintf(h->avctx, "topright MV not available\n");

        *C = h->mv_cache[list][i - 8 - 1];
        return h->ref_cache[list][i - 8 - 1];
    }
}
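
/* Illustrative note: when the top-right block (cache index i - 8 + part_width)
 * is flagged PART_NOT_AVAILABLE, the fallback above returns the top-left
 * block at i - 8 - 1 instead, matching the spec rule that an unavailable
 * C candidate is replaced by the top-left predictor D. */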
/**
 * Get the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_motion(H264Context *const h, int n,
                                         int part_width, int list, int ref,
                                         int *const mx, int *const my)
{
    const int index8       = scan8[n];
    const int top_ref      = h->ref_cache[list][index8 - 8];
    const int left_ref     = h->ref_cache[list][index8 - 1];
    const int16_t *const A = h->mv_cache[list][index8 - 1];
    const int16_t *const B = h->mv_cache[list][index8 - 8];
    const int16_t *C;
    int diagonal_ref, match_count;

    assert(part_width == 1 || part_width == 2 || part_width == 4);

/* mv_cache
 * B . . A T T T T
 * U . . L . . , .
 * U . . L . . . .
 * U . . L . . , .
 * . . . L . . . .
 */

    diagonal_ref = fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count  = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
    tprintf(h->avctx, "pred_motion match_count=%d\n", match_count);
    if (match_count > 1) { //most common
        *mx = mid_pred(A[0], B[0], C[0]);
        *my = mid_pred(A[1], B[1], C[1]);
    } else if (match_count == 1) {
        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
        } else if (top_ref == ref) {
            *mx = B[0];
            *my = B[1];
        } else {
            *mx = C[0];
            *my = C[1];
        }
    } else {
        if (top_ref      == PART_NOT_AVAILABLE &&
            diagonal_ref == PART_NOT_AVAILABLE &&
            left_ref     != PART_NOT_AVAILABLE) {
            *mx = A[0];
            *my = A[1];
        } else {
            *mx = mid_pred(A[0], B[0], C[0]);
            *my = mid_pred(A[1], B[1], C[1]);
        }
    }

    tprintf(h->avctx,
            "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n",
            top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref,
            A[0], A[1], ref, *mx, *my, h->mb_x, h->mb_y, n, list);
}
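
/* Worked example (illustrative): if A = (4, 0), B = (2, 2) and C = (8, -2)
 * all use the requested reference, match_count is 3 and the predictor is the
 * component-wise median, i.e. *mx = mid_pred(4, 2, 8) = 4 and
 * *my = mid_pred(0, 2, -2) = 0. */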
/**
 * Get the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_16x8_motion(H264Context *const h,
                                              int n, int list, int ref,
                                              int *const mx, int *const my)
{
    if (n == 0) {
        const int top_ref      = h->ref_cache[list][scan8[0] - 8];
        const int16_t *const B = h->mv_cache[list][scan8[0] - 8];

        tprintf(h->avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                top_ref, B[0], B[1], h->mb_x, h->mb_y, n, list);

        if (top_ref == ref) {
            *mx = B[0];
            *my = B[1];
            return;
        }
    } else {
        const int left_ref     = h->ref_cache[list][scan8[8] - 1];
        const int16_t *const A = h->mv_cache[list][scan8[8] - 1];

        tprintf(h->avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                left_ref, A[0], A[1], h->mb_x, h->mb_y, n, list);

        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
            return;
        }
    }

    //RARE
    pred_motion(h, n, 4, list, ref, mx, my);
}
/**
 * Get the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_8x16_motion(H264Context *const h,
                                              int n, int list, int ref,
                                              int *const mx, int *const my)
{
    if (n == 0) {
        const int left_ref     = h->ref_cache[list][scan8[0] - 1];
        const int16_t *const A = h->mv_cache[list][scan8[0] - 1];

        tprintf(h->avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                left_ref, A[0], A[1], h->mb_x, h->mb_y, n, list);

        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
            return;
        }
    } else {
        const int16_t *C;
        int diagonal_ref;

        diagonal_ref = fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                diagonal_ref, C[0], C[1], h->mb_x, h->mb_y, n, list);

        if (diagonal_ref == ref) {
            *mx = C[0];
            *my = C[1];
            return;
        }
    }

    //RARE
    pred_motion(h, n, 2, list, ref, mx, my);
}
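
/* Illustrative note: these two helpers implement the 16x8/8x16 shortcut of
 * H.264 8.4.1.3 -- each half of the macroblock first tries the single
 * neighbor on its outer edge (top for the upper 16x8 half, left for the
 * left 8x16 half, and so on) and takes its MV verbatim when the reference
 * index matches; only on a mismatch do they fall back to the median
 * predictor pred_motion(). */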
#define FIX_MV_MBAFF(type, refn, mvn, idx)      \
    if (FRAME_MBAFF(h)) {                       \
        if (MB_FIELD(h)) {                      \
            if (!IS_INTERLACED(type)) {         \
                refn <<= 1;                     \
                AV_COPY32(mvbuf[idx], mvn);     \
                mvbuf[idx][1] /= 2;             \
                mvn = mvbuf[idx];               \
            }                                   \
        } else {                                \
            if (IS_INTERLACED(type)) {          \
                refn >>= 1;                     \
                AV_COPY32(mvbuf[idx], mvn);     \
                mvbuf[idx][1] <<= 1;            \
                mvn = mvbuf[idx];               \
            }                                   \
        }                                       \
    }
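
/* Worked example (illustrative): a field macroblock looking at a frame-coded
 * neighbor with ref = 1 and mv = (3, 10) sees ref 1 << 1 = 2 (field refs come
 * in top/bottom pairs) and a halved vertical component, mv = (3, 5); the
 * opposite frame-looking-at-field case halves the ref and doubles mv[1]. */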
static av_always_inline void pred_pskip_motion(H264Context *const h)
{
    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 };
    DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
    int8_t *ref     = h->cur_pic.ref_index[0];
    int16_t(*mv)[2] = h->cur_pic.motion_val[0];
    int top_ref, left_ref, diagonal_ref, match_count, mx, my;
    const int16_t *A, *B, *C;
    int b_stride = h->b_stride;

    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);

    /* To avoid doing an entire fill_decode_caches, we inline the relevant
     * parts here.
     * FIXME: this is a partial duplicate of the logic in fill_decode_caches,
     * but it's faster this way. Is there a way to avoid this duplication?
     */
    if (USES_LIST(h->left_type[LTOP], 0)) {
        left_ref = ref[4 * h->left_mb_xy[LTOP] + 1 + (h->left_block[0] & ~1)];
        A        = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride * h->left_block[0]];
        FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
        if (!(left_ref | AV_RN32A(A)))
            goto zeromv;
    } else if (h->left_type[LTOP]) {
        left_ref = LIST_NOT_USED;
        A        = zeromv;
    } else {
        goto zeromv;
    }

    if (USES_LIST(h->top_type, 0)) {
        top_ref = ref[4 * h->top_mb_xy + 2];
        B       = mv[h->mb2b_xy[h->top_mb_xy] + 3 * b_stride];
        FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
        if (!(top_ref | AV_RN32A(B)))
            goto zeromv;
    } else if (h->top_type) {
        top_ref = LIST_NOT_USED;
        B       = zeromv;
    } else {
        goto zeromv;
    }

    tprintf(h->avctx, "pred_pskip: (%d) (%d) at %2d %2d\n",
            top_ref, left_ref, h->mb_x, h->mb_y);

    if (USES_LIST(h->topright_type, 0)) {
        diagonal_ref = ref[4 * h->topright_mb_xy + 2];
        C            = mv[h->mb2b_xy[h->topright_mb_xy] + 3 * b_stride];
        FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
    } else if (h->topright_type) {
        diagonal_ref = LIST_NOT_USED;
        C            = zeromv;
    } else {
        if (USES_LIST(h->topleft_type, 0)) {
            diagonal_ref = ref[4 * h->topleft_mb_xy + 1 +
                               (h->topleft_partition & 2)];
            C            = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride +
                              (h->topleft_partition & 2 * b_stride)];
            FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
        } else if (h->topleft_type) {
            diagonal_ref = LIST_NOT_USED;
            C            = zeromv;
        } else {
            diagonal_ref = PART_NOT_AVAILABLE;
            C            = zeromv;
        }
    }

    match_count = !diagonal_ref + !top_ref + !left_ref;
    tprintf(h->avctx, "pred_pskip_motion match_count=%d\n", match_count);
    if (match_count > 1) {
        mx = mid_pred(A[0], B[0], C[0]);
        my = mid_pred(A[1], B[1], C[1]);
    } else if (match_count == 1) {
        if (!left_ref) {
            mx = A[0];
            my = A[1];
        } else if (!top_ref) {
            mx = B[0];
            my = B[1];
        } else {
            mx = C[0];
            my = C[1];
        }
    } else {
        mx = mid_pred(A[0], B[0], C[0]);
        my = mid_pred(A[1], B[1], C[1]);
    }

    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4);
    return;

zeromv:
    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
    return;
}
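
/* Illustrative note: the zeromv exits above implement the P_Skip rule that
 * the predicted MV is forced to (0, 0) whenever the left or top neighbor is
 * unavailable, or either of them uses reference 0 with a zero motion vector.
 * E.g. left_ref == 0 and A == (0, 0) makes (left_ref | AV_RN32A(A)) == 0
 * and takes the zeromv path. */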
static void fill_decode_neighbors(H264Context *h, int mb_type)
{
    const int mb_xy = h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
    static const uint8_t left_block_options[4][32] = {
        { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 },
        { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 },
        { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 },
        { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }
    };

    h->topleft_partition = -1;

    top_xy = mb_xy - (h->mb_stride << MB_FIELD(h));

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    topleft_xy    = top_xy - 1;
    topright_xy   = top_xy + 1;
    left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
    h->left_block = left_block_options[0];
    if (FRAME_MBAFF(h)) {
        const int left_mb_field_flag = IS_INTERLACED(h->cur_pic.mb_type[mb_xy - 1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if (h->mb_y & 1) {
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[LBOT] = left_xy[LTOP] = mb_xy - h->mb_stride - 1;
                if (curr_mb_field_flag) {
                    left_xy[LBOT] += h->mb_stride;
                    h->left_block  = left_block_options[3];
                } else {
                    topleft_xy += h->mb_stride;
                    /* take top left mv from the middle of the mb, as opposed
                     * to all other modes which use the bottom right partition */
                    h->topleft_partition = 0;
                    h->left_block        = left_block_options[1];
                }
            }
        } else {
            if (curr_mb_field_flag) {
                topleft_xy  += h->mb_stride & (((h->cur_pic.mb_type[top_xy - 1] >> 7) & 1) - 1);
                topright_xy += h->mb_stride & (((h->cur_pic.mb_type[top_xy + 1] >> 7) & 1) - 1);
                top_xy      += h->mb_stride & (((h->cur_pic.mb_type[top_xy]     >> 7) & 1) - 1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                if (curr_mb_field_flag) {
                    left_xy[LBOT] += h->mb_stride;
                    h->left_block  = left_block_options[3];
                } else {
                    h->left_block = left_block_options[2];
                }
            }
        }
    }

    h->topleft_mb_xy    = topleft_xy;
    h->top_mb_xy        = top_xy;
    h->topright_mb_xy   = topright_xy;
    h->left_mb_xy[LTOP] = left_xy[LTOP];
    h->left_mb_xy[LBOT] = left_xy[LBOT];
    //FIXME do we need all in the context?
    h->topleft_type    = h->cur_pic.mb_type[topleft_xy];
    h->top_type        = h->cur_pic.mb_type[top_xy];
    h->topright_type   = h->cur_pic.mb_type[topright_xy];
    h->left_type[LTOP] = h->cur_pic.mb_type[left_xy[LTOP]];
    h->left_type[LBOT] = h->cur_pic.mb_type[left_xy[LBOT]];

    if (FMO) {
        if (h->slice_table[topleft_xy] != h->slice_num)
            h->topleft_type = 0;
        if (h->slice_table[top_xy] != h->slice_num)
            h->top_type = 0;
        if (h->slice_table[left_xy[LTOP]] != h->slice_num)
            h->left_type[LTOP] = h->left_type[LBOT] = 0;
    } else {
        if (h->slice_table[topleft_xy] != h->slice_num) {
            h->topleft_type = 0;
            if (h->slice_table[top_xy] != h->slice_num)
                h->top_type = 0;
            if (h->slice_table[left_xy[LTOP]] != h->slice_num)
                h->left_type[LTOP] = h->left_type[LBOT] = 0;
        }
    }
    if (h->slice_table[topright_xy] != h->slice_num)
        h->topright_type = 0;
}
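
/* Illustrative note: in progressive frames top_xy is simply
 * mb_xy - mb_stride. With MBAFF, MB_FIELD(h) doubles the stride so a field
 * macroblock skips over its MB pair, and the masking expressions like
 * h->mb_stride & (((mb_type >> 7) & 1) - 1) add one more row only when the
 * macroblock above is frame-coded (assuming bit 7 is the MB_TYPE_INTERLACED
 * flag, as IS_INTERLACED() tests elsewhere). */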
static void fill_decode_caches(H264Context *h, int mb_type)
{
    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
    int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
    const uint8_t *left_block = h->left_block;
    int i;
    uint8_t *nnz;
    uint8_t *nnz_cache;

    topleft_xy      = h->topleft_mb_xy;
    top_xy          = h->top_mb_xy;
    topright_xy     = h->topright_mb_xy;
    left_xy[LTOP]   = h->left_mb_xy[LTOP];
    left_xy[LBOT]   = h->left_mb_xy[LBOT];
    topleft_type    = h->topleft_type;
    top_type        = h->top_type;
    topright_type   = h->topright_type;
    left_type[LTOP] = h->left_type[LTOP];
    left_type[LBOT] = h->left_type[LBOT];

    if (!IS_SKIP(mb_type)) {
        if (IS_INTRA(mb_type)) {
            int type_mask = h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
            h->topleft_samples_available     =
                h->top_samples_available     =
                    h->left_samples_available = 0xFFFF;
            h->topright_samples_available     = 0xEEEA;

            if (!(top_type & type_mask)) {
                h->topleft_samples_available  = 0xB3FF;
                h->top_samples_available      = 0x33FF;
                h->topright_samples_available = 0x26EA;
            }
            if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) {
                if (IS_INTERLACED(mb_type)) {
                    if (!(left_type[LTOP] & type_mask)) {
                        h->topleft_samples_available &= 0xDFFF;
                        h->left_samples_available    &= 0x5FFF;
                    }
                    if (!(left_type[LBOT] & type_mask)) {
                        h->topleft_samples_available &= 0xFF5F;
                        h->left_samples_available    &= 0xFF5F;
                    }
                } else {
                    int left_typei = h->cur_pic.mb_type[left_xy[LTOP] + h->mb_stride];

                    assert(left_xy[LTOP] == left_xy[LBOT]);
                    if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) {
                        h->topleft_samples_available &= 0xDF5F;
                        h->left_samples_available    &= 0x5F5F;
                    }
                }
            } else {
                if (!(left_type[LTOP] & type_mask)) {
                    h->topleft_samples_available &= 0xDF5F;
                    h->left_samples_available    &= 0x5F5F;
                }
            }

            if (!(topleft_type & type_mask))
                h->topleft_samples_available &= 0x7FFF;

            if (!(topright_type & type_mask))
                h->topright_samples_available &= 0xFBFF;

            if (IS_INTRA4x4(mb_type)) {
                if (IS_INTRA4x4(top_type)) {
                    AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
                } else {
                    h->intra4x4_pred_mode_cache[4 + 8 * 0] =
                    h->intra4x4_pred_mode_cache[5 + 8 * 0] =
                    h->intra4x4_pred_mode_cache[6 + 8 * 0] =
                    h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
                }
                for (i = 0; i < 2; i++) {
                    if (IS_INTRA4x4(left_type[LEFT(i)])) {
                        int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
                        h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
                        h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
                    } else {
                        h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
                        h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
                    }
                }
            }
        }

/*
 * 0 . T T. T T T T
 * 1 L . .L . . . .
 * 2 L . .L . . . .
 * 3 . T TL . . . .
 * 4 L . .L . . . .
 * 5 L . .. . . . .
 */
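/* Illustrative note on the layout sketched above: the *_cache arrays use a
 * stride of 8, with the current MB's 4x4 blocks at scan8[0]..scan8[15] and a
 * guard border holding the top (T) and left (L) neighbors, so index - 8 is
 * always the block above and index - 1 the block to the left. */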
/* FIXME: constraint_intra_pred & partitioning & nnz
 * (let us hope this is just a typo in the spec) */
        nnz_cache = h->non_zero_count_cache;
        if (top_type) {
            nnz = h->non_zero_count[top_xy];
            AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]);
            if (!h->chroma_y_shift) {
                AV_COPY32(&nnz_cache[4 + 8 *  5], &nnz[4 *  7]);
                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]);
            } else {
                AV_COPY32(&nnz_cache[4 + 8 *  5], &nnz[4 * 5]);
                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]);
            }
        } else {
            uint32_t top_empty = CABAC(h) && !IS_INTRA(mb_type) ? 0 : 0x40404040;
            AV_WN32A(&nnz_cache[4 + 8 *  0], top_empty);
            AV_WN32A(&nnz_cache[4 + 8 *  5], top_empty);
            AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty);
        }

        for (i = 0; i < 2; i++) {
            if (left_type[LEFT(i)]) {
                nnz = h->non_zero_count[left_xy[LEFT(i)]];
                nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]];
                nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]];
                if (CHROMA444(h)) {
                    nnz_cache[3 + 8 *  6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 4 * 4];
                    nnz_cache[3 + 8 *  7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 4 * 4];
                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4];
                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4];
                } else if (CHROMA422(h)) {
                    nnz_cache[3 + 8 *  6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4];
                    nnz_cache[3 + 8 *  7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4];
                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4];
                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4];
                } else {
                    nnz_cache[3 + 8 *  6 + 8 * i] = nnz[left_block[8 + 4 + 2 * i]];
                    nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]];
                }
            } else {
                nnz_cache[3 + 8 *  1 + 2 * 8 * i] =
                nnz_cache[3 + 8 *  2 + 2 * 8 * i] =
                nnz_cache[3 + 8 *  6 + 2 * 8 * i] =
                nnz_cache[3 + 8 *  7 + 2 * 8 * i] =
                nnz_cache[3 + 8 * 11 + 2 * 8 * i] =
                nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC(h) && !IS_INTRA(mb_type) ? 0 : 64;
            }
        }

        if (CABAC(h)) {
            // top_cbp
            if (top_type)
                h->top_cbp = h->cbp_table[top_xy];
            else
                h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
            // left_cbp
            if (left_type[LTOP]) {
                h->left_cbp =   (h->cbp_table[left_xy[LTOP]] & 0x7F0) |
                               ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) |
                              (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2);
            } else {
                h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
            }
        }
    }

    if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)) {
        int list;
        int b_stride = h->b_stride;
        for (list = 0; list < h->list_count; list++) {
            int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
            int8_t *ref       = h->cur_pic.ref_index[list];
            int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
            int16_t(*mv)[2]       = h->cur_pic.motion_val[list];
            if (!USES_LIST(mb_type, list))
                continue;
            assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));

            if (USES_LIST(top_type, list)) {
                const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
                AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]);
                ref_cache[0 - 1 * 8] =
                ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2];
                ref_cache[2 - 1 * 8] =
                ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3];
            } else {
                AV_ZERO128(mv_cache[0 - 1 * 8]);
                AV_WN32A(&ref_cache[0 - 1 * 8],
                         ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u);
            }

            if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) {
                for (i = 0; i < 2; i++) {
                    int cache_idx = -1 + i * 2 * 8;
                    if (USES_LIST(left_type[LEFT(i)], list)) {
                        const int b_xy  = h->mb2b_xy[left_xy[LEFT(i)]] + 3;
                        const int b8_xy = 4 * left_xy[LEFT(i)] + 1;
                        AV_COPY32(mv_cache[cache_idx],
                                  mv[b_xy + b_stride * left_block[0 + i * 2]]);
                        AV_COPY32(mv_cache[cache_idx + 8],
                                  mv[b_xy + b_stride * left_block[1 + i * 2]]);
                        ref_cache[cache_idx]     = ref[b8_xy + (left_block[0 + i * 2] & ~1)];
                        ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)];
                    } else {
                        AV_ZERO32(mv_cache[cache_idx]);
                        AV_ZERO32(mv_cache[cache_idx + 8]);
                        ref_cache[cache_idx]     =
                        ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? LIST_NOT_USED
                                                                        : PART_NOT_AVAILABLE;
                    }
                }
            } else {
                if (USES_LIST(left_type[LTOP], list)) {
                    const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                    const int b8_xy = 4 * left_xy[LTOP] + 1;
                    AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]);
                    ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)];
                } else {
                    AV_ZERO32(mv_cache[-1]);
                    ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED
                                                    : PART_NOT_AVAILABLE;
                }
            }

            if (USES_LIST(topright_type, list)) {
                const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride;
                AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]);
                ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2];
            } else {
                AV_ZERO32(mv_cache[4 - 1 * 8]);
                ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED
                                                     : PART_NOT_AVAILABLE;
            }
            if (ref_cache[4 - 1 * 8] < 0) {
                if (USES_LIST(topleft_type, list)) {
                    const int b_xy  = h->mb2b_xy[topleft_xy] + 3 + b_stride +
                                      (h->topleft_partition & 2 * b_stride);
                    const int b8_xy = 4 * topleft_xy + 1 + (h->topleft_partition & 2);
                    AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]);
                    ref_cache[-1 - 1 * 8] = ref[b8_xy];
                } else {
                    AV_ZERO32(mv_cache[-1 - 1 * 8]);
                    ref_cache[-1 - 1 * 8] = topleft_type ? LIST_NOT_USED
                                                         : PART_NOT_AVAILABLE;
                }
            }

            if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF(h))
                continue;

            if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) {
                uint8_t(*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]];
                uint8_t(*mvd)[2]       = h->mvd_table[list];
                ref_cache[2 + 8 * 0] =
                ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE;
                AV_ZERO32(mv_cache[2 + 8 * 0]);
                AV_ZERO32(mv_cache[2 + 8 * 2]);

                if (CABAC(h)) {
                    if (USES_LIST(top_type, list)) {
                        const int b_xy = h->mb2br_xy[top_xy];
                        AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]);
                    } else {
                        AV_ZERO64(mvd_cache[0 - 1 * 8]);
                    }
                    if (USES_LIST(left_type[LTOP], list)) {
                        const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6;
                        AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]);
                        AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]);
                    } else {
                        AV_ZERO16(mvd_cache[-1 + 0 * 8]);
                        AV_ZERO16(mvd_cache[-1 + 1 * 8]);
                    }
                    if (USES_LIST(left_type[LBOT], list)) {
                        const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6;
                        AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]);
                        AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]);
                    } else {
                        AV_ZERO16(mvd_cache[-1 + 2 * 8]);
                        AV_ZERO16(mvd_cache[-1 + 3 * 8]);
                    }
                    AV_ZERO16(mvd_cache[2 + 8 * 0]);
                    AV_ZERO16(mvd_cache[2 + 8 * 2]);
                    if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
                        uint8_t *direct_cache = &h->direct_cache[scan8[0]];
                        uint8_t *direct_table = h->direct_table;
                        fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1);

                        if (IS_DIRECT(top_type)) {
                            AV_WN32A(&direct_cache[-1 * 8],
                                     0x01010101u * (MB_TYPE_DIRECT2 >> 1));
                        } else if (IS_8X8(top_type)) {
                            int b8_xy = 4 * top_xy;
                            direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2];
                            direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3];
                        } else {
                            AV_WN32A(&direct_cache[-1 * 8],
                                     0x01010101 * (MB_TYPE_16x16 >> 1));
                        }

                        if (IS_DIRECT(left_type[LTOP]))
                            direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1;
                        else if (IS_8X8(left_type[LTOP]))
                            direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)];
                        else
                            direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1;

                        if (IS_DIRECT(left_type[LBOT]))
                            direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1;
                        else if (IS_8X8(left_type[LBOT]))
                            direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)];
                        else
                            direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1;
                    }
                }
            }

#define MAP_MVS                                         \
    MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type)         \
    MAP_F2F(scan8[0] + 0 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 1 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 2 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 3 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type)        \
    MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP])      \
    MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP])      \
    MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT])      \
    MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT])

            if (FRAME_MBAFF(h)) {
                if (MB_FIELD(h)) {

#define MAP_F2F(idx, mb_type)                                           \
    if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {      \
        h->ref_cache[list][idx]    <<= 1;                               \
        h->mv_cache[list][idx][1]   /= 2;                               \
        h->mvd_cache[list][idx][1] >>= 1;                               \
    }

                    MAP_MVS
                } else {

#undef MAP_F2F
#define MAP_F2F(idx, mb_type)                                           \
    if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {       \
        h->ref_cache[list][idx]    >>= 1;                               \
        h->mv_cache[list][idx][1]  <<= 1;                               \
        h->mvd_cache[list][idx][1] <<= 1;                               \
    }

                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }

    h->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
}
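
/* Illustrative note: MAP_F2F/MAP_MVS above apply the same frame<->field
 * scaling as FIX_MV_MBAFF, but to all ten neighbor entries around the
 * current MB in the ref/mv/mvd caches at once (one top-left, four top,
 * one top-right and four left positions). */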
/**
 * Decode a P_SKIP or B_SKIP macroblock.
 */
static void av_unused decode_mb_skip(H264Context *h)
{
    const int mb_xy = h->mb_xy;
    int mb_type     = 0;

    memset(h->non_zero_count[mb_xy], 0, 48);

    if (MB_FIELD(h))
        mb_type |= MB_TYPE_INTERLACED;

    if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
        // just for fill_caches. pred_direct_motion will set the real mb_type
        mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP;
        if (h->direct_spatial_mv_pred) {
            fill_decode_neighbors(h, mb_type);
            fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
        }
        ff_h264_pred_direct_motion(h, &mb_type);
        mb_type |= MB_TYPE_SKIP;
    } else {
        mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP;

        fill_decode_neighbors(h, mb_type);
        pred_pskip_motion(h);
    }

    write_back_motion(h, mb_type);
    h->cur_pic.mb_type[mb_xy]      = mb_type;
    h->cur_pic.qscale_table[mb_xy] = h->qscale;
    h->slice_table[mb_xy]          = h->slice_num;
    h->prev_mb_skipped             = 1;
}
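
/* Illustrative summary of the flow above (not additional API): a skipped P
 * macroblock runs fill_decode_neighbors() -> pred_pskip_motion() ->
 * write_back_motion(), while a B_SKIP additionally calls
 * ff_h264_pred_direct_motion() to derive its direct-mode MVs; the other
 * pred_* helpers work from the caches, apart from the MBAFF corner case in
 * fetch_diagonal_mv(), which reads cur_pic_ptr directly. */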

#endif /* AVCODEC_H264_MVPRED_H */