You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1242 lines
56KB

  1. /*
  2. * VC-1 and WMV3 decoder
  3. * Copyright (c) 2011 Mashiat Sarker Shakkhar
  4. * Copyright (c) 2006-2007 Konstantin Shishkov
  5. * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * VC-1 and WMV3 loopfilter
  26. */
  27. #include "avcodec.h"
  28. #include "mpegvideo.h"
  29. #include "vc1.h"
  30. #include "vc1dsp.h"
  31. static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
  32. int16_t (*right_block)[64], int left_fieldtx,
  33. int right_fieldtx, int block_num)
  34. {
  35. switch (block_num) {
  36. case 0:
  37. v->vc1dsp.vc1_h_s_overlap(left_block[2],
  38. right_block[0],
  39. left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
  40. left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
  41. left_fieldtx || right_fieldtx ? 0 : 1);
  42. break;
  43. case 1:
  44. v->vc1dsp.vc1_h_s_overlap(right_block[0],
  45. right_block[2],
  46. 8,
  47. 8,
  48. right_fieldtx ? 0 : 1);
  49. break;
  50. case 2:
  51. v->vc1dsp.vc1_h_s_overlap(!left_fieldtx && right_fieldtx ? left_block[2] + 8 : left_block[3],
  52. left_fieldtx && !right_fieldtx ? right_block[0] + 8 : right_block[1],
  53. left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
  54. left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
  55. left_fieldtx || right_fieldtx ? 2 : 1);
  56. break;
  57. case 3:
  58. v->vc1dsp.vc1_h_s_overlap(right_block[1],
  59. right_block[3],
  60. 8,
  61. 8,
  62. right_fieldtx ? 2 : 1);
  63. break;
  64. case 4:
  65. case 5:
  66. v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num], 8, 8, 1);
  67. break;
  68. }
  69. }
  70. static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
  71. int16_t (*bottom_block)[64], int block_num)
  72. {
  73. switch (block_num) {
  74. case 0:
  75. v->vc1dsp.vc1_v_s_overlap(top_block[1], bottom_block[0]);
  76. break;
  77. case 1:
  78. v->vc1dsp.vc1_v_s_overlap(top_block[3], bottom_block[2]);
  79. break;
  80. case 2:
  81. v->vc1dsp.vc1_v_s_overlap(bottom_block[0], bottom_block[1]);
  82. break;
  83. case 3:
  84. v->vc1dsp.vc1_v_s_overlap(bottom_block[2], bottom_block[3]);
  85. break;
  86. case 4:
  87. case 5:
  88. v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
  89. break;
  90. }
  91. }
  92. void ff_vc1_i_overlap_filter(VC1Context *v)
  93. {
  94. MpegEncContext *s = &v->s;
  95. int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
  96. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  97. int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  98. int i;
  99. topleft_blk = v->block[v->topleft_blk_idx];
  100. top_blk = v->block[v->top_blk_idx];
  101. left_blk = v->block[v->left_blk_idx];
  102. cur_blk = v->block[v->cur_blk_idx];
  103. /* Within a MB, the horizontal overlap always runs before the vertical.
  104. * To accomplish that, we run the H on the left and internal vertical
  105. * borders of the currently decoded MB. Then, we wait for the next overlap
  106. * iteration to do H overlap on the right edge of this MB, before moving
  107. * over and running the V overlap on the top and internal horizontal
  108. * borders. Therefore, the H overlap trails by one MB col and the
  109. * V overlap trails by one MB row. This is reflected in the time at which
  110. * we run the put_pixels loop, i.e. delayed by one row and one column. */
  111. for (i = 0; i < block_count; i++) {
  112. if (s->mb_x == 0 && (i & 5) != 1)
  113. continue;
  114. if (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
  115. (v->condover == CONDOVER_ALL ||
  116. (v->over_flags_plane[mb_pos] &&
  117. ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1])))))
  118. vc1_h_overlap_filter(v,
  119. s->mb_x ? left_blk : cur_blk, cur_blk,
  120. v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
  121. v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
  122. i);
  123. }
  124. if (v->fcm != ILACE_FRAME)
  125. for (i = 0; i < block_count; i++) {
  126. if (s->first_slice_line && !(i & 2))
  127. continue;
  128. if (s->mb_x &&
  129. (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
  130. (v->condover == CONDOVER_ALL ||
  131. (v->over_flags_plane[mb_pos - 1] &&
  132. ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))))
  133. vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
  134. if (s->mb_x == s->mb_width - 1 &&
  135. (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
  136. (v->condover == CONDOVER_ALL ||
  137. (v->over_flags_plane[mb_pos] &&
  138. ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride]))))))
  139. vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
  140. }
  141. }
  142. void ff_vc1_p_overlap_filter(VC1Context *v)
  143. {
  144. MpegEncContext *s = &v->s;
  145. int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
  146. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  147. int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  148. int i;
  149. topleft_blk = v->block[v->topleft_blk_idx];
  150. top_blk = v->block[v->top_blk_idx];
  151. left_blk = v->block[v->left_blk_idx];
  152. cur_blk = v->block[v->cur_blk_idx];
  153. for (i = 0; i < block_count; i++) {
  154. if (s->mb_x == 0 && (i & 5) != 1)
  155. continue;
  156. if (v->mb_type[0][s->block_index[i]] && v->mb_type[0][s->block_index[i] - 1])
  157. vc1_h_overlap_filter(v,
  158. s->mb_x ? left_blk : cur_blk, cur_blk,
  159. v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
  160. v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
  161. i);
  162. }
  163. if (v->fcm != ILACE_FRAME)
  164. for (i = 0; i < block_count; i++) {
  165. if (s->first_slice_line && !(i & 2))
  166. continue;
  167. if (s->mb_x && v->mb_type[0][s->block_index[i] - 2 + (i > 3)] &&
  168. v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 2 + (i > 3)])
  169. vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
  170. if (s->mb_x == s->mb_width - 1)
  171. if (v->mb_type[0][s->block_index[i]] &&
  172. v->mb_type[0][s->block_index[i] - s->block_wrap[i]])
  173. vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
  174. }
  175. }
  176. #define LEFT_EDGE (1 << 0)
  177. #define RIGHT_EDGE (1 << 1)
  178. #define TOP_EDGE (1 << 2)
  179. #define BOTTOM_EDGE (1 << 3)
  180. static av_always_inline void vc1_i_h_loop_filter(VC1Context *v, uint8_t *dest,
  181. uint32_t flags, int block_num)
  182. {
  183. MpegEncContext *s = &v->s;
  184. int pq = v->pq;
  185. uint8_t *dst;
  186. if (block_num & 2)
  187. return;
  188. if (!(flags & LEFT_EDGE) || (block_num & 5) == 1) {
  189. if (block_num > 3)
  190. dst = dest;
  191. else
  192. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  193. if (v->fcm == ILACE_FRAME)
  194. if (block_num > 3) {
  195. v->vc1dsp.vc1_h_loop_filter4(dst, 2 * s->uvlinesize, pq);
  196. v->vc1dsp.vc1_h_loop_filter4(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
  197. } else {
  198. v->vc1dsp.vc1_h_loop_filter8(dst, 2 * s->linesize, pq);
  199. v->vc1dsp.vc1_h_loop_filter8(dst + s->linesize, 2 * s->linesize, pq);
  200. }
  201. else
  202. if (block_num > 3)
  203. v->vc1dsp.vc1_h_loop_filter8(dst, s->uvlinesize, pq);
  204. else
  205. v->vc1dsp.vc1_h_loop_filter16(dst, s->linesize, pq);
  206. }
  207. }
  208. static av_always_inline void vc1_i_v_loop_filter(VC1Context *v, uint8_t *dest,
  209. uint32_t flags, uint8_t fieldtx,
  210. int block_num)
  211. {
  212. MpegEncContext *s = &v->s;
  213. int pq = v->pq;
  214. uint8_t *dst;
  215. if ((block_num & 5) == 1)
  216. return;
  217. if (!(flags & TOP_EDGE) || block_num & 2) {
  218. if (block_num > 3)
  219. dst = dest;
  220. else
  221. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  222. if (v->fcm == ILACE_FRAME) {
  223. if (block_num > 3) {
  224. v->vc1dsp.vc1_v_loop_filter8(dst, 2 * s->uvlinesize, pq);
  225. v->vc1dsp.vc1_v_loop_filter8(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
  226. } else if (block_num < 2 || !fieldtx) {
  227. v->vc1dsp.vc1_v_loop_filter16(dst, 2 * s->linesize, pq);
  228. v->vc1dsp.vc1_v_loop_filter16(dst + s->linesize, 2 * s->linesize, pq);
  229. }
  230. } else
  231. if (block_num > 3)
  232. v->vc1dsp.vc1_v_loop_filter8(dst, s->uvlinesize, pq);
  233. else
  234. v->vc1dsp.vc1_v_loop_filter16(dst, s->linesize, pq);
  235. }
  236. }
  237. void ff_vc1_i_loop_filter(VC1Context *v)
  238. {
  239. MpegEncContext *s = &v->s;
  240. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  241. int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  242. uint8_t *dest, fieldtx;
  243. uint32_t flags = 0;
  244. int i;
  245. /* Within a MB, the vertical loop filter always runs before the horizontal.
  246. * To accomplish that, we run the V loop filter on top and internal
  247. * horizontal borders of the last overlap filtered MB. Then, we wait for
  248. * the loop filter iteration on the next row to do V loop filter on the
  249. * bottom edge of this MB, before moving over and running the H loop
  250. * filter on the left and internal vertical borders. Therefore, the loop
  251. * filter trails by one row and one column relative to the overlap filter
  252. * and two rows and two columns relative to the decoding loop. */
  253. if (!s->first_slice_line) {
  254. dest = s->dest[0] - 16 * s->linesize - 16;
  255. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  256. if (s->mb_x) {
  257. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
  258. for (i = 0; i < block_count; i++)
  259. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, fieldtx, i);
  260. }
  261. if (s->mb_x == v->end_mb_x - 1) {
  262. dest += 16;
  263. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
  264. for (i = 0; i < block_count; i++)
  265. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, fieldtx, i);
  266. }
  267. }
  268. if (s->mb_y == s->end_mb_y - 1) {
  269. dest = s->dest[0] - 16;
  270. flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  271. if (s->mb_x) {
  272. fieldtx = v->fieldtx_plane[mb_pos - 1];
  273. for (i = 0; i < block_count; i++)
  274. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, fieldtx, i);
  275. }
  276. if (s->mb_x == v->end_mb_x - 1) {
  277. dest += 16;
  278. fieldtx = v->fieldtx_plane[mb_pos];
  279. for (i = 0; i < block_count; i++)
  280. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, fieldtx, i);
  281. }
  282. }
  283. if (s->mb_y >= s->start_mb_y + 2) {
  284. dest = s->dest[0] - 32 * s->linesize - 16;
  285. if (s->mb_x) {
  286. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  287. for (i = 0; i < block_count; i++)
  288. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest, flags, i);
  289. }
  290. if (s->mb_x == v->end_mb_x - 1) {
  291. dest += 16;
  292. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  293. for (i = 0; i < block_count; i++)
  294. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest, flags, i);
  295. }
  296. }
  297. if (s->mb_y == s->end_mb_y - 1) {
  298. if (s->mb_y >= s->start_mb_y + 1) {
  299. dest = s->dest[0] - 16 * s->linesize - 16;
  300. if (s->mb_x) {
  301. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  302. for (i = 0; i < block_count; i++)
  303. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, i);
  304. }
  305. if (s->mb_x == v->end_mb_x - 1) {
  306. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  307. dest += 16;
  308. for (i = 0; i < block_count; i++)
  309. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, i);
  310. }
  311. }
  312. dest = s->dest[0] - 16;
  313. if (s->mb_x) {
  314. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  315. for (i = 0; i < block_count; i++)
  316. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, i);
  317. }
  318. if (s->mb_x == v->end_mb_x - 1) {
  319. dest += 16;
  320. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  321. for (i = 0; i < block_count; i++)
  322. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, i);
  323. }
  324. }
  325. }
  326. static av_always_inline void vc1_p_h_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  327. uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
  328. int *ttblk, uint32_t flags, int block_num)
  329. {
  330. MpegEncContext *s = &v->s;
  331. int pq = v->pq;
  332. uint32_t left_cbp = cbp[0] >> (block_num * 4), right_cbp;
  333. uint8_t left_is_intra, right_is_intra;
  334. int tt;
  335. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  336. uint8_t *dst;
  337. if (block_num > 3)
  338. dst = dest;
  339. else
  340. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  341. if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
  342. left_is_intra = is_intra[0] & (1 << block_num);
  343. if (block_num > 3) {
  344. right_is_intra = is_intra[1] & (1 << block_num);
  345. right_cbp = cbp[1] >> (block_num * 4);
  346. } else if (block_num & 1) {
  347. right_is_intra = is_intra[1] & (1 << block_num - 1);
  348. right_cbp = cbp[1] >> ((block_num - 1) * 4);
  349. } else {
  350. right_is_intra = is_intra[0] & (1 << block_num + 1);
  351. right_cbp = cbp[0] >> ((block_num + 1) * 4);
  352. }
  353. if (left_is_intra || right_is_intra ||
  354. mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1] ||
  355. (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[1]))
  356. v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
  357. else {
  358. idx = (left_cbp | (right_cbp >> 1)) & 5;
  359. if (idx & 1)
  360. v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 8, linesize, pq);
  361. if (idx & 4)
  362. v->vc1dsp.vc1_h_loop_filter4(dst + 8, linesize, pq);
  363. }
  364. }
  365. tt = ttblk[0] >> (block_num * 4) & 0xf;
  366. if (tt == TT_4X4 || tt == TT_4X8) {
  367. if (left_cbp & 3)
  368. v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  369. if (left_cbp & 12)
  370. v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
  371. }
  372. }
  373. static av_always_inline void vc1_p_v_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  374. uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
  375. int *ttblk, uint32_t flags, int block_num)
  376. {
  377. MpegEncContext *s = &v->s;
  378. int pq = v->pq;
  379. uint32_t top_cbp = cbp[0] >> (block_num * 4), bottom_cbp;
  380. uint8_t top_is_intra, bottom_is_intra;
  381. int tt;
  382. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  383. uint8_t *dst;
  384. if (block_num > 3)
  385. dst = dest;
  386. else
  387. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  388. if(!(flags & BOTTOM_EDGE) || block_num < 2) {
  389. top_is_intra = is_intra[0] & (1 << block_num);
  390. if (block_num > 3) {
  391. bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num);
  392. bottom_cbp = cbp[s->mb_stride] >> (block_num * 4);
  393. } else if (block_num < 2) {
  394. bottom_is_intra = is_intra[0] & (1 << block_num + 2);
  395. bottom_cbp = cbp[0] >> ((block_num + 2) * 4);
  396. } else {
  397. bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num - 2);
  398. bottom_cbp = cbp[s->mb_stride] >> ((block_num - 2) * 4);
  399. }
  400. if (top_is_intra || bottom_is_intra ||
  401. mv[0][0] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][0] ||
  402. mv[0][1] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][1] ||
  403. (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[block_num > 3 ? s->mb_stride : s->b8_stride]))
  404. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
  405. else {
  406. idx = (top_cbp | (bottom_cbp >> 2)) & 3;
  407. if (idx & 1)
  408. v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize + 4, linesize, pq);
  409. if (idx & 2)
  410. v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize, linesize, pq);
  411. }
  412. }
  413. tt = ttblk[0] >> (block_num * 4) & 0xf;
  414. if (tt == TT_4X4 || tt == TT_8X4) {
  415. if (top_cbp & 5)
  416. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  417. if (top_cbp & 10)
  418. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
  419. }
  420. }
  421. void ff_vc1_p_loop_filter(VC1Context *v)
  422. {
  423. MpegEncContext *s = &v->s;
  424. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  425. uint8_t *dest;
  426. uint32_t *cbp;
  427. uint8_t *is_intra;
  428. int16_t (*uvmv)[2];
  429. int *ttblk;
  430. uint32_t flags;
  431. int i;
  432. /* Within a MB, the vertical loop filter always runs before the horizontal.
  433. * To accomplish that, we run the V loop filter on all applicable
  434. * horizontal borders of the MB above the last overlap filtered MB. Then,
  435. * we wait for the next loop filter iteration to do H loop filter on all
  436. * applicable vertical borders of this MB. Therefore, the loop filter
  437. * trails by one row and one column relative to the overlap filter and two
  438. * rows and two columns relative to the decoding loop. */
  439. if (s->mb_y >= s->start_mb_y + 2) {
  440. if (s->mb_x) {
  441. dest = s->dest[0] - 32 * s->linesize - 16;
  442. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
  443. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
  444. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
  445. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
  446. flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
  447. for (i = 0; i < block_count; i++)
  448. vc1_p_v_loop_filter(v,
  449. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
  450. cbp,
  451. is_intra,
  452. i > 3 ? uvmv :
  453. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  454. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
  455. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  456. ttblk,
  457. flags,
  458. i);
  459. }
  460. if (s->mb_x == s->mb_width - 1) {
  461. dest = s->dest[0] - 32 * s->linesize;
  462. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
  463. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
  464. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
  465. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
  466. flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
  467. for (i = 0; i < block_count; i++)
  468. vc1_p_v_loop_filter(v,
  469. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
  470. cbp,
  471. is_intra,
  472. i > 3 ? uvmv :
  473. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  474. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
  475. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  476. ttblk,
  477. flags,
  478. i);
  479. }
  480. }
  481. if (s->mb_y == s->end_mb_y - 1) {
  482. if (s->mb_x) {
  483. if (s->mb_y >= s->start_mb_y + 1) {
  484. dest = s->dest[0] - 16 * s->linesize - 16;
  485. cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
  486. is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
  487. uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
  488. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  489. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  490. for (i = 0; i < block_count; i++)
  491. vc1_p_v_loop_filter(v,
  492. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  493. cbp,
  494. is_intra,
  495. i > 3 ? uvmv :
  496. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  497. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
  498. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  499. ttblk,
  500. flags,
  501. i);
  502. }
  503. dest = s->dest[0] - 16;
  504. cbp = &v->cbp[s->mb_x - 1];
  505. is_intra = &v->is_intra[s->mb_x - 1];
  506. uvmv = &v->luma_mv[s->mb_x - 1];
  507. ttblk = &v->ttblk[s->mb_x - 1];
  508. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  509. for (i = 0; i < block_count; i++)
  510. vc1_p_v_loop_filter(v,
  511. i > 3 ? s->dest[i - 3] - 8 : dest,
  512. cbp,
  513. is_intra,
  514. i > 3 ? uvmv :
  515. &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
  516. i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
  517. &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
  518. ttblk,
  519. flags,
  520. i);
  521. }
  522. if (s->mb_x == s->mb_width - 1) {
  523. if (s->mb_y >= s->start_mb_y + 1) {
  524. dest = s->dest[0] - 16 * s->linesize;
  525. cbp = &v->cbp[s->mb_x - s->mb_stride];
  526. is_intra = &v->is_intra[s->mb_x - s->mb_stride];
  527. uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
  528. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  529. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  530. for (i = 0; i < block_count; i++)
  531. vc1_p_v_loop_filter(v,
  532. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  533. cbp,
  534. is_intra,
  535. i > 3 ? uvmv :
  536. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  537. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
  538. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  539. ttblk,
  540. flags,
  541. i);
  542. }
  543. dest = s->dest[0];
  544. cbp = &v->cbp[s->mb_x];
  545. is_intra = &v->is_intra[s->mb_x];
  546. uvmv = &v->luma_mv[s->mb_x];
  547. ttblk = &v->ttblk[s->mb_x];
  548. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  549. for (i = 0; i < block_count; i++)
  550. vc1_p_v_loop_filter(v,
  551. i > 3 ? s->dest[i - 3] : dest,
  552. cbp,
  553. is_intra,
  554. i > 3 ? uvmv :
  555. &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
  556. i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
  557. &v->mv_f[0][s->block_index[i] + v->blocks_off],
  558. ttblk,
  559. flags,
  560. i);
  561. }
  562. }
  563. if (s->mb_y >= s->start_mb_y + 2) {
  564. if (s->mb_x >= 2) {
  565. dest = s->dest[0] - 32 * s->linesize - 32;
  566. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 2];
  567. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 2];
  568. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 2];
  569. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
  570. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  571. for (i = 0; i < block_count; i++)
  572. vc1_p_h_loop_filter(v,
  573. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
  574. cbp,
  575. is_intra,
  576. i > 3 ? uvmv :
  577. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
  578. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 2 + v->mb_off] :
  579. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
  580. ttblk,
  581. flags,
  582. i);
  583. }
  584. if (s->mb_x == s->mb_width - 1) {
  585. if (s->mb_x >= 1) {
  586. dest = s->dest[0] - 32 * s->linesize - 16;
  587. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
  588. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
  589. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
  590. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
  591. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  592. for (i = 0; i < block_count; i++)
  593. vc1_p_h_loop_filter(v,
  594. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
  595. cbp,
  596. is_intra,
  597. i > 3 ? uvmv :
  598. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  599. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
  600. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  601. ttblk,
  602. flags,
  603. i);
  604. }
  605. dest = s->dest[0] - 32 * s->linesize;
  606. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
  607. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
  608. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
  609. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
  610. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  611. for (i = 0; i < block_count; i++)
  612. vc1_p_h_loop_filter(v,
  613. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
  614. cbp,
  615. is_intra,
  616. i > 3 ? uvmv :
  617. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  618. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
  619. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  620. ttblk,
  621. flags,
  622. i);
  623. }
  624. }
  625. if (s->mb_y == s->end_mb_y - 1) {
  626. if (s->mb_y >= s->start_mb_y + 1) {
  627. if (s->mb_x >= 2) {
  628. dest = s->dest[0] - 16 * s->linesize - 32;
  629. cbp = &v->cbp[s->mb_x - s->mb_stride - 2];
  630. is_intra = &v->is_intra[s->mb_x - s->mb_stride - 2];
  631. uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 2];
  632. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
  633. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  634. for (i = 0; i < block_count; i++)
  635. vc1_p_h_loop_filter(v,
  636. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
  637. cbp,
  638. is_intra,
  639. i > 3 ? uvmv :
  640. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
  641. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 2 + v->mb_off] :
  642. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
  643. ttblk,
  644. flags,
  645. i);
  646. }
  647. if (s->mb_x == s->mb_width - 1) {
  648. if (s->mb_x >= 1) {
  649. dest = s->dest[0] - 16 * s->linesize - 16;
  650. cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
  651. is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
  652. uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
  653. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  654. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  655. for (i = 0; i < block_count; i++)
  656. vc1_p_h_loop_filter(v,
  657. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  658. cbp,
  659. is_intra,
  660. i > 3 ? uvmv :
  661. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  662. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
  663. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  664. ttblk,
  665. flags,
  666. i);
  667. }
  668. dest = s->dest[0] - 16 * s->linesize;
  669. cbp = &v->cbp[s->mb_x - s->mb_stride];
  670. is_intra = &v->is_intra[s->mb_x - s->mb_stride];
  671. uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
  672. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  673. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  674. for (i = 0; i < block_count; i++)
  675. vc1_p_h_loop_filter(v,
  676. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  677. cbp,
  678. is_intra,
  679. i > 3 ? uvmv :
  680. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  681. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
  682. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  683. ttblk,
  684. flags,
  685. i);
  686. }
  687. }
  688. if (s->mb_x >= 2) {
  689. dest = s->dest[0] - 32;
  690. cbp = &v->cbp[s->mb_x - 2];
  691. is_intra = &v->is_intra[s->mb_x - 2];
  692. uvmv = &v->luma_mv[s->mb_x - 2];
  693. ttblk = &v->ttblk[s->mb_x - 2];
  694. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  695. for (i = 0; i < block_count; i++)
  696. vc1_p_h_loop_filter(v,
  697. i > 3 ? s->dest[i - 3] - 16 : dest,
  698. cbp,
  699. is_intra,
  700. i > 3 ? uvmv :
  701. &s->current_picture.motion_val[0][s->block_index[i] - 4 + v->blocks_off],
  702. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 + v->mb_off] :
  703. &v->mv_f[0][s->block_index[i] - 4 + v->blocks_off],
  704. ttblk,
  705. flags,
  706. i);
  707. }
  708. if (s->mb_x == s->mb_width - 1) {
  709. if (s->mb_x >= 1) {
  710. dest = s->dest[0] - 16;
  711. cbp = &v->cbp[s->mb_x - 1];
  712. is_intra = &v->is_intra[s->mb_x - 1];
  713. uvmv = &v->luma_mv[s->mb_x - 1];
  714. ttblk = &v->ttblk[s->mb_x - 1];
  715. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  716. for (i = 0; i < block_count; i++)
  717. vc1_p_h_loop_filter(v,
  718. i > 3 ? s->dest[i - 3] - 8 : dest,
  719. cbp,
  720. is_intra,
  721. i > 3 ? uvmv :
  722. &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
  723. i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
  724. &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
  725. ttblk,
  726. flags,
  727. i);
  728. }
  729. dest = s->dest[0];
  730. cbp = &v->cbp[s->mb_x];
  731. is_intra = &v->is_intra[s->mb_x];
  732. uvmv = &v->luma_mv[s->mb_x];
  733. ttblk = &v->ttblk[s->mb_x];
  734. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  735. for (i = 0; i < block_count; i++)
  736. vc1_p_h_loop_filter(v,
  737. i > 3 ? s->dest[i - 3] : dest,
  738. cbp,
  739. is_intra,
  740. i > 3 ? uvmv :
  741. &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
  742. i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
  743. &v->mv_f[0][s->block_index[i] + v->blocks_off],
  744. ttblk,
  745. flags,
  746. i);
  747. }
  748. }
  749. }
  750. static av_always_inline void vc1_p_h_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
  751. uint32_t flags, uint8_t fieldtx, int block_num)
  752. {
  753. MpegEncContext *s = &v->s;
  754. int pq = v->pq;
  755. int tt;
  756. int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  757. uint8_t *dst;
  758. if (block_num > 3)
  759. dst = dest;
  760. else
  761. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  762. tt = ttblk[0] >> (block_num * 4) & 0xf;
  763. if (block_num < 4) {
  764. if (fieldtx) {
  765. if (block_num < 2) {
  766. if (tt == TT_4X4 || tt == TT_4X8)
  767. v->vc1dsp.vc1_h_loop_filter8(dst + 4, 2 * linesize, pq);
  768. if (!(flags & RIGHT_EDGE) || block_num == 0)
  769. v->vc1dsp.vc1_h_loop_filter8(dst + 8, 2 * linesize, pq);
  770. } else {
  771. if (tt == TT_4X4 || tt == TT_4X8)
  772. v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 4, 2 * linesize, pq);
  773. if (!(flags & RIGHT_EDGE) || block_num == 2)
  774. v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 8, 2 * linesize, pq);
  775. }
  776. } else {
  777. if(tt == TT_4X4 || tt == TT_4X8) {
  778. v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
  779. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
  780. }
  781. if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
  782. v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
  783. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
  784. }
  785. }
  786. } else {
  787. if (tt == TT_4X4 || tt == TT_4X8) {
  788. v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
  789. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
  790. }
  791. if (!(flags & RIGHT_EDGE)) {
  792. v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
  793. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
  794. }
  795. }
  796. }
  797. static av_always_inline void vc1_p_v_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
  798. uint32_t flags, uint8_t fieldtx, int block_num)
  799. {
  800. MpegEncContext *s = &v->s;
  801. int pq = v->pq;
  802. int tt;
  803. int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  804. uint8_t *dst;
  805. if (block_num > 3)
  806. dst = dest;
  807. else
  808. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  809. tt = ttblk[0] >> (block_num * 4) & 0xf;
  810. if (block_num < 4) {
  811. if (fieldtx) {
  812. if (block_num < 2) {
  813. if (tt == TT_4X4 || tt == TT_8X4)
  814. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  815. if (!(flags & BOTTOM_EDGE))
  816. v->vc1dsp.vc1_v_loop_filter8(dst + 16 * linesize, 2 * linesize, pq);
  817. } else {
  818. if (tt == TT_4X4 || tt == TT_8X4)
  819. v->vc1dsp.vc1_v_loop_filter8(dst + linesize, 2 * linesize, pq);
  820. if (!(flags & BOTTOM_EDGE))
  821. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  822. }
  823. } else {
  824. if (block_num < 2) {
  825. if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
  826. v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
  827. v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
  828. }
  829. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  830. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  831. } else if (!(flags & BOTTOM_EDGE)) {
  832. if (tt == TT_4X4 || tt == TT_8X4) {
  833. v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
  834. v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
  835. }
  836. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  837. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  838. }
  839. }
  840. } else {
  841. if (!(flags & BOTTOM_EDGE)) {
  842. if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
  843. v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
  844. v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
  845. }
  846. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  847. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  848. }
  849. }
  850. }
  851. void ff_vc1_p_intfr_loop_filter(VC1Context *v)
  852. {
  853. MpegEncContext *s = &v->s;
  854. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  855. int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  856. uint8_t *dest;
  857. int *ttblk;
  858. uint32_t flags;
  859. uint8_t fieldtx;
  860. int i;
  861. /* Within a MB, the vertical loop filter always runs before the horizontal.
  862. * To accomplish that, we run the V loop filter on all applicable
  863. * horizontal borders of the MB above the last overlap filtered MB. Then,
  864. * we wait for the loop filter iteration on the next row and next column to
  865. * do H loop filter on all applicable vertical borders of this MB.
  866. * Therefore, the loop filter trails by two rows and one column relative to
  867. * the overlap filter and two rows and two columns relative to the decoding
  868. * loop. */
  869. if (s->mb_x) {
  870. if (s->mb_y >= s->start_mb_y + 1) {
  871. dest = s->dest[0] - 16 * s->linesize - 16;
  872. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  873. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  874. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
  875. for (i = 0; i < block_count; i++)
  876. vc1_p_v_intfr_loop_filter(v,
  877. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  878. ttblk,
  879. flags,
  880. fieldtx,
  881. i);
  882. }
  883. }
  884. if (s->mb_x == s->mb_width - 1) {
  885. if (s->mb_y >= s->start_mb_y + 1) {
  886. dest = s->dest[0] - 16 * s->linesize;
  887. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  888. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  889. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
  890. for (i = 0; i < block_count; i++)
  891. vc1_p_v_intfr_loop_filter(v,
  892. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  893. ttblk,
  894. flags,
  895. fieldtx,
  896. i);
  897. }
  898. }
  899. if (s->mb_y == s->end_mb_y - 1) {
  900. if (s->mb_x) {
  901. dest = s->dest[0] - 16;
  902. ttblk = &v->ttblk[s->mb_x - 1];
  903. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  904. fieldtx = v->fieldtx_plane[mb_pos - 1];
  905. for (i = 0; i < block_count; i++)
  906. vc1_p_v_intfr_loop_filter(v,
  907. i > 3 ? s->dest[i - 3] - 8 : dest,
  908. ttblk,
  909. flags,
  910. fieldtx,
  911. i);
  912. }
  913. if (s->mb_x == s->mb_width - 1) {
  914. dest = s->dest[0];
  915. ttblk = &v->ttblk[s->mb_x];
  916. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  917. fieldtx = v->fieldtx_plane[mb_pos];
  918. for (i = 0; i < block_count; i++)
  919. vc1_p_v_intfr_loop_filter(v,
  920. i > 3 ? s->dest[i - 3] : dest,
  921. ttblk,
  922. flags,
  923. fieldtx,
  924. i);
  925. }
  926. }
  927. if (s->mb_y >= s->start_mb_y + 2) {
  928. if (s->mb_x >= 2) {
  929. dest = s->dest[0] - 32 * s->linesize - 32;
  930. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
  931. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  932. fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 2];
  933. for (i = 0; i < block_count; i++)
  934. vc1_p_h_intfr_loop_filter(v,
  935. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
  936. ttblk,
  937. flags,
  938. fieldtx,
  939. i);
  940. }
  941. if (s->mb_x == s->mb_width - 1) {
  942. if (s->mb_x >= 1) {
  943. dest = s->dest[0] - 32 * s->linesize - 16;
  944. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
  945. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  946. fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 1];
  947. for (i = 0; i < block_count; i++)
  948. vc1_p_h_intfr_loop_filter(v,
  949. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
  950. ttblk,
  951. flags,
  952. fieldtx,
  953. i);
  954. }
  955. dest = s->dest[0] - 32 * s->linesize;
  956. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
  957. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  958. fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride];
  959. for (i = 0; i < block_count; i++)
  960. vc1_p_h_intfr_loop_filter(v,
  961. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
  962. ttblk,
  963. flags,
  964. fieldtx,
  965. i);
  966. }
  967. }
  968. if (s->mb_y == s->end_mb_y - 1) {
  969. if (s->mb_y >= s->start_mb_y + 1) {
  970. if (s->mb_x >= 2) {
  971. dest = s->dest[0] - 16 * s->linesize - 32;
  972. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
  973. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  974. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 2];
  975. for (i = 0; i < block_count; i++)
  976. vc1_p_h_intfr_loop_filter(v,
  977. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
  978. ttblk,
  979. flags,
  980. fieldtx,
  981. i);
  982. }
  983. if (s->mb_x == s->mb_width - 1) {
  984. if (s->mb_x >= 1) {
  985. dest = s->dest[0] - 16 * s->linesize - 16;
  986. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  987. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  988. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
  989. for (i = 0; i < block_count; i++)
  990. vc1_p_h_intfr_loop_filter(v,
  991. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  992. ttblk,
  993. flags,
  994. fieldtx,
  995. i);
  996. }
  997. dest = s->dest[0] - 16 * s->linesize;
  998. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  999. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  1000. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
  1001. for (i = 0; i < block_count; i++)
  1002. vc1_p_h_intfr_loop_filter(v,
  1003. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  1004. ttblk,
  1005. flags,
  1006. fieldtx,
  1007. i);
  1008. }
  1009. }
  1010. if (s->mb_x >= 2) {
  1011. dest = s->dest[0] - 32;
  1012. ttblk = &v->ttblk[s->mb_x - 2];
  1013. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  1014. fieldtx = v->fieldtx_plane[mb_pos - 2];
  1015. for (i = 0; i < block_count; i++)
  1016. vc1_p_h_intfr_loop_filter(v,
  1017. i > 3 ? s->dest[i - 3] - 16 : dest,
  1018. ttblk,
  1019. flags,
  1020. fieldtx,
  1021. i);
  1022. }
  1023. if (s->mb_x == s->mb_width - 1) {
  1024. if (s->mb_x >= 1) {
  1025. dest = s->dest[0] - 16;
  1026. ttblk = &v->ttblk[s->mb_x - 1];
  1027. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  1028. fieldtx = v->fieldtx_plane[mb_pos - 1];
  1029. for (i = 0; i < block_count; i++)
  1030. vc1_p_h_intfr_loop_filter(v,
  1031. i > 3 ? s->dest[i - 3] - 8 : dest,
  1032. ttblk,
  1033. flags,
  1034. fieldtx,
  1035. i);
  1036. }
  1037. dest = s->dest[0];
  1038. ttblk = &v->ttblk[s->mb_x];
  1039. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  1040. fieldtx = v->fieldtx_plane[mb_pos];
  1041. for (i = 0; i < block_count; i++)
  1042. vc1_p_h_intfr_loop_filter(v,
  1043. i > 3 ? s->dest[i - 3] : dest,
  1044. ttblk,
  1045. flags,
  1046. fieldtx,
  1047. i);
  1048. }
  1049. }
  1050. }
  1051. static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  1052. int *ttblk, uint32_t flags, int block_num)
  1053. {
  1054. MpegEncContext *s = &v->s;
  1055. int pq = v->pq;
  1056. uint8_t *dst;
  1057. uint32_t block_cbp = cbp[0] >> (block_num * 4);
  1058. int tt;
  1059. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  1060. if (block_num > 3)
  1061. dst = dest;
  1062. else
  1063. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  1064. if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
  1065. if (block_num > 3)
  1066. v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
  1067. else
  1068. v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
  1069. }
  1070. tt = ttblk[0] >> (block_num * 4) & 0xf;
  1071. if (tt == TT_4X4 || tt == TT_4X8) {
  1072. idx = (block_cbp | (block_cbp >> 1)) & 5;
  1073. if (idx & 1)
  1074. v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  1075. if (idx & 4)
  1076. v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
  1077. }
  1078. }
  1079. static av_always_inline void vc1_b_v_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  1080. int *ttblk, uint32_t flags, int block_num)
  1081. {
  1082. MpegEncContext *s = &v->s;
  1083. int pq = v->pq;
  1084. uint8_t *dst;
  1085. uint32_t block_cbp = cbp[0] >> (block_num * 4);
  1086. int tt;
  1087. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  1088. if (block_num > 3)
  1089. dst = dest;
  1090. else
  1091. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  1092. if(!(flags & BOTTOM_EDGE) || block_num < 2)
  1093. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
  1094. tt = ttblk[0] >> (block_num * 4) & 0xf;
  1095. if (tt == TT_4X4 || tt == TT_8X4) {
  1096. idx = (block_cbp | (block_cbp >> 2)) & 3;
  1097. if (idx & 1)
  1098. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  1099. if (idx & 2)
  1100. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
  1101. }
  1102. }
  1103. void ff_vc1_b_intfi_loop_filter(VC1Context *v)
  1104. {
  1105. MpegEncContext *s = &v->s;
  1106. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  1107. uint8_t *dest;
  1108. uint32_t *cbp;
  1109. int *ttblk;
  1110. uint32_t flags = 0;
  1111. int i;
  1112. /* Within a MB, the vertical loop filter always runs before the horizontal.
  1113. * To accomplish that, we run the V loop filter on all applicable
  1114. * horizontal borders of the MB above the currently decoded MB. Then,
  1115. * we wait for the next loop filter iteration to do H loop filter on all
  1116. * applicable vertical borders of this MB. Therefore, the loop filter
  1117. * trails by one row and one column relative to the decoding loop. */
  1118. if (!s->first_slice_line) {
  1119. dest = s->dest[0] - 16 * s->linesize;
  1120. cbp = &v->cbp[s->mb_x - s->mb_stride];
  1121. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  1122. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  1123. for (i = 0; i < block_count; i++)
  1124. vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
  1125. }
  1126. if (s->mb_y == s->end_mb_y - 1) {
  1127. dest = s->dest[0];
  1128. cbp = &v->cbp[s->mb_x];
  1129. ttblk = &v->ttblk[s->mb_x];
  1130. flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  1131. for (i = 0; i < block_count; i++)
  1132. vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);
  1133. }
  1134. if (!s->first_slice_line) {
  1135. dest = s->dest[0] - 16 * s->linesize - 16;
  1136. cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
  1137. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  1138. if (s->mb_x) {
  1139. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  1140. for (i = 0; i < block_count; i++)
  1141. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, cbp, ttblk, flags, i);
  1142. }
  1143. if (s->mb_x == s->mb_width - 1) {
  1144. dest += 16;
  1145. cbp++;
  1146. ttblk++;
  1147. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  1148. for (i = 0; i < block_count; i++)
  1149. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
  1150. }
  1151. }
  1152. if (s->mb_y == s->end_mb_y - 1) {
  1153. dest = s->dest[0] - 16;
  1154. cbp = &v->cbp[s->mb_x - 1];
  1155. ttblk = &v->ttblk[s->mb_x - 1];
  1156. if (s->mb_x) {
  1157. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  1158. for (i = 0; i < block_count; i++)
  1159. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, cbp, ttblk, flags, i);
  1160. }
  1161. if (s->mb_x == s->mb_width - 1) {
  1162. dest += 16;
  1163. cbp++;
  1164. ttblk++;
  1165. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  1166. for (i = 0; i < block_count; i++)
  1167. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);
  1168. }
  1169. }
  1170. }