You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1199 lines
55KB

  1. /*
  2. * VC-1 and WMV3 decoder
  3. * Copyright (c) 2011 Mashiat Sarker Shakkhar
  4. * Copyright (c) 2006-2007 Konstantin Shishkov
  5. * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * VC-1 and WMV3 loopfilter
  26. */
  27. #include "avcodec.h"
  28. #include "mpegvideo.h"
  29. #include "vc1.h"
  30. #include "vc1dsp.h"
  31. void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
  32. {
  33. MpegEncContext *s = &v->s;
  34. int j;
  35. if (!s->first_slice_line) {
  36. v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
  37. if (s->mb_x)
  38. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
  39. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
  40. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
  41. for (j = 0; j < 2; j++) {
  42. v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
  43. if (s->mb_x)
  44. v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
  45. }
  46. }
  47. v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
  48. if (s->mb_y == s->end_mb_y - 1) {
  49. if (s->mb_x) {
  50. v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
  51. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
  52. v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
  53. v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
  54. }
  55. }
  56. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
  57. }
  58. }
  59. static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
  60. int16_t (*right_block)[64], int block_num)
  61. {
  62. if (left_block != right_block || (block_num & 5) == 1) {
  63. if (block_num > 3)
  64. v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num]);
  65. else if (block_num & 1)
  66. v->vc1dsp.vc1_h_s_overlap(right_block[block_num - 1], right_block[block_num]);
  67. else
  68. v->vc1dsp.vc1_h_s_overlap(left_block[block_num + 1], right_block[block_num]);
  69. }
  70. }
  71. static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
  72. int16_t (*bottom_block)[64], int block_num)
  73. {
  74. if (top_block != bottom_block || block_num & 2) {
  75. if (block_num > 3)
  76. v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
  77. else if (block_num & 2)
  78. v->vc1dsp.vc1_v_s_overlap(bottom_block[block_num - 2], bottom_block[block_num]);
  79. else
  80. v->vc1dsp.vc1_v_s_overlap(top_block[block_num + 2], bottom_block[block_num]);
  81. }
  82. }
  83. void ff_vc1_i_overlap_filter(VC1Context *v)
  84. {
  85. MpegEncContext *s = &v->s;
  86. int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
  87. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  88. int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  89. int i;
  90. topleft_blk = v->block[v->topleft_blk_idx];
  91. top_blk = v->block[v->top_blk_idx];
  92. left_blk = v->block[v->left_blk_idx];
  93. cur_blk = v->block[v->cur_blk_idx];
  94. /* Within a MB, the horizontal overlap always runs before the vertical.
  95. * To accomplish that, we run the H on the left and internal vertical
  96. * borders of the currently decoded MB. Then, we wait for the next overlap
  97. * iteration to do H overlap on the right edge of this MB, before moving
  98. * over and running the V overlap on the top and internal horizontal
  99. * borders. Therefore, the H overlap trails by one MB col and the
  100. * V overlap trails by one MB row. This is reflected in the time at which
  101. * we run the put_pixels loop, i.e. delayed by one row and one column. */
  102. for (i = 0; i < block_count; i++)
  103. if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
  104. (v->over_flags_plane[mb_pos] && ((i & 5) == 1 || (s->mb_x && v->over_flags_plane[mb_pos - 1]))))
  105. vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
  106. if (v->fcm != ILACE_FRAME)
  107. for (i = 0; i < block_count; i++) {
  108. if (s->mb_x && (v->pq >= 9 || v->condover == CONDOVER_ALL ||
  109. (v->over_flags_plane[mb_pos - 1] &&
  110. ((i & 2) || (!s->first_slice_line && v->over_flags_plane[mb_pos - 1 - s->mb_stride])))))
  111. vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
  112. if (s->mb_x == s->mb_width - 1)
  113. if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
  114. (v->over_flags_plane[mb_pos] &&
  115. ((i & 2) || (!s->first_slice_line && v->over_flags_plane[mb_pos - s->mb_stride]))))
  116. vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
  117. }
  118. }
  119. void ff_vc1_p_overlap_filter(VC1Context *v)
  120. {
  121. MpegEncContext *s = &v->s;
  122. int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
  123. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  124. int i;
  125. topleft_blk = v->block[v->topleft_blk_idx];
  126. top_blk = v->block[v->top_blk_idx];
  127. left_blk = v->block[v->left_blk_idx];
  128. cur_blk = v->block[v->cur_blk_idx];
  129. for (i = 0; i < block_count; i++)
  130. if (v->mb_type[0][s->block_index[i]] && (s->mb_x == 0 || v->mb_type[0][s->block_index[i] - 1]))
  131. vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
  132. if (v->fcm != ILACE_FRAME)
  133. for (i = 0; i < block_count; i++) {
  134. if (s->mb_x && v->mb_type[0][s->block_index[i] - 1] &&
  135. (s->first_slice_line || v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 1]))
  136. vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
  137. if (s->mb_x == s->mb_width - 1)
  138. if (v->mb_type[0][s->block_index[i]] &&
  139. (s->first_slice_line || v->mb_type[0][s->block_index[i] - s->block_wrap[i]]))
  140. vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
  141. }
  142. }
  143. #define LEFT_EDGE (1 << 0)
  144. #define RIGHT_EDGE (1 << 1)
  145. #define TOP_EDGE (1 << 2)
  146. #define BOTTOM_EDGE (1 << 3)
  147. static av_always_inline void vc1_i_h_loop_filter(VC1Context *v, uint8_t *dest,
  148. uint32_t flags, int block_num)
  149. {
  150. MpegEncContext *s = &v->s;
  151. int pq = v->pq;
  152. uint8_t *dst;
  153. if (block_num & 2)
  154. return;
  155. if (!(flags & LEFT_EDGE) || (block_num & 5) == 1) {
  156. if (block_num > 3)
  157. dst = dest;
  158. else
  159. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  160. if (v->fcm == ILACE_FRAME)
  161. if (block_num > 3) {
  162. v->vc1dsp.vc1_h_loop_filter4(dst, 2 * s->uvlinesize, pq);
  163. v->vc1dsp.vc1_h_loop_filter4(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
  164. } else {
  165. v->vc1dsp.vc1_h_loop_filter8(dst, 2 * s->linesize, pq);
  166. v->vc1dsp.vc1_h_loop_filter8(dst + s->linesize, 2 * s->linesize, pq);
  167. }
  168. else
  169. if (block_num > 3)
  170. v->vc1dsp.vc1_h_loop_filter8(dst, s->uvlinesize, pq);
  171. else
  172. v->vc1dsp.vc1_h_loop_filter16(dst, s->linesize, pq);
  173. }
  174. }
  175. static av_always_inline void vc1_i_v_loop_filter(VC1Context *v, uint8_t *dest,
  176. uint32_t flags, uint8_t fieldtx,
  177. int block_num)
  178. {
  179. MpegEncContext *s = &v->s;
  180. int pq = v->pq;
  181. uint8_t *dst;
  182. if ((block_num & 5) == 1)
  183. return;
  184. if (!(flags & TOP_EDGE) || block_num & 2) {
  185. if (block_num > 3)
  186. dst = dest;
  187. else
  188. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  189. if (v->fcm == ILACE_FRAME) {
  190. if (block_num > 3) {
  191. v->vc1dsp.vc1_v_loop_filter8(dst, 2 * s->uvlinesize, pq);
  192. v->vc1dsp.vc1_v_loop_filter8(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
  193. } else if (block_num < 2 || !fieldtx) {
  194. v->vc1dsp.vc1_v_loop_filter16(dst, 2 * s->linesize, pq);
  195. v->vc1dsp.vc1_v_loop_filter16(dst + s->linesize, 2 * s->linesize, pq);
  196. }
  197. } else
  198. if (block_num > 3)
  199. v->vc1dsp.vc1_v_loop_filter8(dst, s->uvlinesize, pq);
  200. else
  201. v->vc1dsp.vc1_v_loop_filter16(dst, s->linesize, pq);
  202. }
  203. }
  204. void ff_vc1_i_loop_filter(VC1Context *v)
  205. {
  206. MpegEncContext *s = &v->s;
  207. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  208. int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  209. uint8_t *dest, fieldtx;
  210. uint32_t flags = 0;
  211. int i;
  212. /* Within a MB, the vertical loop filter always runs before the horizontal.
  213. * To accomplish that, we run the V loop filter on top and internal
  214. * horizontal borders of the last overlap filtered MB. Then, we wait for
  215. * the loop filter iteration on the next row to do V loop filter on the
  216. * bottom edge of this MB, before moving over and running the H loop
  217. * filter on the left and internal vertical borders. Therefore, the loop
  218. * filter trails by one row and one column relative to the overlap filter
  219. * and two rows and two colums relative to the decoding loop. */
  220. if (!s->first_slice_line) {
  221. dest = s->dest[0] - 16 * s->linesize - 16;
  222. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  223. if (s->mb_x) {
  224. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
  225. for (i = 0; i < block_count; i++)
  226. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, fieldtx, i);
  227. }
  228. if (s->mb_x == s->mb_width - 1) {
  229. dest += 16;
  230. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
  231. for (i = 0; i < block_count; i++)
  232. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, fieldtx, i);
  233. }
  234. }
  235. if (s->mb_y == s->end_mb_y - 1) {
  236. dest = s->dest[0] - 16;
  237. flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  238. if (s->mb_x) {
  239. fieldtx = v->fieldtx_plane[mb_pos - 1];
  240. for (i = 0; i < block_count; i++)
  241. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, fieldtx, i);
  242. }
  243. if (s->mb_x == s->mb_width - 1) {
  244. dest += 16;
  245. fieldtx = v->fieldtx_plane[mb_pos];
  246. for (i = 0; i < block_count; i++)
  247. vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, fieldtx, i);
  248. }
  249. }
  250. if (s->mb_y >= s->start_mb_y + 2) {
  251. dest = s->dest[0] - 32 * s->linesize - 16;
  252. if (s->mb_x) {
  253. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  254. for (i = 0; i < block_count; i++)
  255. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest, flags, i);
  256. }
  257. if (s->mb_x == s->mb_width - 1) {
  258. dest += 16;
  259. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  260. for (i = 0; i < block_count; i++)
  261. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest, flags, i);
  262. }
  263. }
  264. if (s->mb_y == s->end_mb_y - 1) {
  265. if (s->mb_y >= s->start_mb_y + 1) {
  266. dest = s->dest[0] - 16 * s->linesize - 16;
  267. if (s->mb_x) {
  268. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  269. for (i = 0; i < block_count; i++)
  270. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, i);
  271. }
  272. if (s->mb_x == s->mb_width - 1) {
  273. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  274. dest += 16;
  275. for (i = 0; i < block_count; i++)
  276. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, i);
  277. }
  278. }
  279. dest = s->dest[0] - 16;
  280. if (s->mb_x) {
  281. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  282. for (i = 0; i < block_count; i++)
  283. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, i);
  284. }
  285. if (s->mb_x == s->mb_width - 1) {
  286. dest += 16;
  287. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  288. for (i = 0; i < block_count; i++)
  289. vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, i);
  290. }
  291. }
  292. }
  293. static av_always_inline void vc1_p_h_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  294. uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
  295. int *ttblk, uint32_t flags, int block_num)
  296. {
  297. MpegEncContext *s = &v->s;
  298. int pq = v->pq;
  299. uint32_t left_cbp = cbp[0] >> (block_num * 4), right_cbp;
  300. uint8_t left_is_intra, right_is_intra;
  301. int tt;
  302. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  303. uint8_t *dst;
  304. if (block_num > 3)
  305. dst = dest;
  306. else
  307. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  308. if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
  309. left_is_intra = is_intra[0] & (1 << block_num);
  310. if (block_num > 3) {
  311. right_is_intra = is_intra[1] & (1 << block_num);
  312. right_cbp = cbp[1] >> (block_num * 4);
  313. } else if (block_num & 1) {
  314. right_is_intra = is_intra[1] & (1 << block_num - 1);
  315. right_cbp = cbp[1] >> ((block_num - 1) * 4);
  316. } else {
  317. right_is_intra = is_intra[0] & (1 << block_num + 1);
  318. right_cbp = cbp[0] >> ((block_num + 1) * 4);
  319. }
  320. if (left_is_intra || right_is_intra ||
  321. mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1] ||
  322. (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[1]))
  323. v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
  324. else {
  325. idx = (left_cbp | (right_cbp >> 1)) & 5;
  326. if (idx & 1)
  327. v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 8, linesize, pq);
  328. if (idx & 4)
  329. v->vc1dsp.vc1_h_loop_filter4(dst + 8, linesize, pq);
  330. }
  331. }
  332. tt = ttblk[0] >> (block_num * 4) & 0xf;
  333. if (tt == TT_4X4 || tt == TT_4X8) {
  334. if (left_cbp & 3)
  335. v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  336. if (left_cbp & 12)
  337. v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
  338. }
  339. }
  340. static av_always_inline void vc1_p_v_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  341. uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
  342. int *ttblk, uint32_t flags, int block_num)
  343. {
  344. MpegEncContext *s = &v->s;
  345. int pq = v->pq;
  346. uint32_t top_cbp = cbp[0] >> (block_num * 4), bottom_cbp;
  347. uint8_t top_is_intra, bottom_is_intra;
  348. int tt;
  349. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  350. uint8_t *dst;
  351. if (block_num > 3)
  352. dst = dest;
  353. else
  354. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  355. if(!(flags & BOTTOM_EDGE) || block_num < 2) {
  356. top_is_intra = is_intra[0] & (1 << block_num);
  357. if (block_num > 3) {
  358. bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num);
  359. bottom_cbp = cbp[s->mb_stride] >> (block_num * 4);
  360. } else if (block_num < 2) {
  361. bottom_is_intra = is_intra[0] & (1 << block_num + 2);
  362. bottom_cbp = cbp[0] >> ((block_num + 2) * 4);
  363. } else {
  364. bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num - 2);
  365. bottom_cbp = cbp[s->mb_stride] >> ((block_num - 2) * 4);
  366. }
  367. if (top_is_intra || bottom_is_intra ||
  368. mv[0][0] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][0] ||
  369. mv[0][1] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][1] ||
  370. (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[block_num > 3 ? s->mb_stride : s->b8_stride]))
  371. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
  372. else {
  373. idx = (top_cbp | (bottom_cbp >> 2)) & 3;
  374. if (idx & 1)
  375. v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize + 4, linesize, pq);
  376. if (idx & 2)
  377. v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize, linesize, pq);
  378. }
  379. }
  380. tt = ttblk[0] >> (block_num * 4) & 0xf;
  381. if (tt == TT_4X4 || tt == TT_8X4) {
  382. if (top_cbp & 5)
  383. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  384. if (top_cbp & 10)
  385. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
  386. }
  387. }
  388. void ff_vc1_p_loop_filter(VC1Context *v)
  389. {
  390. MpegEncContext *s = &v->s;
  391. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  392. uint8_t *dest;
  393. uint32_t *cbp;
  394. uint8_t *is_intra;
  395. int16_t (*uvmv)[2];
  396. int *ttblk;
  397. uint32_t flags;
  398. int i;
  399. /* Within a MB, the vertical loop filter always runs before the horizontal.
  400. * To accomplish that, we run the V loop filter on all applicable
  401. * horizontal borders of the MB above the last overlap filtered MB. Then,
  402. * we wait for the next loop filter iteration to do H loop filter on all
  403. * applicable vertical borders of this MB. Therefore, the loop filter
  404. * trails by one row and one column relative to the overlap filter and two
  405. * rows and two colums relative to the decoding loop. */
  406. if (s->mb_y >= s->start_mb_y + 2) {
  407. if (s->mb_x) {
  408. dest = s->dest[0] - 32 * s->linesize - 16;
  409. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
  410. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
  411. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
  412. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
  413. flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
  414. for (i = 0; i < block_count; i++)
  415. vc1_p_v_loop_filter(v,
  416. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
  417. cbp,
  418. is_intra,
  419. i > 3 ? uvmv :
  420. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  421. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
  422. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  423. ttblk,
  424. flags,
  425. i);
  426. }
  427. if (s->mb_x == s->mb_width - 1) {
  428. dest = s->dest[0] - 32 * s->linesize;
  429. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
  430. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
  431. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
  432. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
  433. flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
  434. for (i = 0; i < block_count; i++)
  435. vc1_p_v_loop_filter(v,
  436. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
  437. cbp,
  438. is_intra,
  439. i > 3 ? uvmv :
  440. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  441. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
  442. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  443. ttblk,
  444. flags,
  445. i);
  446. }
  447. }
  448. if (s->mb_y == s->end_mb_y - 1) {
  449. if (s->mb_x) {
  450. if (s->mb_y >= s->start_mb_y + 1) {
  451. dest = s->dest[0] - 16 * s->linesize - 16;
  452. cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
  453. is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
  454. uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
  455. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  456. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  457. for (i = 0; i < block_count; i++)
  458. vc1_p_v_loop_filter(v,
  459. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  460. cbp,
  461. is_intra,
  462. i > 3 ? uvmv :
  463. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  464. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
  465. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  466. ttblk,
  467. flags,
  468. i);
  469. }
  470. dest = s->dest[0] - 16;
  471. cbp = &v->cbp[s->mb_x - 1];
  472. is_intra = &v->is_intra[s->mb_x - 1];
  473. uvmv = &v->luma_mv[s->mb_x - 1];
  474. ttblk = &v->ttblk[s->mb_x - 1];
  475. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  476. for (i = 0; i < block_count; i++)
  477. vc1_p_v_loop_filter(v,
  478. i > 3 ? s->dest[i - 3] - 8 : dest,
  479. cbp,
  480. is_intra,
  481. i > 3 ? uvmv :
  482. &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
  483. i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
  484. &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
  485. ttblk,
  486. flags,
  487. i);
  488. }
  489. if (s->mb_x == s->mb_width - 1) {
  490. if (s->mb_y >= s->start_mb_y + 1) {
  491. dest = s->dest[0] - 16 * s->linesize;
  492. cbp = &v->cbp[s->mb_x - s->mb_stride];
  493. is_intra = &v->is_intra[s->mb_x - s->mb_stride];
  494. uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
  495. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  496. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  497. for (i = 0; i < block_count; i++)
  498. vc1_p_v_loop_filter(v,
  499. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  500. cbp,
  501. is_intra,
  502. i > 3 ? uvmv :
  503. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  504. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
  505. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  506. ttblk,
  507. flags,
  508. i);
  509. }
  510. dest = s->dest[0];
  511. cbp = &v->cbp[s->mb_x];
  512. is_intra = &v->is_intra[s->mb_x];
  513. uvmv = &v->luma_mv[s->mb_x];
  514. ttblk = &v->ttblk[s->mb_x];
  515. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  516. for (i = 0; i < block_count; i++)
  517. vc1_p_v_loop_filter(v,
  518. i > 3 ? s->dest[i - 3] : dest,
  519. cbp,
  520. is_intra,
  521. i > 3 ? uvmv :
  522. &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
  523. i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
  524. &v->mv_f[0][s->block_index[i] + v->blocks_off],
  525. ttblk,
  526. flags,
  527. i);
  528. }
  529. }
  530. if (s->mb_y >= s->start_mb_y + 2) {
  531. if (s->mb_x >= 2) {
  532. dest = s->dest[0] - 32 * s->linesize - 32;
  533. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 2];
  534. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 2];
  535. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 2];
  536. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
  537. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  538. for (i = 0; i < block_count; i++)
  539. vc1_p_h_loop_filter(v,
  540. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
  541. cbp,
  542. is_intra,
  543. i > 3 ? uvmv :
  544. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
  545. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 2 + v->mb_off] :
  546. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
  547. ttblk,
  548. flags,
  549. i);
  550. }
  551. if (s->mb_x == s->mb_width - 1) {
  552. if (s->mb_x >= 1) {
  553. dest = s->dest[0] - 32 * s->linesize - 16;
  554. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
  555. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
  556. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
  557. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
  558. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  559. for (i = 0; i < block_count; i++)
  560. vc1_p_h_loop_filter(v,
  561. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
  562. cbp,
  563. is_intra,
  564. i > 3 ? uvmv :
  565. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  566. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
  567. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
  568. ttblk,
  569. flags,
  570. i);
  571. }
  572. dest = s->dest[0] - 32 * s->linesize;
  573. cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
  574. is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
  575. uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
  576. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
  577. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  578. for (i = 0; i < block_count; i++)
  579. vc1_p_h_loop_filter(v,
  580. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
  581. cbp,
  582. is_intra,
  583. i > 3 ? uvmv :
  584. &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  585. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
  586. &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
  587. ttblk,
  588. flags,
  589. i);
  590. }
  591. }
  592. if (s->mb_y == s->end_mb_y - 1) {
  593. if (s->mb_y >= s->start_mb_y + 1) {
  594. if (s->mb_x >= 2) {
  595. dest = s->dest[0] - 16 * s->linesize - 32;
  596. cbp = &v->cbp[s->mb_x - s->mb_stride - 2];
  597. is_intra = &v->is_intra[s->mb_x - s->mb_stride - 2];
  598. uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 2];
  599. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
  600. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  601. for (i = 0; i < block_count; i++)
  602. vc1_p_h_loop_filter(v,
  603. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
  604. cbp,
  605. is_intra,
  606. i > 3 ? uvmv :
  607. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
  608. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 2 + v->mb_off] :
  609. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
  610. ttblk,
  611. flags,
  612. i);
  613. }
  614. if (s->mb_x == s->mb_width - 1) {
  615. if (s->mb_x >= 1) {
  616. dest = s->dest[0] - 16 * s->linesize - 16;
  617. cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
  618. is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
  619. uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
  620. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  621. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  622. for (i = 0; i < block_count; i++)
  623. vc1_p_h_loop_filter(v,
  624. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  625. cbp,
  626. is_intra,
  627. i > 3 ? uvmv :
  628. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  629. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
  630. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
  631. ttblk,
  632. flags,
  633. i);
  634. }
  635. dest = s->dest[0] - 16 * s->linesize;
  636. cbp = &v->cbp[s->mb_x - s->mb_stride];
  637. is_intra = &v->is_intra[s->mb_x - s->mb_stride];
  638. uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
  639. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  640. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  641. for (i = 0; i < block_count; i++)
  642. vc1_p_h_loop_filter(v,
  643. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  644. cbp,
  645. is_intra,
  646. i > 3 ? uvmv :
  647. &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  648. i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
  649. &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
  650. ttblk,
  651. flags,
  652. i);
  653. }
  654. }
  655. if (s->mb_x >= 2) {
  656. dest = s->dest[0] - 32;
  657. cbp = &v->cbp[s->mb_x - 2];
  658. is_intra = &v->is_intra[s->mb_x - 2];
  659. uvmv = &v->luma_mv[s->mb_x - 2];
  660. ttblk = &v->ttblk[s->mb_x - 2];
  661. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  662. for (i = 0; i < block_count; i++)
  663. vc1_p_h_loop_filter(v,
  664. i > 3 ? s->dest[i - 3] - 16 : dest,
  665. cbp,
  666. is_intra,
  667. i > 3 ? uvmv :
  668. &s->current_picture.motion_val[0][s->block_index[i] - 4 + v->blocks_off],
  669. i > 3 ? &v->mv_f[0][s->block_index[i] - 2 + v->mb_off] :
  670. &v->mv_f[0][s->block_index[i] - 4 + v->blocks_off],
  671. ttblk,
  672. flags,
  673. i);
  674. }
  675. if (s->mb_x == s->mb_width - 1) {
  676. if (s->mb_x >= 1) {
  677. dest = s->dest[0] - 16;
  678. cbp = &v->cbp[s->mb_x - 1];
  679. is_intra = &v->is_intra[s->mb_x - 1];
  680. uvmv = &v->luma_mv[s->mb_x - 1];
  681. ttblk = &v->ttblk[s->mb_x - 1];
  682. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  683. for (i = 0; i < block_count; i++)
  684. vc1_p_h_loop_filter(v,
  685. i > 3 ? s->dest[i - 3] - 8 : dest,
  686. cbp,
  687. is_intra,
  688. i > 3 ? uvmv :
  689. &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
  690. i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
  691. &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
  692. ttblk,
  693. flags,
  694. i);
  695. }
  696. dest = s->dest[0];
  697. cbp = &v->cbp[s->mb_x];
  698. is_intra = &v->is_intra[s->mb_x];
  699. uvmv = &v->luma_mv[s->mb_x];
  700. ttblk = &v->ttblk[s->mb_x];
  701. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  702. for (i = 0; i < block_count; i++)
  703. vc1_p_h_loop_filter(v,
  704. i > 3 ? s->dest[i - 3] : dest,
  705. cbp,
  706. is_intra,
  707. i > 3 ? uvmv :
  708. &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
  709. i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
  710. &v->mv_f[0][s->block_index[i] + v->blocks_off],
  711. ttblk,
  712. flags,
  713. i);
  714. }
  715. }
  716. }
  717. static av_always_inline void vc1_p_h_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
  718. uint32_t flags, uint8_t fieldtx, int block_num)
  719. {
  720. MpegEncContext *s = &v->s;
  721. int pq = v->pq;
  722. int tt;
  723. int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  724. uint8_t *dst;
  725. if (block_num > 3)
  726. dst = dest;
  727. else
  728. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  729. tt = ttblk[0] >> (block_num * 4) & 0xf;
  730. if (block_num < 4) {
  731. if (fieldtx) {
  732. if (block_num < 2) {
  733. if (tt == TT_4X4 || tt == TT_4X8)
  734. v->vc1dsp.vc1_h_loop_filter8(dst + 4, 2 * linesize, pq);
  735. if (!(flags & RIGHT_EDGE) || block_num == 0)
  736. v->vc1dsp.vc1_h_loop_filter8(dst + 8, 2 * linesize, pq);
  737. } else {
  738. if (tt == TT_4X4 || tt == TT_4X8)
  739. v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 4, 2 * linesize, pq);
  740. if (!(flags & RIGHT_EDGE) || block_num == 2)
  741. v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 8, 2 * linesize, pq);
  742. }
  743. } else {
  744. if(tt == TT_4X4 || tt == TT_4X8) {
  745. v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
  746. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
  747. }
  748. if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
  749. v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
  750. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
  751. }
  752. }
  753. } else {
  754. if (tt == TT_4X4 || tt == TT_4X8) {
  755. v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
  756. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
  757. }
  758. if (!(flags & RIGHT_EDGE)) {
  759. v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
  760. v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
  761. }
  762. }
  763. }
  764. static av_always_inline void vc1_p_v_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
  765. uint32_t flags, uint8_t fieldtx, int block_num)
  766. {
  767. MpegEncContext *s = &v->s;
  768. int pq = v->pq;
  769. int tt;
  770. int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  771. uint8_t *dst;
  772. if (block_num > 3)
  773. dst = dest;
  774. else
  775. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  776. tt = ttblk[0] >> (block_num * 4) & 0xf;
  777. if (block_num < 4) {
  778. if (fieldtx) {
  779. if (block_num < 2) {
  780. if (tt == TT_4X4 || tt == TT_8X4)
  781. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  782. if (!(flags & BOTTOM_EDGE))
  783. v->vc1dsp.vc1_v_loop_filter8(dst + 16 * linesize, 2 * linesize, pq);
  784. } else {
  785. if (tt == TT_4X4 || tt == TT_8X4)
  786. v->vc1dsp.vc1_v_loop_filter8(dst + linesize, 2 * linesize, pq);
  787. if (!(flags & BOTTOM_EDGE))
  788. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  789. }
  790. } else {
  791. if (block_num < 2) {
  792. if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
  793. v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
  794. v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
  795. }
  796. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  797. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  798. } else if (!(flags & BOTTOM_EDGE)) {
  799. if (tt == TT_4X4 || tt == TT_8X4) {
  800. v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
  801. v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
  802. }
  803. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  804. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  805. }
  806. }
  807. } else {
  808. if (!(flags & BOTTOM_EDGE)) {
  809. if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
  810. v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
  811. v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
  812. }
  813. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
  814. v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
  815. }
  816. }
  817. }
  818. void ff_vc1_p_intfr_loop_filter(VC1Context *v)
  819. {
  820. MpegEncContext *s = &v->s;
  821. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  822. int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  823. uint8_t *dest;
  824. int *ttblk;
  825. uint32_t flags;
  826. uint8_t fieldtx;
  827. int i;
  828. /* Within a MB, the vertical loop filter always runs before the horizontal.
  829. * To accomplish that, we run the V loop filter on all applicable
  830. * horizontal borders of the MB above the last overlap filtered MB. Then,
  831. * we wait for the loop filter iteration on the next row and next column to
  832. * do H loop filter on all applicable vertical borders of this MB.
  833. * Therefore, the loop filter trails by two rows and one column relative to
  834. * the overlap filter and two rows and two colums relative to the decoding
  835. * loop. */
  836. if (s->mb_x) {
  837. if (s->mb_y >= s->start_mb_y + 1) {
  838. dest = s->dest[0] - 16 * s->linesize - 16;
  839. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  840. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  841. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
  842. for (i = 0; i < block_count; i++)
  843. vc1_p_v_intfr_loop_filter(v,
  844. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  845. ttblk,
  846. flags,
  847. fieldtx,
  848. i);
  849. }
  850. }
  851. if (s->mb_x == s->mb_width - 1) {
  852. if (s->mb_y >= s->start_mb_y + 1) {
  853. dest = s->dest[0] - 16 * s->linesize;
  854. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  855. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  856. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
  857. for (i = 0; i < block_count; i++)
  858. vc1_p_v_intfr_loop_filter(v,
  859. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  860. ttblk,
  861. flags,
  862. fieldtx,
  863. i);
  864. }
  865. }
  866. if (s->mb_y == s->end_mb_y - 1) {
  867. if (s->mb_x) {
  868. dest = s->dest[0] - 16;
  869. ttblk = &v->ttblk[s->mb_x - 1];
  870. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  871. fieldtx = v->fieldtx_plane[mb_pos - 1];
  872. for (i = 0; i < block_count; i++)
  873. vc1_p_v_intfr_loop_filter(v,
  874. i > 3 ? s->dest[i - 3] - 8 : dest,
  875. ttblk,
  876. flags,
  877. fieldtx,
  878. i);
  879. }
  880. if (s->mb_x == s->mb_width - 1) {
  881. dest = s->dest[0];
  882. ttblk = &v->ttblk[s->mb_x];
  883. flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  884. fieldtx = v->fieldtx_plane[mb_pos];
  885. for (i = 0; i < block_count; i++)
  886. vc1_p_v_intfr_loop_filter(v,
  887. i > 3 ? s->dest[i - 3] : dest,
  888. ttblk,
  889. flags,
  890. fieldtx,
  891. i);
  892. }
  893. }
  894. if (s->mb_y >= s->start_mb_y + 2) {
  895. if (s->mb_x >= 2) {
  896. dest = s->dest[0] - 32 * s->linesize - 32;
  897. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
  898. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  899. fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 2];
  900. for (i = 0; i < block_count; i++)
  901. vc1_p_h_intfr_loop_filter(v,
  902. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
  903. ttblk,
  904. flags,
  905. fieldtx,
  906. i);
  907. }
  908. if (s->mb_x == s->mb_width - 1) {
  909. if (s->mb_x >= 1) {
  910. dest = s->dest[0] - 32 * s->linesize - 16;
  911. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
  912. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  913. fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 1];
  914. for (i = 0; i < block_count; i++)
  915. vc1_p_h_intfr_loop_filter(v,
  916. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
  917. ttblk,
  918. flags,
  919. fieldtx,
  920. i);
  921. }
  922. dest = s->dest[0] - 32 * s->linesize;
  923. ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
  924. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  925. fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride];
  926. for (i = 0; i < block_count; i++)
  927. vc1_p_h_intfr_loop_filter(v,
  928. i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
  929. ttblk,
  930. flags,
  931. fieldtx,
  932. i);
  933. }
  934. }
  935. if (s->mb_y == s->end_mb_y - 1) {
  936. if (s->mb_y >= s->start_mb_y + 1) {
  937. if (s->mb_x >= 2) {
  938. dest = s->dest[0] - 16 * s->linesize - 32;
  939. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
  940. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  941. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 2];
  942. for (i = 0; i < block_count; i++)
  943. vc1_p_h_intfr_loop_filter(v,
  944. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
  945. ttblk,
  946. flags,
  947. fieldtx,
  948. i);
  949. }
  950. if (s->mb_x == s->mb_width - 1) {
  951. if (s->mb_x >= 1) {
  952. dest = s->dest[0] - 16 * s->linesize - 16;
  953. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  954. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  955. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
  956. for (i = 0; i < block_count; i++)
  957. vc1_p_h_intfr_loop_filter(v,
  958. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
  959. ttblk,
  960. flags,
  961. fieldtx,
  962. i);
  963. }
  964. dest = s->dest[0] - 16 * s->linesize;
  965. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  966. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  967. fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
  968. for (i = 0; i < block_count; i++)
  969. vc1_p_h_intfr_loop_filter(v,
  970. i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
  971. ttblk,
  972. flags,
  973. fieldtx,
  974. i);
  975. }
  976. }
  977. if (s->mb_x >= 2) {
  978. dest = s->dest[0] - 32;
  979. ttblk = &v->ttblk[s->mb_x - 2];
  980. flags = s->mb_x == 2 ? LEFT_EDGE : 0;
  981. fieldtx = v->fieldtx_plane[mb_pos - 2];
  982. for (i = 0; i < block_count; i++)
  983. vc1_p_h_intfr_loop_filter(v,
  984. i > 3 ? s->dest[i - 3] - 16 : dest,
  985. ttblk,
  986. flags,
  987. fieldtx,
  988. i);
  989. }
  990. if (s->mb_x == s->mb_width - 1) {
  991. if (s->mb_x >= 1) {
  992. dest = s->dest[0] - 16;
  993. ttblk = &v->ttblk[s->mb_x - 1];
  994. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  995. fieldtx = v->fieldtx_plane[mb_pos - 1];
  996. for (i = 0; i < block_count; i++)
  997. vc1_p_h_intfr_loop_filter(v,
  998. i > 3 ? s->dest[i - 3] - 8 : dest,
  999. ttblk,
  1000. flags,
  1001. fieldtx,
  1002. i);
  1003. }
  1004. dest = s->dest[0];
  1005. ttblk = &v->ttblk[s->mb_x];
  1006. flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
  1007. fieldtx = v->fieldtx_plane[mb_pos];
  1008. for (i = 0; i < block_count; i++)
  1009. vc1_p_h_intfr_loop_filter(v,
  1010. i > 3 ? s->dest[i - 3] : dest,
  1011. ttblk,
  1012. flags,
  1013. fieldtx,
  1014. i);
  1015. }
  1016. }
  1017. }
  1018. static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  1019. int *ttblk, uint32_t flags, int block_num)
  1020. {
  1021. MpegEncContext *s = &v->s;
  1022. int pq = v->pq;
  1023. uint8_t *dst;
  1024. uint32_t block_cbp = cbp[0] >> (block_num * 4);
  1025. int tt;
  1026. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  1027. if (block_num > 3)
  1028. dst = dest;
  1029. else
  1030. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  1031. if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
  1032. if (block_num > 3)
  1033. v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
  1034. else
  1035. v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
  1036. }
  1037. tt = ttblk[0] >> (block_num * 4) & 0xf;
  1038. if (tt == TT_4X4 || tt == TT_4X8) {
  1039. idx = (block_cbp | (block_cbp >> 1)) & 5;
  1040. if (idx & 1)
  1041. v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  1042. if (idx & 4)
  1043. v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
  1044. }
  1045. }
  1046. static av_always_inline void vc1_b_v_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
  1047. int *ttblk, uint32_t flags, int block_num)
  1048. {
  1049. MpegEncContext *s = &v->s;
  1050. int pq = v->pq;
  1051. uint8_t *dst;
  1052. uint32_t block_cbp = cbp[0] >> (block_num * 4);
  1053. int tt;
  1054. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
  1055. if (block_num > 3)
  1056. dst = dest;
  1057. else
  1058. dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
  1059. if(!(flags & BOTTOM_EDGE) || block_num < 2)
  1060. v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
  1061. tt = ttblk[0] >> (block_num * 4) & 0xf;
  1062. if (tt == TT_4X4 || tt == TT_8X4) {
  1063. idx = (block_cbp | (block_cbp >> 2)) & 3;
  1064. if (idx & 1)
  1065. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
  1066. if (idx & 2)
  1067. v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
  1068. }
  1069. }
  1070. void ff_vc1_b_intfi_loop_filter(VC1Context *v)
  1071. {
  1072. MpegEncContext *s = &v->s;
  1073. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  1074. uint8_t *dest;
  1075. uint32_t *cbp;
  1076. int *ttblk;
  1077. uint32_t flags = 0;
  1078. int i;
  1079. /* Within a MB, the vertical loop filter always runs before the horizontal.
  1080. * To accomplish that, we run the V loop filter on all applicable
  1081. * horizontal borders of the MB above the currently decoded MB. Then,
  1082. * we wait for the next loop filter iteration to do H loop filter on all
  1083. * applicable vertical borders of this MB. Therefore, the loop filter
  1084. * trails by one row and one column relative to the decoding loop. */
  1085. if (!s->first_slice_line) {
  1086. dest = s->dest[0] - 16 * s->linesize;
  1087. cbp = &v->cbp[s->mb_x - s->mb_stride];
  1088. ttblk = &v->ttblk[s->mb_x - s->mb_stride];
  1089. flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
  1090. for (i = 0; i < block_count; i++)
  1091. vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
  1092. }
  1093. if (s->mb_y == s->end_mb_y - 1) {
  1094. dest = s->dest[0];
  1095. cbp = &v->cbp[s->mb_x];
  1096. ttblk = &v->ttblk[s->mb_x];
  1097. flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
  1098. for (i = 0; i < block_count; i++)
  1099. vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);
  1100. }
  1101. if (!s->first_slice_line) {
  1102. dest = s->dest[0] - 16 * s->linesize - 16;
  1103. cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
  1104. ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
  1105. if (s->mb_x) {
  1106. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  1107. for (i = 0; i < block_count; i++)
  1108. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, cbp, ttblk, flags, i);
  1109. }
  1110. if (s->mb_x == s->mb_width - 1) {
  1111. dest += 16;
  1112. cbp++;
  1113. ttblk++;
  1114. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  1115. for (i = 0; i < block_count; i++)
  1116. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
  1117. }
  1118. }
  1119. if (s->mb_y == s->end_mb_y - 1) {
  1120. dest = s->dest[0] - 16;
  1121. cbp = &v->cbp[s->mb_x - 1];
  1122. ttblk = &v->ttblk[s->mb_x - 1];
  1123. if (s->mb_x) {
  1124. flags = s->mb_x == 1 ? LEFT_EDGE : 0;
  1125. for (i = 0; i < block_count; i++)
  1126. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, cbp, ttblk, flags, i);
  1127. }
  1128. if (s->mb_x == s->mb_width - 1) {
  1129. dest += 16;
  1130. cbp++;
  1131. ttblk++;
  1132. flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
  1133. for (i = 0; i < block_count; i++)
  1134. vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);
  1135. }
  1136. }
  1137. }