You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

360 lines
16KB

  1. /*
  2. * VC-1 and WMV3 decoder
  3. * Copyright (c) 2011 Mashiat Sarker Shakkhar
  4. * Copyright (c) 2006-2007 Konstantin Shishkov
  5. * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * VC-1 and WMV3 loopfilter
  26. */
  27. #include "avcodec.h"
  28. #include "mpegvideo.h"
  29. #include "vc1.h"
  30. #include "vc1dsp.h"
  31. void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
  32. {
  33. MpegEncContext *s = &v->s;
  34. int j;
  35. if (!s->first_slice_line) {
  36. v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
  37. if (s->mb_x)
  38. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
  39. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
  40. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
  41. for (j = 0; j < 2; j++) {
  42. v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
  43. if (s->mb_x)
  44. v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
  45. }
  46. }
  47. v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
  48. if (s->mb_y == s->end_mb_y - 1) {
  49. if (s->mb_x) {
  50. v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
  51. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
  52. v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
  53. v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
  54. }
  55. }
  56. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
  57. }
  58. }
  59. void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
  60. {
  61. MpegEncContext *s = &v->s;
  62. int j;
  63. /* The loopfilter runs 1 row and 1 column behind the overlap filter, which
  64. * means it runs two rows/cols behind the decoding loop. */
  65. if (!s->first_slice_line) {
  66. if (s->mb_x) {
  67. if (s->mb_y >= s->start_mb_y + 2) {
  68. v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
  69. if (s->mb_x >= 2)
  70. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
  71. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
  72. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
  73. for (j = 0; j < 2; j++) {
  74. v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
  75. if (s->mb_x >= 2) {
  76. v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
  77. }
  78. }
  79. }
  80. v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
  81. }
  82. if (s->mb_x == s->mb_width - 1) {
  83. if (s->mb_y >= s->start_mb_y + 2) {
  84. v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
  85. if (s->mb_x)
  86. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
  87. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
  88. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
  89. for (j = 0; j < 2; j++) {
  90. v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
  91. if (s->mb_x >= 2) {
  92. v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq);
  93. }
  94. }
  95. }
  96. v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
  97. }
  98. if (s->mb_y == s->end_mb_y) {
  99. if (s->mb_x) {
  100. if (s->mb_x >= 2)
  101. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
  102. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
  103. if (s->mb_x >= 2 && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
  104. for (j = 0; j < 2; j++) {
  105. v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
  106. }
  107. }
  108. }
  109. if (s->mb_x == s->mb_width - 1) {
  110. if (s->mb_x)
  111. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
  112. v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
  113. if (s->mb_x && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
  114. for (j = 0; j < 2; j++) {
  115. v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
  116. }
  117. }
  118. }
  119. }
  120. }
  121. }
  122. void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
  123. {
  124. MpegEncContext *s = &v->s;
  125. int mb_pos;
  126. if (v->condover == CONDOVER_NONE)
  127. return;
  128. mb_pos = s->mb_x + s->mb_y * s->mb_stride;
  129. /* Within a MB, the horizontal overlap always runs before the vertical.
  130. * To accomplish that, we run the H on left and internal borders of the
  131. * currently decoded MB. Then, we wait for the next overlap iteration
  132. * to do H overlap on the right edge of this MB, before moving over and
  133. * running the V overlap. Therefore, the V overlap makes us trail by one
  134. * MB col and the H overlap filter makes us trail by one MB row. This
  135. * is reflected in the time at which we run the put_pixels loop. */
  136. if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
  137. if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
  138. v->over_flags_plane[mb_pos - 1])) {
  139. v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
  140. v->block[v->cur_blk_idx][0]);
  141. v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
  142. v->block[v->cur_blk_idx][2]);
  143. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
  144. v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
  145. v->block[v->cur_blk_idx][4]);
  146. v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
  147. v->block[v->cur_blk_idx][5]);
  148. }
  149. }
  150. v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
  151. v->block[v->cur_blk_idx][1]);
  152. v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
  153. v->block[v->cur_blk_idx][3]);
  154. if (s->mb_x == s->mb_width - 1) {
  155. if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
  156. v->over_flags_plane[mb_pos - s->mb_stride])) {
  157. v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
  158. v->block[v->cur_blk_idx][0]);
  159. v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
  160. v->block[v->cur_blk_idx][1]);
  161. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
  162. v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
  163. v->block[v->cur_blk_idx][4]);
  164. v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
  165. v->block[v->cur_blk_idx][5]);
  166. }
  167. }
  168. v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
  169. v->block[v->cur_blk_idx][2]);
  170. v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
  171. v->block[v->cur_blk_idx][3]);
  172. }
  173. }
  174. if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
  175. if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
  176. v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
  177. v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
  178. v->block[v->left_blk_idx][0]);
  179. v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
  180. v->block[v->left_blk_idx][1]);
  181. if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
  182. v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
  183. v->block[v->left_blk_idx][4]);
  184. v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
  185. v->block[v->left_blk_idx][5]);
  186. }
  187. }
  188. v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
  189. v->block[v->left_blk_idx][2]);
  190. v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
  191. v->block[v->left_blk_idx][3]);
  192. }
  193. }
  194. static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num)
  195. {
  196. MpegEncContext *s = &v->s;
  197. int mb_cbp = v->cbp[s->mb_x - s->mb_stride],
  198. block_cbp = mb_cbp >> (block_num * 4), bottom_cbp,
  199. mb_is_intra = v->is_intra[s->mb_x - s->mb_stride],
  200. block_is_intra = mb_is_intra >> block_num, bottom_is_intra;
  201. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
  202. uint8_t *dst;
  203. if (block_num > 3) {
  204. dst = s->dest[block_num - 3];
  205. } else {
  206. dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize;
  207. }
  208. if (s->mb_y != s->end_mb_y || block_num < 2) {
  209. int16_t (*mv)[2];
  210. int mv_stride;
  211. if (block_num > 3) {
  212. bottom_cbp = v->cbp[s->mb_x] >> (block_num * 4);
  213. bottom_is_intra = v->is_intra[s->mb_x] >> block_num;
  214. mv = &v->luma_mv[s->mb_x - s->mb_stride];
  215. mv_stride = s->mb_stride;
  216. } else {
  217. bottom_cbp = (block_num < 2) ? (mb_cbp >> ((block_num + 2) * 4))
  218. : (v->cbp[s->mb_x] >> ((block_num - 2) * 4));
  219. bottom_is_intra = (block_num < 2) ? (mb_is_intra >> (block_num + 2))
  220. : (v->is_intra[s->mb_x] >> (block_num - 2));
  221. mv_stride = s->b8_stride;
  222. mv = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride];
  223. }
  224. if (bottom_is_intra & 1 || block_is_intra & 1 ||
  225. mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) {
  226. v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
  227. } else {
  228. idx = ((bottom_cbp >> 2) | block_cbp) & 3;
  229. if (idx == 3) {
  230. v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
  231. } else if (idx) {
  232. if (idx == 1)
  233. v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
  234. else
  235. v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
  236. }
  237. }
  238. }
  239. dst -= 4 * linesize;
  240. ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF;
  241. if (ttblk == TT_4X4 || ttblk == TT_8X4) {
  242. idx = (block_cbp | (block_cbp >> 2)) & 3;
  243. if (idx == 3) {
  244. v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
  245. } else if (idx) {
  246. if (idx == 1)
  247. v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
  248. else
  249. v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
  250. }
  251. }
  252. }
  253. static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num)
  254. {
  255. MpegEncContext *s = &v->s;
  256. int mb_cbp = v->cbp[s->mb_x - 1 - s->mb_stride],
  257. block_cbp = mb_cbp >> (block_num * 4), right_cbp,
  258. mb_is_intra = v->is_intra[s->mb_x - 1 - s->mb_stride],
  259. block_is_intra = mb_is_intra >> block_num, right_is_intra;
  260. int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
  261. uint8_t *dst;
  262. if (block_num > 3) {
  263. dst = s->dest[block_num - 3] - 8 * linesize;
  264. } else {
  265. dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8;
  266. }
  267. if (s->mb_x != s->mb_width || !(block_num & 5)) {
  268. int16_t (*mv)[2];
  269. if (block_num > 3) {
  270. right_cbp = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4);
  271. right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num;
  272. mv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
  273. } else {
  274. right_cbp = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride] >> ((block_num - 1) * 4))
  275. : (mb_cbp >> ((block_num + 1) * 4));
  276. right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1))
  277. : (mb_is_intra >> (block_num + 1));
  278. mv = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2];
  279. }
  280. if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) {
  281. v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
  282. } else {
  283. idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check
  284. if (idx == 5) {
  285. v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
  286. } else if (idx) {
  287. if (idx == 1)
  288. v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq);
  289. else
  290. v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
  291. }
  292. }
  293. }
  294. dst -= 4;
  295. ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf;
  296. if (ttblk == TT_4X4 || ttblk == TT_4X8) {
  297. idx = (block_cbp | (block_cbp >> 1)) & 5;
  298. if (idx == 5) {
  299. v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
  300. } else if (idx) {
  301. if (idx == 1)
  302. v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq);
  303. else
  304. v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
  305. }
  306. }
  307. }
  308. void ff_vc1_apply_p_loop_filter(VC1Context *v)
  309. {
  310. MpegEncContext *s = &v->s;
  311. int i;
  312. int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
  313. for (i = 0; i < block_count; i++) {
  314. vc1_apply_p_v_loop_filter(v, i);
  315. }
  316. /* V always precedes H, therefore we run H one MB before V;
  317. * at the end of a row, we catch up to complete the row */
  318. if (s->mb_x) {
  319. for (i = 0; i < block_count; i++) {
  320. vc1_apply_p_h_loop_filter(v, i);
  321. }
  322. if (s->mb_x == s->mb_width - 1) {
  323. s->mb_x++;
  324. ff_update_block_index(s);
  325. for (i = 0; i < block_count; i++) {
  326. vc1_apply_p_h_loop_filter(v, i);
  327. }
  328. }
  329. }
  330. }