You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

329 lines
12KB

  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  3. * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * H.264 / AVC / MPEG-4 part10 DSP functions.
  24. * @author Michael Niedermayer <michaelni@gmx.at>
  25. */
  26. #include "bit_depth_template.c"
  27. #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
  28. #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
  29. #define H264_WEIGHT(W) \
  30. static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \
  31. int log2_denom, int weight, int offset) \
  32. { \
  33. int y; \
  34. pixel *block = (pixel*)_block; \
  35. stride >>= sizeof(pixel)-1; \
  36. offset = (unsigned)offset << (log2_denom + (BIT_DEPTH-8)); \
  37. if(log2_denom) offset += 1<<(log2_denom-1); \
  38. for (y = 0; y < height; y++, block += stride) { \
  39. op_scale1(0); \
  40. op_scale1(1); \
  41. if(W==2) continue; \
  42. op_scale1(2); \
  43. op_scale1(3); \
  44. if(W==4) continue; \
  45. op_scale1(4); \
  46. op_scale1(5); \
  47. op_scale1(6); \
  48. op_scale1(7); \
  49. if(W==8) continue; \
  50. op_scale1(8); \
  51. op_scale1(9); \
  52. op_scale1(10); \
  53. op_scale1(11); \
  54. op_scale1(12); \
  55. op_scale1(13); \
  56. op_scale1(14); \
  57. op_scale1(15); \
  58. } \
  59. } \
  60. static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \
  61. int log2_denom, int weightd, int weights, int offset) \
  62. { \
  63. int y; \
  64. pixel *dst = (pixel*)_dst; \
  65. pixel *src = (pixel*)_src; \
  66. stride >>= sizeof(pixel)-1; \
  67. offset = (unsigned)offset << (BIT_DEPTH-8); \
  68. offset = (unsigned)((offset + 1) | 1) << log2_denom; \
  69. for (y = 0; y < height; y++, dst += stride, src += stride) { \
  70. op_scale2(0); \
  71. op_scale2(1); \
  72. if(W==2) continue; \
  73. op_scale2(2); \
  74. op_scale2(3); \
  75. if(W==4) continue; \
  76. op_scale2(4); \
  77. op_scale2(5); \
  78. op_scale2(6); \
  79. op_scale2(7); \
  80. if(W==8) continue; \
  81. op_scale2(8); \
  82. op_scale2(9); \
  83. op_scale2(10); \
  84. op_scale2(11); \
  85. op_scale2(12); \
  86. op_scale2(13); \
  87. op_scale2(14); \
  88. op_scale2(15); \
  89. } \
  90. }
  91. H264_WEIGHT(16)
  92. H264_WEIGHT(8)
  93. H264_WEIGHT(4)
  94. H264_WEIGHT(2)
  95. #undef op_scale1
  96. #undef op_scale2
  97. #undef H264_WEIGHT
  98. static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
  99. {
  100. pixel *pix = (pixel*)p_pix;
  101. int i, d;
  102. xstride >>= sizeof(pixel)-1;
  103. ystride >>= sizeof(pixel)-1;
  104. alpha <<= BIT_DEPTH - 8;
  105. beta <<= BIT_DEPTH - 8;
  106. for( i = 0; i < 4; i++ ) {
  107. const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8));
  108. if( tc_orig < 0 ) {
  109. pix += inner_iters*ystride;
  110. continue;
  111. }
  112. for( d = 0; d < inner_iters; d++ ) {
  113. const int p0 = pix[-1*xstride];
  114. const int p1 = pix[-2*xstride];
  115. const int p2 = pix[-3*xstride];
  116. const int q0 = pix[0];
  117. const int q1 = pix[1*xstride];
  118. const int q2 = pix[2*xstride];
  119. if( FFABS( p0 - q0 ) < alpha &&
  120. FFABS( p1 - p0 ) < beta &&
  121. FFABS( q1 - q0 ) < beta ) {
  122. int tc = tc_orig;
  123. int i_delta;
  124. if( FFABS( p2 - p0 ) < beta ) {
  125. if(tc_orig)
  126. pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig );
  127. tc++;
  128. }
  129. if( FFABS( q2 - q0 ) < beta ) {
  130. if(tc_orig)
  131. pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig );
  132. tc++;
  133. }
  134. i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc );
  135. pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */
  136. pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */
  137. }
  138. pix += ystride;
  139. }
  140. }
  141. }
  142. static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  143. {
  144. FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0);
  145. }
  146. static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  147. {
  148. FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
  149. }
  150. static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  151. {
  152. FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
  153. }
  154. static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
  155. {
  156. pixel *pix = (pixel*)p_pix;
  157. int d;
  158. xstride >>= sizeof(pixel)-1;
  159. ystride >>= sizeof(pixel)-1;
  160. alpha <<= BIT_DEPTH - 8;
  161. beta <<= BIT_DEPTH - 8;
  162. for( d = 0; d < 4 * inner_iters; d++ ) {
  163. const int p2 = pix[-3*xstride];
  164. const int p1 = pix[-2*xstride];
  165. const int p0 = pix[-1*xstride];
  166. const int q0 = pix[ 0*xstride];
  167. const int q1 = pix[ 1*xstride];
  168. const int q2 = pix[ 2*xstride];
  169. if( FFABS( p0 - q0 ) < alpha &&
  170. FFABS( p1 - p0 ) < beta &&
  171. FFABS( q1 - q0 ) < beta ) {
  172. if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
  173. if( FFABS( p2 - p0 ) < beta)
  174. {
  175. const int p3 = pix[-4*xstride];
  176. /* p0', p1', p2' */
  177. pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
  178. pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
  179. pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
  180. } else {
  181. /* p0' */
  182. pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
  183. }
  184. if( FFABS( q2 - q0 ) < beta)
  185. {
  186. const int q3 = pix[3*xstride];
  187. /* q0', q1', q2' */
  188. pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
  189. pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
  190. pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
  191. } else {
  192. /* q0' */
  193. pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
  194. }
  195. }else{
  196. /* p0', q0' */
  197. pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
  198. pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
  199. }
  200. }
  201. pix += ystride;
  202. }
  203. }
  204. static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
  205. {
  206. FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta);
  207. }
  208. static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
  209. {
  210. FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
  211. }
  212. static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
  213. {
  214. FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
  215. }
  216. static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
  217. {
  218. pixel *pix = (pixel*)p_pix;
  219. int i, d;
  220. alpha <<= BIT_DEPTH - 8;
  221. beta <<= BIT_DEPTH - 8;
  222. xstride >>= sizeof(pixel)-1;
  223. ystride >>= sizeof(pixel)-1;
  224. for( i = 0; i < 4; i++ ) {
  225. const int tc = ((tc0[i] - 1U) << (BIT_DEPTH - 8)) + 1;
  226. if( tc <= 0 ) {
  227. pix += inner_iters*ystride;
  228. continue;
  229. }
  230. for( d = 0; d < inner_iters; d++ ) {
  231. const int p0 = pix[-1*xstride];
  232. const int p1 = pix[-2*xstride];
  233. const int q0 = pix[0];
  234. const int q1 = pix[1*xstride];
  235. if( FFABS( p0 - q0 ) < alpha &&
  236. FFABS( p1 - p0 ) < beta &&
  237. FFABS( q1 - q0 ) < beta ) {
  238. int delta = av_clip( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc );
  239. pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */
  240. pix[0] = av_clip_pixel( q0 - delta ); /* q0' */
  241. }
  242. pix += ystride;
  243. }
  244. }
  245. }
  246. static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  247. {
  248. FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0);
  249. }
  250. static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  251. {
  252. FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
  253. }
  254. static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  255. {
  256. FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
  257. }
  258. static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  259. {
  260. FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
  261. }
  262. static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
  263. {
  264. FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
  265. }
  266. static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
  267. {
  268. pixel *pix = (pixel*)p_pix;
  269. int d;
  270. xstride >>= sizeof(pixel)-1;
  271. ystride >>= sizeof(pixel)-1;
  272. alpha <<= BIT_DEPTH - 8;
  273. beta <<= BIT_DEPTH - 8;
  274. for( d = 0; d < 4 * inner_iters; d++ ) {
  275. const int p0 = pix[-1*xstride];
  276. const int p1 = pix[-2*xstride];
  277. const int q0 = pix[0];
  278. const int q1 = pix[1*xstride];
  279. if( FFABS( p0 - q0 ) < alpha &&
  280. FFABS( p1 - p0 ) < beta &&
  281. FFABS( q1 - q0 ) < beta ) {
  282. pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
  283. pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
  284. }
  285. pix += ystride;
  286. }
  287. }
  288. static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
  289. {
  290. FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta);
  291. }
  292. static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
  293. {
  294. FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
  295. }
  296. static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
  297. {
  298. FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
  299. }
  300. static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta)
  301. {
  302. FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
  303. }
  304. static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
  305. {
  306. FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
  307. }