You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1247 lines
50KB

  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard
  4. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  7. *
  8. * This file is part of Libav.
  9. *
  10. * Libav is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * Libav is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with Libav; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * DSP utils
  27. */
  28. #include "bit_depth_template.c"
  29. static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  30. {
  31. int i;
  32. for(i=0; i<h; i++)
  33. {
  34. AV_WN2P(dst , AV_RN2P(src ));
  35. dst+=dstStride;
  36. src+=srcStride;
  37. }
  38. }
  39. static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  40. {
  41. int i;
  42. for(i=0; i<h; i++)
  43. {
  44. AV_WN4P(dst , AV_RN4P(src ));
  45. dst+=dstStride;
  46. src+=srcStride;
  47. }
  48. }
  49. static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  50. {
  51. int i;
  52. for(i=0; i<h; i++)
  53. {
  54. AV_WN4P(dst , AV_RN4P(src ));
  55. AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
  56. dst+=dstStride;
  57. src+=srcStride;
  58. }
  59. }
  60. static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  61. {
  62. int i;
  63. for(i=0; i<h; i++)
  64. {
  65. AV_WN4P(dst , AV_RN4P(src ));
  66. AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
  67. AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
  68. AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
  69. dst+=dstStride;
  70. src+=srcStride;
  71. }
  72. }
  73. /* draw the edges of width 'w' of an image of size width, height */
  74. //FIXME check that this is ok for mpeg4 interlaced
/*
 * Replicate the outermost samples of a width x height image into a
 * surrounding border: 'w' columns on the left/right of every row, and
 * 'h' rows above/below when the corresponding EDGE_TOP / EDGE_BOTTOM
 * bit is set in 'sides'.  '_wrap' is the line stride in bytes.
 */
static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
{
    pixel *buf = (pixel*)_buf;
    int wrap = _wrap / sizeof(pixel); /* byte stride -> stride in pixels */
    pixel *ptr, *last_line;
    int i;

    /* left and right borders: splat the first/last sample of each row */
    ptr = buf;
    for(i=0;i<height;i++) {
#if BIT_DEPTH > 8
        /* multi-byte samples cannot be splatted with memset */
        int j;
        for (j = 0; j < w; j++) {
            ptr[j-w] = ptr[0];
            ptr[j+width] = ptr[width-1];
        }
#else
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
#endif
        ptr += wrap;
    }

    /* top and bottom borders + corners: duplicate whole rows that already
     * include the left/right border written above (hence width + 2*w) */
    buf -= w;
    last_line = buf + (height - 1) * wrap;
    if (sides & EDGE_TOP)
        for(i = 0; i < h; i++)
            memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
    if (sides & EDGE_BOTTOM)
        for (i = 0; i < h; i++)
            memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
}
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
 * Used when a motion-compensation block reaches outside the picture.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                               int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* clamp a completely off-frame start position so the block still
     * overlaps the frame by at least one row/column */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x)*sizeof(pixel);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x)*sizeof(pixel);
        src_x=1-block_w;
    }

    /* portion of the block that actually lies inside the source frame */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);
    assert(start_y < end_y && block_h);
    assert(start_x < end_x && block_w);

    w = end_x - start_x; /* reuse w as the width of the in-frame part */
    src += start_y*linesize + start_x*sizeof(pixel);
    buf += start_x*sizeof(pixel);

    //top: replicate the first in-frame row upwards
    for(y=0; y<start_y; y++){
        memcpy(buf, src, w*sizeof(pixel));
        buf += linesize;
    }

    // copy existing part
    for(; y<end_y; y++){
        memcpy(buf, src, w*sizeof(pixel));
        src += linesize;
        buf += linesize;
    }

    //bottom: replicate the last in-frame row downwards
    src -= linesize;
    for(; y<block_h; y++){
        memcpy(buf, src, w*sizeof(pixel));
        buf += linesize;
    }

    /* second pass: extend each destination row sideways */
    buf -= block_h * linesize + start_x*sizeof(pixel);
    while (block_h--){
        pixel *bufp = (pixel*)buf;
        //left
        for(x=0; x<start_x; x++){
            bufp[x] = bufp[start_x];
        }
        //right
        for(x=end_x; x<block_w; x++){
            bufp[x] = bufp[end_x - 1];
        }
        buf += linesize;
    }
}
  176. static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
  177. {
  178. int i;
  179. pixel *restrict pixels = (pixel *restrict)_pixels;
  180. dctcoef *block = (dctcoef*)_block;
  181. line_size /= sizeof(pixel);
  182. for(i=0;i<8;i++) {
  183. pixels[0] += block[0];
  184. pixels[1] += block[1];
  185. pixels[2] += block[2];
  186. pixels[3] += block[3];
  187. pixels[4] += block[4];
  188. pixels[5] += block[5];
  189. pixels[6] += block[6];
  190. pixels[7] += block[7];
  191. pixels += line_size;
  192. block += 8;
  193. }
  194. }
  195. static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
  196. {
  197. int i;
  198. pixel *restrict pixels = (pixel *restrict)_pixels;
  199. dctcoef *block = (dctcoef*)_block;
  200. line_size /= sizeof(pixel);
  201. for(i=0;i<4;i++) {
  202. pixels[0] += block[0];
  203. pixels[1] += block[1];
  204. pixels[2] += block[2];
  205. pixels[3] += block[3];
  206. pixels += line_size;
  207. block += 4;
  208. }
  209. }
/**
 * PIXOP2(OPNAME, OP) instantiates the whole family of block copy/average
 * primitives for one destination operation:
 *   - OPNAME ## _pixels{2,4,8,16}      full-pel block ops
 *   - ..._x2 / _y2 / _xy2              horizontal / vertical / diagonal
 *                                      half-pel interpolation variants
 *   - ..._l2 / _l4                     average of 2 resp. 4 source blocks
 *   - ..._no_rnd_*                     variants with a smaller rounding bias
 * OP(dst, val) is either a plain store (put) or an average with the
 * existing destination (avg); it is substituted at expansion time.
 * NOTE: the _l4 and _xy2 helpers work on packed 32-bit words with 8-bit
 * lane masks and are only correct for 8-bit pixels (see the FIXME notes).
 */
#define PIXOP2(OPNAME, OP) \
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel2*)(block)), AV_RN2P(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block)), AV_RN4P(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block)), AV_RN4P(pixels));\
        OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* full-pel copy needs no rounding, so no_rnd == rnd here */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                      int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), no_rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN2P(&src1[i*src_stride1]);\
        b= AV_RN2P(&src2[i*src_stride2]);\
        OP(*((pixel2*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
    }\
}\
\
static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _pixels8_l2)(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                       int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
/* half-pel variants: average the block with its 1-pixel (x2) or 1-line (y2)\
 * shifted copy via the _l2 helpers */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* 4-source average, SWAR on packed 8-bit lanes (low 2 bits and high 6 bits\
 * summed separately to avoid cross-lane carries); rounding bias 2 */\
static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                               int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* same as _pixels8_l4 but with rounding bias 1 instead of 2 */\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                      int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH*/\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                                                       int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
/* diagonal half-pel: (p[x][y]+p[x+1][y]+p[x][y+1]+p[x+1][y+1]+2)>>2,\
 * computed two output rows per iteration by reusing the row sums a/b */\
static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    pixel *block = (pixel*)_block;\
    const pixel *pixels = (const pixel*)_pixels;\
    line_size /= sizeof(pixel);\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0]= (a1+a0)>>2; /* FIXME non put */\
        block[1]= (b1+b0)>>2;\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    const uint32_t a= AV_RN32(pixels);\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels);\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL)\
          + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels);\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* 8-wide diagonal half-pel: two passes of the 4-wide SWAR kernel,\
 * the j loop shifting over by 4 bytes for the second half */\
static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels);\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels);\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels);\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels);\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels);\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels);\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* 16-wide variants are two side-by-side 8-wide calls */\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16)    , FUNCC(OPNAME ## _pixels8)    , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))
/* OP substitutions for the two PIXOP2 instantiations below:
 * op_avg — rounded average with the existing destination
 * op_put — plain store */
#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
#define op_put(a, b) a = b
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
/* full-pel put needs no rounding, so the no_rnd names alias the plain ones */
#define put_no_rnd_pixels8_c put_pixels8_c
#define put_no_rnd_pixels16_c put_pixels16_c
/* Uniform-stride wrapper around FUNC(put_no_rnd_pixels16_l2). */
static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h);
}
/* Uniform-stride wrapper around FUNC(put_no_rnd_pixels8_l2). */
static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
}
/**
 * H264_CHROMA_MC(OPNAME, OP) generates the 2/4/8-wide H.264 chroma motion
 * compensation functions.  (x, y) is the eighth-pel fractional position,
 * 0 <= x,y < 8; the bilinear weights are
 *   A=(8-x)(8-y)  B=x(8-y)  C=(8-x)y  D=xy     (A+B+C+D == 64).
 * When D == 0 the interpolation degenerates to a 1-D filter along either
 * the row (step 1) or the column (step stride).  OP performs the final
 * >>6 rounding plus store/average.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1; /* 1-D filter direction */\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
/* chroma MC results carry a 6-bit weight sum: round with +32 then >>6;
 * op_avg additionally averages with the existing destination sample */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)
H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
  712. #define H264_LOWPASS(OPNAME, OP, OP2) \
  713. static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  714. const int h=2;\
  715. INIT_CLIP\
  716. int i;\
  717. pixel *dst = (pixel*)_dst;\
  718. pixel *src = (pixel*)_src;\
  719. dstStride /= sizeof(pixel);\
  720. srcStride /= sizeof(pixel);\
  721. for(i=0; i<h; i++)\
  722. {\
  723. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  724. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  725. dst+=dstStride;\
  726. src+=srcStride;\
  727. }\
  728. }\
  729. \
  730. static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  731. const int w=2;\
  732. INIT_CLIP\
  733. int i;\
  734. pixel *dst = (pixel*)_dst;\
  735. pixel *src = (pixel*)_src;\
  736. dstStride /= sizeof(pixel);\
  737. srcStride /= sizeof(pixel);\
  738. for(i=0; i<w; i++)\
  739. {\
  740. const int srcB= src[-2*srcStride];\
  741. const int srcA= src[-1*srcStride];\
  742. const int src0= src[0 *srcStride];\
  743. const int src1= src[1 *srcStride];\
  744. const int src2= src[2 *srcStride];\
  745. const int src3= src[3 *srcStride];\
  746. const int src4= src[4 *srcStride];\
  747. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  748. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  749. dst++;\
  750. src++;\
  751. }\
  752. }\
  753. \
  754. static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
  755. const int h=2;\
  756. const int w=2;\
  757. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  758. INIT_CLIP\
  759. int i;\
  760. pixel *dst = (pixel*)_dst;\
  761. pixel *src = (pixel*)_src;\
  762. dstStride /= sizeof(pixel);\
  763. srcStride /= sizeof(pixel);\
  764. src -= 2*srcStride;\
  765. for(i=0; i<h+5; i++)\
  766. {\
  767. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  768. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  769. tmp+=tmpStride;\
  770. src+=srcStride;\
  771. }\
  772. tmp -= tmpStride*(h+5-2);\
  773. for(i=0; i<w; i++)\
  774. {\
  775. const int tmpB= tmp[-2*tmpStride] - pad;\
  776. const int tmpA= tmp[-1*tmpStride] - pad;\
  777. const int tmp0= tmp[0 *tmpStride] - pad;\
  778. const int tmp1= tmp[1 *tmpStride] - pad;\
  779. const int tmp2= tmp[2 *tmpStride] - pad;\
  780. const int tmp3= tmp[3 *tmpStride] - pad;\
  781. const int tmp4= tmp[4 *tmpStride] - pad;\
  782. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  783. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  784. dst++;\
  785. tmp++;\
  786. }\
  787. }\
  788. static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  789. const int h=4;\
  790. INIT_CLIP\
  791. int i;\
  792. pixel *dst = (pixel*)_dst;\
  793. pixel *src = (pixel*)_src;\
  794. dstStride /= sizeof(pixel);\
  795. srcStride /= sizeof(pixel);\
  796. for(i=0; i<h; i++)\
  797. {\
  798. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  799. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  800. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
  801. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
  802. dst+=dstStride;\
  803. src+=srcStride;\
  804. }\
  805. }\
  806. \
  807. static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){ /* 4x4 vertical 6-tap half-pel filter; reads 2 rows above and 3 rows below each output column */\
  808. const int w=4;\
  809. INIT_CLIP\
  810. int i;\
  811. pixel *dst = (pixel*)_dst;\
  812. pixel *src = (pixel*)_src;\
  813. dstStride /= sizeof(pixel);\
  814. srcStride /= sizeof(pixel);\
  815. for(i=0; i<w; i++)\
  816. {\
  817. const int srcB= src[-2*srcStride];\
  818. const int srcA= src[-1*srcStride];\
  819. const int src0= src[0 *srcStride];\
  820. const int src1= src[1 *srcStride];\
  821. const int src2= src[2 *srcStride];\
  822. const int src3= src[3 *srcStride];\
  823. const int src4= src[4 *srcStride];\
  824. const int src5= src[5 *srcStride];\
  825. const int src6= src[6 *srcStride];\
  826. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  827. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  828. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  829. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  830. dst++;\
  831. src++;\
  832. }\
  833. }\
  834. \
  835. static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){ /* 4x4 2-D (separable) 6-tap filter: horizontal pass into 16-bit tmp, then vertical pass; OP2() rounds with the wider >>10 shift */\
  836. const int h=4;\
  837. const int w=4;\
  838. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  839. INIT_CLIP\
  840. int i;\
  841. pixel *dst = (pixel*)_dst;\
  842. pixel *src = (pixel*)_src;\
  843. dstStride /= sizeof(pixel);\
  844. srcStride /= sizeof(pixel);\
  845. src -= 2*srcStride; /* start 2 rows above so the vertical taps have context */\
  846. for(i=0; i<h+5; i++)\
  847. {\
  848. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad; /* pad biases intermediates into int16_t range for >9-bit depths; removed again below */\
  849. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  850. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
  851. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
  852. tmp+=tmpStride;\
  853. src+=srcStride;\
  854. }\
  855. tmp -= tmpStride*(h+5-2); /* rewind so tmp[-2*tmpStride] is the first stored row */\
  856. for(i=0; i<w; i++)\
  857. {\
  858. const int tmpB= tmp[-2*tmpStride] - pad;\
  859. const int tmpA= tmp[-1*tmpStride] - pad;\
  860. const int tmp0= tmp[0 *tmpStride] - pad;\
  861. const int tmp1= tmp[1 *tmpStride] - pad;\
  862. const int tmp2= tmp[2 *tmpStride] - pad;\
  863. const int tmp3= tmp[3 *tmpStride] - pad;\
  864. const int tmp4= tmp[4 *tmpStride] - pad;\
  865. const int tmp5= tmp[5 *tmpStride] - pad;\
  866. const int tmp6= tmp[6 *tmpStride] - pad;\
  867. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  868. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  869. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  870. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  871. dst++;\
  872. tmp++;\
  873. }\
  874. }\
  875. \
  876. static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){ /* 8x8 horizontal 6-tap half-pel filter; unrolled 8 outputs per row */\
  877. const int h=8;\
  878. INIT_CLIP\
  879. int i;\
  880. pixel *dst = (pixel*)_dst;\
  881. pixel *src = (pixel*)_src;\
  882. dstStride /= sizeof(pixel);\
  883. srcStride /= sizeof(pixel);\
  884. for(i=0; i<h; i++)\
  885. {\
  886. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
  887. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
  888. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
  889. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
  890. OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
  891. OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
  892. OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
  893. OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
  894. dst+=dstStride;\
  895. src+=srcStride;\
  896. }\
  897. }\
  898. \
  899. static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){ /* 8x8 vertical 6-tap half-pel filter; column-wise, unrolled 8 outputs */\
  900. const int w=8;\
  901. INIT_CLIP\
  902. int i;\
  903. pixel *dst = (pixel*)_dst;\
  904. pixel *src = (pixel*)_src;\
  905. dstStride /= sizeof(pixel);\
  906. srcStride /= sizeof(pixel);\
  907. for(i=0; i<w; i++)\
  908. {\
  909. const int srcB= src[-2*srcStride];\
  910. const int srcA= src[-1*srcStride];\
  911. const int src0= src[0 *srcStride];\
  912. const int src1= src[1 *srcStride];\
  913. const int src2= src[2 *srcStride];\
  914. const int src3= src[3 *srcStride];\
  915. const int src4= src[4 *srcStride];\
  916. const int src5= src[5 *srcStride];\
  917. const int src6= src[6 *srcStride];\
  918. const int src7= src[7 *srcStride];\
  919. const int src8= src[8 *srcStride];\
  920. const int src9= src[9 *srcStride];\
  921. const int src10=src[10*srcStride];\
  922. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  923. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  924. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  925. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  926. OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
  927. OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
  928. OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
  929. OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
  930. dst++;\
  931. src++;\
  932. }\
  933. }\
  934. \
  935. static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){ /* 8x8 2-D 6-tap filter: horizontal pass into 16-bit tmp, then vertical pass with OP2() (>>10 rounding) */\
  936. const int h=8;\
  937. const int w=8;\
  938. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  939. INIT_CLIP\
  940. int i;\
  941. pixel *dst = (pixel*)_dst;\
  942. pixel *src = (pixel*)_src;\
  943. dstStride /= sizeof(pixel);\
  944. srcStride /= sizeof(pixel);\
  945. src -= 2*srcStride; /* start 2 rows above for the vertical taps */\
  946. for(i=0; i<h+5; i++)\
  947. {\
  948. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad; /* pad keeps >9-bit intermediates inside int16_t; subtracted in pass 2 */\
  949. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
  950. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
  951. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
  952. tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
  953. tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
  954. tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
  955. tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
  956. tmp+=tmpStride;\
  957. src+=srcStride;\
  958. }\
  959. tmp -= tmpStride*(h+5-2); /* rewind so tmp[-2*tmpStride] addresses the first stored row */\
  960. for(i=0; i<w; i++)\
  961. {\
  962. const int tmpB= tmp[-2*tmpStride] - pad;\
  963. const int tmpA= tmp[-1*tmpStride] - pad;\
  964. const int tmp0= tmp[0 *tmpStride] - pad;\
  965. const int tmp1= tmp[1 *tmpStride] - pad;\
  966. const int tmp2= tmp[2 *tmpStride] - pad;\
  967. const int tmp3= tmp[3 *tmpStride] - pad;\
  968. const int tmp4= tmp[4 *tmpStride] - pad;\
  969. const int tmp5= tmp[5 *tmpStride] - pad;\
  970. const int tmp6= tmp[6 *tmpStride] - pad;\
  971. const int tmp7= tmp[7 *tmpStride] - pad;\
  972. const int tmp8= tmp[8 *tmpStride] - pad;\
  973. const int tmp9= tmp[9 *tmpStride] - pad;\
  974. const int tmp10=tmp[10*tmpStride] - pad;\
  975. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  976. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  977. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  978. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  979. OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
  980. OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
  981. OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
  982. OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
  983. dst++;\
  984. tmp++;\
  985. }\
  986. }\
  987. \
  988. static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* 16x16 built from four 8x8 quadrants; +8*sizeof(pixel) because dst/src are byte pointers */\
  989. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  990. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  991. src += 8*srcStride;\
  992. dst += 8*dstStride;\
  993. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  994. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  995. }\
  996. \
  997. static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* 16x16 horizontal filter, four 8x8 quadrants */\
  998. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  999. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  1000. src += 8*srcStride;\
  1001. dst += 8*dstStride;\
  1002. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  1003. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  1004. }\
  1005. \
  1006. static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){ /* 16x16 2-D filter, four 8x8 quadrants sharing the int16_t tmp buffer */\
  1007. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  1008. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  1009. src += 8*srcStride;\
  1010. dst += 8*dstStride;\
  1011. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  1012. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  1013. }\
  /* end of H264_LOWPASS() macro body */
  1014. #define H264_MC(OPNAME, SIZE) /* emits the 16 quarter-pel MC entry points _mcXY, X/Y = horizontal/vertical quarter-pel phase */ \
  1015. static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){ /* (0,0): full-pel, plain copy/average */\
  1016. FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
  1017. }\
  1018. \
  1019. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){ /* (1/4,0): average of src and horizontal half-pel */\
  1020. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1021. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  1022. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  1023. }\
  1024. \
  1025. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){ /* (2/4,0): horizontal half-pel directly */\
  1026. FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
  1027. }\
  1028. \
  1029. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){ /* (3/4,0): average of half-pel and the pixel to the right */\
  1030. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1031. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  1032. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  1033. }\
  1034. \
  1035. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){ /* (0,1/4): average of src and vertical half-pel; src is first copied with 5 extra rows of context */\
  1036. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1037. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel); /* skip the 2 context rows copied above the block */\
  1038. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1039. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1040. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1041. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1042. }\
  1043. \
  1044. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){ /* (0,2/4): vertical half-pel directly */\
  1045. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1046. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1047. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1048. FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
  1049. }\
  1050. \
  1051. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){ /* (0,3/4): average of vertical half-pel and the pixel below */\
  1052. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1053. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1054. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1055. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1056. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1057. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1058. }\
  1059. \
  1060. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){ /* (1/4,1/4): average of horizontal and vertical half-pel planes */\
  1061. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1062. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1063. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1064. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1065. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1066. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1067. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1068. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1069. }\
  1070. \
  1071. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){ /* (3/4,1/4): like mc11 but the vertical half-pel is taken one pixel to the right */\
  1072. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1073. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1074. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1075. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1076. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1077. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1078. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1079. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1080. }\
  1081. \
  1082. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){ /* (1/4,3/4): like mc11 but the horizontal half-pel is taken one row down */\
  1083. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1084. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1085. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1086. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1087. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1088. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1089. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1090. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1091. }\
  1092. \
  1093. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){ /* (3/4,3/4): half-pels offset one row down and one pixel right */\
  1094. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1095. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1096. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1097. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1098. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1099. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1100. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1101. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1102. }\
  1103. \
  1104. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){ /* (2/4,2/4): centre position, full 2-D filter */\
  1105. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1106. FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
  1107. }\
  1108. \
  1109. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){ /* (2/4,1/4): average of horizontal half-pel and 2-D (hv) plane */\
  1110. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1111. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1112. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1113. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1114. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1115. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1116. }\
  1117. \
  1118. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){ /* (2/4,3/4): like mc21 with the horizontal half-pel one row down */\
  1119. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1120. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1121. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1122. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1123. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1124. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1125. }\
  1126. \
  1127. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){ /* (1/4,2/4): average of vertical half-pel and 2-D (hv) plane */\
  1128. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1129. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1130. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1131. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1132. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1133. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1134. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1135. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1136. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1137. }\
  1138. \
  1139. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){ /* (3/4,2/4): like mc12 with the vertical half-pel one pixel right */\
  1140. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1141. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1142. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1143. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1144. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1145. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1146. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1147. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1148. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1149. }\
  /* end of H264_MC() macro body */
  /* Write-back macros plugged into H264_LOWPASS: op_* round a 1-D 6-tap sum
   * with (x+16)>>5, op2_* round a 2-D (separable) sum with (x+512)>>10;
   * the avg variants additionally average with the existing destination. */
  1150. #define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
  1151. //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
  1152. #define op_put(a, b) a = CLIP(((b) + 16)>>5)
  1153. #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
  1154. #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
  1155. H264_LOWPASS(put_ , op_put, op2_put)
  1156. H264_LOWPASS(avg_ , op_avg, op2_avg)
  /* NOTE(review): only the put_ variant of the 2x2 size is instantiated —
   * presumably no avg_ caller exists for it; confirm against dsputil users. */
  1157. H264_MC(put_, 2)
  1158. H264_MC(put_, 4)
  1159. H264_MC(put_, 8)
  1160. H264_MC(put_, 16)
  1161. H264_MC(avg_, 4)
  1162. H264_MC(avg_, 8)
  1163. H264_MC(avg_, 16)
  1164. #undef op_avg
  1165. #undef op_put
  1166. #undef op2_avg
  1167. #undef op2_put
  /* Full-pel (mc00) 8x8/16x16 cases are plain copies/averages: alias them to
   * the fixed-size pixel helpers for the bit depth being compiled. */
  1168. #if BIT_DEPTH == 8
  1169. # define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
  1170. # define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
  1171. # define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
  1172. # define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
  1173. #elif BIT_DEPTH == 9
  1174. # define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
  1175. # define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
  1176. # define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
  1177. # define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
  1178. #elif BIT_DEPTH == 10
  1179. # define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
  1180. # define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
  1181. # define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
  1182. # define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
  1183. #endif
  /* Fixed-size copy/average wrappers: adapt the variable-height pixels
   * helpers to the (dst, src, stride)-only signature used for mc00. */
  1184. void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1185. FUNCC(put_pixels8)(dst, src, stride, 8);
  1186. }
  1187. void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1188. FUNCC(avg_pixels8)(dst, src, stride, 8);
  1189. }
  1190. void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1191. FUNCC(put_pixels16)(dst, src, stride, 16);
  1192. }
  1193. void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1194. FUNCC(avg_pixels16)(dst, src, stride, 16);
  1195. }
  /* Zero one 64-coefficient block; sizeof(dctcoef) is the templated
   * per-bit-depth coefficient size, so the byte count depends on BIT_DEPTH. */
  1196. static void FUNCC(clear_block)(DCTELEM *block)
  1197. {
  1198. memset(block, 0, sizeof(dctcoef)*64);
  1199. }
  1200. /**
  1201. * Zero six consecutive 64-coefficient blocks, i.e.
  1202. * memset(blocks, 0, sizeof(dctcoef)*6*64).
  1203. */
  1204. static void FUNCC(clear_blocks)(DCTELEM *blocks)
  1205. {
  1206. memset(blocks, 0, sizeof(dctcoef)*6*64);
  1207. }