You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1193 lines
52KB

  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard
  4. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  7. *
  8. * This file is part of Libav.
  9. *
  10. * Libav is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * Libav is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with Libav; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * DSP utils
  27. */
  28. #include "bit_depth_template.c"
  29. static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  30. {
  31. int i;
  32. for(i=0; i<h; i++)
  33. {
  34. AV_WN2P(dst , AV_RN2P(src ));
  35. dst+=dstStride;
  36. src+=srcStride;
  37. }
  38. }
  39. static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  40. {
  41. int i;
  42. for(i=0; i<h; i++)
  43. {
  44. AV_WN4P(dst , AV_RN4P(src ));
  45. dst+=dstStride;
  46. src+=srcStride;
  47. }
  48. }
  49. static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  50. {
  51. int i;
  52. for(i=0; i<h; i++)
  53. {
  54. AV_WN4P(dst , AV_RN4P(src ));
  55. AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
  56. dst+=dstStride;
  57. src+=srcStride;
  58. }
  59. }
  60. static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  61. {
  62. int i;
  63. for(i=0; i<h; i++)
  64. {
  65. AV_WN4P(dst , AV_RN4P(src ));
  66. AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
  67. AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
  68. AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
  69. dst+=dstStride;
  70. src+=srcStride;
  71. }
  72. }
  73. /* draw the edges of width 'w' of an image of size width, height */
  74. //FIXME check that this is ok for mpeg4 interlaced
  75. static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
  76. {
  77. pixel *buf = (pixel*)_buf;
  78. int wrap = _wrap / sizeof(pixel);
  79. pixel *ptr, *last_line;
  80. int i;
  81. /* left and right */
  82. ptr = buf;
  83. for(i=0;i<height;i++) {
  84. #if BIT_DEPTH > 8
  85. int j;
  86. for (j = 0; j < w; j++) {
  87. ptr[j-w] = ptr[0];
  88. ptr[j+width] = ptr[width-1];
  89. }
  90. #else
  91. memset(ptr - w, ptr[0], w);
  92. memset(ptr + width, ptr[width-1], w);
  93. #endif
  94. ptr += wrap;
  95. }
  96. /* top and bottom + corners */
  97. buf -= w;
  98. last_line = buf + (height - 1) * wrap;
  99. if (sides & EDGE_TOP)
  100. for(i = 0; i < h; i++)
  101. memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
  102. if (sides & EDGE_BOTTOM)
  103. for (i = 0; i < h; i++)
  104. memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
  105. }
  /*
   * DCTELEM_FUNCS(dctcoef, suffix) emits the 8x8 / 4x4 block helpers below with
   * `suffix` appended to their names. Coefficient buffers arrive as int16_t *
   * and are accessed through `dctcoef *`.
   */
  #define DCTELEM_FUNCS(dctcoef, suffix) \
  /* Load an 8x8 square of pixels into a contiguous 64-entry block. */ \
  /* line_size is the source row pitch in bytes. */ \
  static void FUNCC(get_pixels ## suffix)(int16_t *restrict _block, \
                                          const uint8_t *_pixels, \
                                          int line_size) \
  { \
      const pixel *pixels = (const pixel *) _pixels; \
      dctcoef *restrict block = (dctcoef *) _block; \
      int row, col; \
  \
      for (row = 0; row < 8; row++) { \
          for (col = 0; col < 8; col++) \
              block[col] = pixels[col]; \
          pixels += line_size / sizeof(pixel); \
          block  += 8; \
      } \
  } \
  129. \
  130. static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \
  131. int16_t *_block, \
  132. int line_size) \
  133. { \
  134. int i; \
  135. pixel *restrict pixels = (pixel *restrict)_pixels; \
  136. dctcoef *block = (dctcoef*)_block; \
  137. line_size /= sizeof(pixel); \
  138. \
  139. for(i=0;i<8;i++) { \
  140. pixels[0] += block[0]; \
  141. pixels[1] += block[1]; \
  142. pixels[2] += block[2]; \
  143. pixels[3] += block[3]; \
  144. pixels[4] += block[4]; \
  145. pixels[5] += block[5]; \
  146. pixels[6] += block[6]; \
  147. pixels[7] += block[7]; \
  148. pixels += line_size; \
  149. block += 8; \
  150. } \
  151. } \
  152. \
  153. static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \
  154. int16_t *_block, \
  155. int line_size) \
  156. { \
  157. int i; \
  158. pixel *restrict pixels = (pixel *restrict)_pixels; \
  159. dctcoef *block = (dctcoef*)_block; \
  160. line_size /= sizeof(pixel); \
  161. \
  162. for(i=0;i<4;i++) { \
  163. pixels[0] += block[0]; \
  164. pixels[1] += block[1]; \
  165. pixels[2] += block[2]; \
  166. pixels[3] += block[3]; \
  167. pixels += line_size; \
  168. block += 4; \
  169. } \
  170. } \
  171. \
  172. static void FUNCC(clear_block ## suffix)(int16_t *block) \
  173. { \
  174. memset(block, 0, sizeof(dctcoef)*64); \
  175. } \
  176. \
  177. /** \
  178. * memset(blocks, 0, sizeof(int16_t)*6*64) \
  179. */ \
  180. static void FUNCC(clear_blocks ## suffix)(int16_t *blocks) \
  181. { \
  182. memset(blocks, 0, sizeof(dctcoef)*6*64); \
  183. }
  /* Always build the helper set that accesses coefficients as int16_t (names end in _16). */
  DCTELEM_FUNCS(int16_t, _16)
  /* Above 8 bits per sample, also build the set that accesses them as dctcoef (names end in _32). */
  #if BIT_DEPTH > 8
  DCTELEM_FUNCS(dctcoef, _32)
  #endif
  /*
   * PIXOP2(OPNAME, OP) generates the OPNAME-prefixed pixel block routines.
   * OP(dst, value) is the store primitive supplied at instantiation.
   */
  #define PIXOP2(OPNAME, OP) \
  /* Apply OP to one pixel2 unit per row, for h rows. */ \
  /* block and pixels both advance by line_size bytes per row. */ \
  static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) \
  { \
      int row; \
      for (row = 0; row < h; row++, pixels += line_size, block += line_size) { \
          OP(*((pixel2 *)block), AV_RN2P(pixels)); \
      } \
  } \
  197. static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  198. int i;\
  199. for(i=0; i<h; i++){\
  200. OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
  201. pixels+=line_size;\
  202. block +=line_size;\
  203. }\
  204. }\
  205. static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  206. int i;\
  207. for(i=0; i<h; i++){\
  208. OP(*((pixel4*)(block )), AV_RN4P(pixels ));\
  209. OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
  210. pixels+=line_size;\
  211. block +=line_size;\
  212. }\
  213. }\
  214. static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  215. FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
  216. }\
  217. \
  218. static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  219. int src_stride1, int src_stride2, int h){\
  220. int i;\
  221. for(i=0; i<h; i++){\
  222. pixel4 a,b;\
  223. a= AV_RN4P(&src1[i*src_stride1 ]);\
  224. b= AV_RN4P(&src2[i*src_stride2 ]);\
  225. OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\
  226. a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
  227. b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
  228. OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
  229. }\
  230. }\
  231. \
  232. static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  233. int src_stride1, int src_stride2, int h){\
  234. int i;\
  235. for(i=0; i<h; i++){\
  236. pixel4 a,b;\
  237. a= AV_RN4P(&src1[i*src_stride1 ]);\
  238. b= AV_RN4P(&src2[i*src_stride2 ]);\
  239. OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
  240. a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
  241. b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
  242. OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
  243. }\
  244. }\
  245. \
  246. static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  247. int src_stride1, int src_stride2, int h){\
  248. int i;\
  249. for(i=0; i<h; i++){\
  250. pixel4 a,b;\
  251. a= AV_RN4P(&src1[i*src_stride1 ]);\
  252. b= AV_RN4P(&src2[i*src_stride2 ]);\
  253. OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
  254. }\
  255. }\
  256. \
  257. static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  258. int src_stride1, int src_stride2, int h){\
  259. int i;\
  260. for(i=0; i<h; i++){\
  261. pixel4 a,b;\
  262. a= AV_RN2P(&src1[i*src_stride1 ]);\
  263. b= AV_RN2P(&src2[i*src_stride2 ]);\
  264. OP(*((pixel2*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\
  265. }\
  266. }\
  267. \
  268. static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  269. int src_stride1, int src_stride2, int h){\
  270. FUNC(OPNAME ## _pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
  271. FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
  272. }\
  273. \
  274. static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
  275. int src_stride1, int src_stride2, int h){\
  276. FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
  277. FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
  278. }\
  279. \
  280. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  281. FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  282. }\
  283. \
  284. static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  285. FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  286. }\
  287. \
  288. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  289. FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  290. }\
  291. \
  292. static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  293. FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  294. }\
  295. \
  296. static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  297. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  298. /* FIXME HIGH BIT DEPTH */\
  299. int i;\
  300. for(i=0; i<h; i++){\
  301. uint32_t a, b, c, d, l0, l1, h0, h1;\
  302. a= AV_RN32(&src1[i*src_stride1]);\
  303. b= AV_RN32(&src2[i*src_stride2]);\
  304. c= AV_RN32(&src3[i*src_stride3]);\
  305. d= AV_RN32(&src4[i*src_stride4]);\
  306. l0= (a&0x03030303UL)\
  307. + (b&0x03030303UL)\
  308. + 0x02020202UL;\
  309. h0= ((a&0xFCFCFCFCUL)>>2)\
  310. + ((b&0xFCFCFCFCUL)>>2);\
  311. l1= (c&0x03030303UL)\
  312. + (d&0x03030303UL);\
  313. h1= ((c&0xFCFCFCFCUL)>>2)\
  314. + ((d&0xFCFCFCFCUL)>>2);\
  315. OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  316. a= AV_RN32(&src1[i*src_stride1+4]);\
  317. b= AV_RN32(&src2[i*src_stride2+4]);\
  318. c= AV_RN32(&src3[i*src_stride3+4]);\
  319. d= AV_RN32(&src4[i*src_stride4+4]);\
  320. l0= (a&0x03030303UL)\
  321. + (b&0x03030303UL)\
  322. + 0x02020202UL;\
  323. h0= ((a&0xFCFCFCFCUL)>>2)\
  324. + ((b&0xFCFCFCFCUL)>>2);\
  325. l1= (c&0x03030303UL)\
  326. + (d&0x03030303UL);\
  327. h1= ((c&0xFCFCFCFCUL)>>2)\
  328. + ((d&0xFCFCFCFCUL)>>2);\
  329. OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  330. }\
  331. }\
  332. \
  333. static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  334. FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  335. }\
  336. \
  337. static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  338. FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  339. }\
  340. \
  341. static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  342. FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
  343. }\
  344. \
  345. static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
  346. FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
  347. }\
  348. \
  349. static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  350. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  351. /* FIXME HIGH BIT DEPTH*/\
  352. int i;\
  353. for(i=0; i<h; i++){\
  354. uint32_t a, b, c, d, l0, l1, h0, h1;\
  355. a= AV_RN32(&src1[i*src_stride1]);\
  356. b= AV_RN32(&src2[i*src_stride2]);\
  357. c= AV_RN32(&src3[i*src_stride3]);\
  358. d= AV_RN32(&src4[i*src_stride4]);\
  359. l0= (a&0x03030303UL)\
  360. + (b&0x03030303UL)\
  361. + 0x01010101UL;\
  362. h0= ((a&0xFCFCFCFCUL)>>2)\
  363. + ((b&0xFCFCFCFCUL)>>2);\
  364. l1= (c&0x03030303UL)\
  365. + (d&0x03030303UL);\
  366. h1= ((c&0xFCFCFCFCUL)>>2)\
  367. + ((d&0xFCFCFCFCUL)>>2);\
  368. OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  369. a= AV_RN32(&src1[i*src_stride1+4]);\
  370. b= AV_RN32(&src2[i*src_stride2+4]);\
  371. c= AV_RN32(&src3[i*src_stride3+4]);\
  372. d= AV_RN32(&src4[i*src_stride4+4]);\
  373. l0= (a&0x03030303UL)\
  374. + (b&0x03030303UL)\
  375. + 0x01010101UL;\
  376. h0= ((a&0xFCFCFCFCUL)>>2)\
  377. + ((b&0xFCFCFCFCUL)>>2);\
  378. l1= (c&0x03030303UL)\
  379. + (d&0x03030303UL);\
  380. h1= ((c&0xFCFCFCFCUL)>>2)\
  381. + ((d&0xFCFCFCFCUL)>>2);\
  382. OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  383. }\
  384. }\
  385. static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  386. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  387. FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  388. FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  389. }\
  390. static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
  391. int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
  392. FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  393. FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
  394. }\
  395. \
  396. static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
  397. {\
  398. int i, a0, b0, a1, b1;\
  399. pixel *block = (pixel*)_block;\
  400. const pixel *pixels = (const pixel*)_pixels;\
  401. line_size /= sizeof(pixel);\
  402. a0= pixels[0];\
  403. b0= pixels[1] + 2;\
  404. a0 += b0;\
  405. b0 += pixels[2];\
  406. \
  407. pixels+=line_size;\
  408. for(i=0; i<h; i+=2){\
  409. a1= pixels[0];\
  410. b1= pixels[1];\
  411. a1 += b1;\
  412. b1 += pixels[2];\
  413. \
  414. block[0]= (a1+a0)>>2; /* FIXME non put */\
  415. block[1]= (b1+b0)>>2;\
  416. \
  417. pixels+=line_size;\
  418. block +=line_size;\
  419. \
  420. a0= pixels[0];\
  421. b0= pixels[1] + 2;\
  422. a0 += b0;\
  423. b0 += pixels[2];\
  424. \
  425. block[0]= (a1+a0)>>2;\
  426. block[1]= (b1+b0)>>2;\
  427. pixels+=line_size;\
  428. block +=line_size;\
  429. }\
  430. }\
  431. \
  432. static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
  433. {\
  434. /* FIXME HIGH BIT DEPTH */\
  435. int i;\
  436. const uint32_t a= AV_RN32(pixels );\
  437. const uint32_t b= AV_RN32(pixels+1);\
  438. uint32_t l0= (a&0x03030303UL)\
  439. + (b&0x03030303UL)\
  440. + 0x02020202UL;\
  441. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  442. + ((b&0xFCFCFCFCUL)>>2);\
  443. uint32_t l1,h1;\
  444. \
  445. pixels+=line_size;\
  446. for(i=0; i<h; i+=2){\
  447. uint32_t a= AV_RN32(pixels );\
  448. uint32_t b= AV_RN32(pixels+1);\
  449. l1= (a&0x03030303UL)\
  450. + (b&0x03030303UL);\
  451. h1= ((a&0xFCFCFCFCUL)>>2)\
  452. + ((b&0xFCFCFCFCUL)>>2);\
  453. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  454. pixels+=line_size;\
  455. block +=line_size;\
  456. a= AV_RN32(pixels );\
  457. b= AV_RN32(pixels+1);\
  458. l0= (a&0x03030303UL)\
  459. + (b&0x03030303UL)\
  460. + 0x02020202UL;\
  461. h0= ((a&0xFCFCFCFCUL)>>2)\
  462. + ((b&0xFCFCFCFCUL)>>2);\
  463. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  464. pixels+=line_size;\
  465. block +=line_size;\
  466. }\
  467. }\
  468. \
  469. static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
  470. {\
  471. /* FIXME HIGH BIT DEPTH */\
  472. int j;\
  473. for(j=0; j<2; j++){\
  474. int i;\
  475. const uint32_t a= AV_RN32(pixels );\
  476. const uint32_t b= AV_RN32(pixels+1);\
  477. uint32_t l0= (a&0x03030303UL)\
  478. + (b&0x03030303UL)\
  479. + 0x02020202UL;\
  480. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  481. + ((b&0xFCFCFCFCUL)>>2);\
  482. uint32_t l1,h1;\
  483. \
  484. pixels+=line_size;\
  485. for(i=0; i<h; i+=2){\
  486. uint32_t a= AV_RN32(pixels );\
  487. uint32_t b= AV_RN32(pixels+1);\
  488. l1= (a&0x03030303UL)\
  489. + (b&0x03030303UL);\
  490. h1= ((a&0xFCFCFCFCUL)>>2)\
  491. + ((b&0xFCFCFCFCUL)>>2);\
  492. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  493. pixels+=line_size;\
  494. block +=line_size;\
  495. a= AV_RN32(pixels );\
  496. b= AV_RN32(pixels+1);\
  497. l0= (a&0x03030303UL)\
  498. + (b&0x03030303UL)\
  499. + 0x02020202UL;\
  500. h0= ((a&0xFCFCFCFCUL)>>2)\
  501. + ((b&0xFCFCFCFCUL)>>2);\
  502. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  503. pixels+=line_size;\
  504. block +=line_size;\
  505. }\
  506. pixels+=4-line_size*(h+1);\
  507. block +=4-line_size*h;\
  508. }\
  509. }\
  510. \
  511. static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
  512. {\
  513. /* FIXME HIGH BIT DEPTH */\
  514. int j;\
  515. for(j=0; j<2; j++){\
  516. int i;\
  517. const uint32_t a= AV_RN32(pixels );\
  518. const uint32_t b= AV_RN32(pixels+1);\
  519. uint32_t l0= (a&0x03030303UL)\
  520. + (b&0x03030303UL)\
  521. + 0x01010101UL;\
  522. uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
  523. + ((b&0xFCFCFCFCUL)>>2);\
  524. uint32_t l1,h1;\
  525. \
  526. pixels+=line_size;\
  527. for(i=0; i<h; i+=2){\
  528. uint32_t a= AV_RN32(pixels );\
  529. uint32_t b= AV_RN32(pixels+1);\
  530. l1= (a&0x03030303UL)\
  531. + (b&0x03030303UL);\
  532. h1= ((a&0xFCFCFCFCUL)>>2)\
  533. + ((b&0xFCFCFCFCUL)>>2);\
  534. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  535. pixels+=line_size;\
  536. block +=line_size;\
  537. a= AV_RN32(pixels );\
  538. b= AV_RN32(pixels+1);\
  539. l0= (a&0x03030303UL)\
  540. + (b&0x03030303UL)\
  541. + 0x01010101UL;\
  542. h0= ((a&0xFCFCFCFCUL)>>2)\
  543. + ((b&0xFCFCFCFCUL)>>2);\
  544. OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
  545. pixels+=line_size;\
  546. block +=line_size;\
  547. }\
  548. pixels+=4-line_size*(h+1);\
  549. block +=4-line_size*h;\
  550. }\
  551. }\
  552. \
  553. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
  554. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
  555. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
  556. CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
  557. av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
  558. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
  559. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
  560. CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\
  561. #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
  562. #define op_put(a, b) a = b
  563. PIXOP2(avg, op_avg)
  564. PIXOP2(put, op_put)
  565. #undef op_avg
  566. #undef op_put
  567. #define put_no_rnd_pixels8_c put_pixels8_c
  568. #define put_no_rnd_pixels16_c put_pixels16_c
  /*
   * H264_CHROMA_MC(OPNAME, OP) generates 2-, 4- and 8-pixel-wide chroma
   * interpolators. Every output sample is a weighted sum of a source pixel,
   * its right neighbour, the pixel below and the pixel diagonally below-right;
   * the four weights are products of x, 8-x, y and 8-y and always add up to
   * 64. OP(dst, sum) receives the unnormalised sum.
   */
  #define H264_CHROMA_MC(OPNAME, OP)\
  /* 2 pixels wide, h rows; stride is in bytes; x and y must lie in 0..7. */ \
  static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
      pixel *dst = (pixel*)_dst; \
      pixel *src = (pixel*)_src; \
      const int w_cur   = (8 - x) * (8 - y); \
      const int w_right = (    x) * (8 - y); \
      const int w_below = (8 - x) * (    y); \
      const int w_diag  = (    x) * (    y); \
      int row, k; \
      stride /= sizeof(pixel); \
  \
      assert(x<8 && y<8 && x>=0 && y>=0);\
  \
      if (w_diag) { \
          for (row = 0; row < h; row++, dst += stride, src += stride) { \
              for (k = 0; k < 2; k++) { \
                  OP(dst[k], (w_cur*src[k] + w_right*src[k+1] + w_below*src[stride+k] + w_diag*src[stride+k+1])); \
              } \
          } \
      } else { \
          /* x or y is zero: at most one neighbour contributes, reached */ \
          /* one row down when the vertical weight is non-zero, else one */ \
          /* pixel to the right */ \
          const int w_other = w_right + w_below; \
          const int step    = w_below ? stride : 1; \
          for (row = 0; row < h; row++, dst += stride, src += stride) { \
              for (k = 0; k < 2; k++) { \
                  OP(dst[k], (w_cur*src[k] + w_other*src[step+k])); \
              } \
          } \
      } \
  }\
  600. \
  601. static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
  602. pixel *dst = (pixel*)_dst;\
  603. pixel *src = (pixel*)_src;\
  604. const int A=(8-x)*(8-y);\
  605. const int B=( x)*(8-y);\
  606. const int C=(8-x)*( y);\
  607. const int D=( x)*( y);\
  608. int i;\
  609. stride /= sizeof(pixel);\
  610. \
  611. assert(x<8 && y<8 && x>=0 && y>=0);\
  612. \
  613. if(D){\
  614. for(i=0; i<h; i++){\
  615. OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
  616. OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
  617. OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
  618. OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
  619. dst+= stride;\
  620. src+= stride;\
  621. }\
  622. }else{\
  623. const int E= B+C;\
  624. const int step= C ? stride : 1;\
  625. for(i=0; i<h; i++){\
  626. OP(dst[0], (A*src[0] + E*src[step+0]));\
  627. OP(dst[1], (A*src[1] + E*src[step+1]));\
  628. OP(dst[2], (A*src[2] + E*src[step+2]));\
  629. OP(dst[3], (A*src[3] + E*src[step+3]));\
  630. dst+= stride;\
  631. src+= stride;\
  632. }\
  633. }\
  634. }\
  635. \
  636. static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
  637. pixel *dst = (pixel*)_dst;\
  638. pixel *src = (pixel*)_src;\
  639. const int A=(8-x)*(8-y);\
  640. const int B=( x)*(8-y);\
  641. const int C=(8-x)*( y);\
  642. const int D=( x)*( y);\
  643. int i;\
  644. stride /= sizeof(pixel);\
  645. \
  646. assert(x<8 && y<8 && x>=0 && y>=0);\
  647. \
  648. if(D){\
  649. for(i=0; i<h; i++){\
  650. OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
  651. OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
  652. OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
  653. OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
  654. OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
  655. OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
  656. OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
  657. OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
  658. dst+= stride;\
  659. src+= stride;\
  660. }\
  661. }else{\
  662. const int E= B+C;\
  663. const int step= C ? stride : 1;\
  664. for(i=0; i<h; i++){\
  665. OP(dst[0], (A*src[0] + E*src[step+0]));\
  666. OP(dst[1], (A*src[1] + E*src[step+1]));\
  667. OP(dst[2], (A*src[2] + E*src[step+2]));\
  668. OP(dst[3], (A*src[3] + E*src[step+3]));\
  669. OP(dst[4], (A*src[4] + E*src[step+4]));\
  670. OP(dst[5], (A*src[5] + E*src[step+5]));\
  671. OP(dst[6], (A*src[6] + E*src[step+6]));\
  672. OP(dst[7], (A*src[7] + E*src[step+7]));\
  673. dst+= stride;\
  674. src+= stride;\
  675. }\
  676. }\
  677. }
  678. #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
  679. #define op_put(a, b) a = (((b) + 32)>>6)
  680. H264_CHROMA_MC(put_ , op_put)
  681. H264_CHROMA_MC(avg_ , op_avg)
  682. #undef op_avg
  683. #undef op_put
  /*
   * H264_LOWPASS(OPNAME, OP, OP2) generates the six-tap (1, -5, 20, 20, -5, 1)
   * filters. OP receives the raw sum of a single filter pass and OP2 the raw
   * sum after a horizontal and a vertical pass; scaling and clipping are left
   * to those macros, which are supplied at instantiation.
   */
  #define H264_LOWPASS(OPNAME, OP, OP2) \
  /* Horizontal pass over a 2x2 block; reads src[-2] .. src[4] of each row. */ \
  static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
      const int h = 2; \
      INIT_CLIP\
      int row, k; \
      pixel *dst = (pixel*)_dst; \
      pixel *src = (pixel*)_src; \
      dstStride /= sizeof(pixel); \
      srcStride /= sizeof(pixel); \
      for (row = 0; row < h; row++, dst += dstStride, src += srcStride) { \
          for (k = 0; k < 2; k++) { \
              OP(dst[k], (src[k] + src[k+1])*20 - (src[k-1] + src[k+2])*5 + (src[k-2] + src[k+3])); \
          } \
      } \
  }\
  701. \
  702. static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  703. const int w=2;\
  704. INIT_CLIP\
  705. int i;\
  706. pixel *dst = (pixel*)_dst;\
  707. pixel *src = (pixel*)_src;\
  708. dstStride /= sizeof(pixel);\
  709. srcStride /= sizeof(pixel);\
  710. for(i=0; i<w; i++)\
  711. {\
  712. const int srcB= src[-2*srcStride];\
  713. const int srcA= src[-1*srcStride];\
  714. const int src0= src[0 *srcStride];\
  715. const int src1= src[1 *srcStride];\
  716. const int src2= src[2 *srcStride];\
  717. const int src3= src[3 *srcStride];\
  718. const int src4= src[4 *srcStride];\
  719. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  720. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  721. dst++;\
  722. src++;\
  723. }\
  724. }\
  725. \
  726. static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
  727. const int h=2;\
  728. const int w=2;\
  729. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  730. INIT_CLIP\
  731. int i;\
  732. pixel *dst = (pixel*)_dst;\
  733. pixel *src = (pixel*)_src;\
  734. dstStride /= sizeof(pixel);\
  735. srcStride /= sizeof(pixel);\
  736. src -= 2*srcStride;\
  737. for(i=0; i<h+5; i++)\
  738. {\
  739. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  740. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  741. tmp+=tmpStride;\
  742. src+=srcStride;\
  743. }\
  744. tmp -= tmpStride*(h+5-2);\
  745. for(i=0; i<w; i++)\
  746. {\
  747. const int tmpB= tmp[-2*tmpStride] - pad;\
  748. const int tmpA= tmp[-1*tmpStride] - pad;\
  749. const int tmp0= tmp[0 *tmpStride] - pad;\
  750. const int tmp1= tmp[1 *tmpStride] - pad;\
  751. const int tmp2= tmp[2 *tmpStride] - pad;\
  752. const int tmp3= tmp[3 *tmpStride] - pad;\
  753. const int tmp4= tmp[4 *tmpStride] - pad;\
  754. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  755. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  756. dst++;\
  757. tmp++;\
  758. }\
  759. }\
  760. static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  761. const int h=4;\
  762. INIT_CLIP\
  763. int i;\
  764. pixel *dst = (pixel*)_dst;\
  765. pixel *src = (pixel*)_src;\
  766. dstStride /= sizeof(pixel);\
  767. srcStride /= sizeof(pixel);\
  768. for(i=0; i<h; i++)\
  769. {\
  770. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  771. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  772. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
  773. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
  774. dst+=dstStride;\
  775. src+=srcStride;\
  776. }\
  777. }\
  778. \
  779. static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  780. const int w=4;\
  781. INIT_CLIP\
  782. int i;\
  783. pixel *dst = (pixel*)_dst;\
  784. pixel *src = (pixel*)_src;\
  785. dstStride /= sizeof(pixel);\
  786. srcStride /= sizeof(pixel);\
  787. for(i=0; i<w; i++)\
  788. {\
  789. const int srcB= src[-2*srcStride];\
  790. const int srcA= src[-1*srcStride];\
  791. const int src0= src[0 *srcStride];\
  792. const int src1= src[1 *srcStride];\
  793. const int src2= src[2 *srcStride];\
  794. const int src3= src[3 *srcStride];\
  795. const int src4= src[4 *srcStride];\
  796. const int src5= src[5 *srcStride];\
  797. const int src6= src[6 *srcStride];\
  798. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  799. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  800. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  801. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  802. dst++;\
  803. src++;\
  804. }\
  805. }\
  806. \
  807. static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
  808. const int h=4;\
  809. const int w=4;\
  810. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  811. INIT_CLIP\
  812. int i;\
  813. pixel *dst = (pixel*)_dst;\
  814. pixel *src = (pixel*)_src;\
  815. dstStride /= sizeof(pixel);\
  816. srcStride /= sizeof(pixel);\
  817. src -= 2*srcStride;\
  818. for(i=0; i<h+5; i++)\
  819. {\
  820. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  821. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  822. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
  823. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
  824. tmp+=tmpStride;\
  825. src+=srcStride;\
  826. }\
  827. tmp -= tmpStride*(h+5-2);\
  828. for(i=0; i<w; i++)\
  829. {\
  830. const int tmpB= tmp[-2*tmpStride] - pad;\
  831. const int tmpA= tmp[-1*tmpStride] - pad;\
  832. const int tmp0= tmp[0 *tmpStride] - pad;\
  833. const int tmp1= tmp[1 *tmpStride] - pad;\
  834. const int tmp2= tmp[2 *tmpStride] - pad;\
  835. const int tmp3= tmp[3 *tmpStride] - pad;\
  836. const int tmp4= tmp[4 *tmpStride] - pad;\
  837. const int tmp5= tmp[5 *tmpStride] - pad;\
  838. const int tmp6= tmp[6 *tmpStride] - pad;\
  839. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  840. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  841. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  842. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  843. dst++;\
  844. tmp++;\
  845. }\
  846. }\
  847. \
  848. static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  849. const int h=8;\
  850. INIT_CLIP\
  851. int i;\
  852. pixel *dst = (pixel*)_dst;\
  853. pixel *src = (pixel*)_src;\
  854. dstStride /= sizeof(pixel);\
  855. srcStride /= sizeof(pixel);\
  856. for(i=0; i<h; i++)\
  857. {\
  858. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
  859. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
  860. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
  861. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
  862. OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
  863. OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
  864. OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
  865. OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
  866. dst+=dstStride;\
  867. src+=srcStride;\
  868. }\
  869. }\
  870. \
  871. static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  872. const int w=8;\
  873. INIT_CLIP\
  874. int i;\
  875. pixel *dst = (pixel*)_dst;\
  876. pixel *src = (pixel*)_src;\
  877. dstStride /= sizeof(pixel);\
  878. srcStride /= sizeof(pixel);\
  879. for(i=0; i<w; i++)\
  880. {\
  881. const int srcB= src[-2*srcStride];\
  882. const int srcA= src[-1*srcStride];\
  883. const int src0= src[0 *srcStride];\
  884. const int src1= src[1 *srcStride];\
  885. const int src2= src[2 *srcStride];\
  886. const int src3= src[3 *srcStride];\
  887. const int src4= src[4 *srcStride];\
  888. const int src5= src[5 *srcStride];\
  889. const int src6= src[6 *srcStride];\
  890. const int src7= src[7 *srcStride];\
  891. const int src8= src[8 *srcStride];\
  892. const int src9= src[9 *srcStride];\
  893. const int src10=src[10*srcStride];\
  894. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  895. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  896. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  897. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  898. OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
  899. OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
  900. OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
  901. OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
  902. dst++;\
  903. src++;\
  904. }\
  905. }\
  906. \
  907. static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
  908. const int h=8;\
  909. const int w=8;\
  910. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  911. INIT_CLIP\
  912. int i;\
  913. pixel *dst = (pixel*)_dst;\
  914. pixel *src = (pixel*)_src;\
  915. dstStride /= sizeof(pixel);\
  916. srcStride /= sizeof(pixel);\
  917. src -= 2*srcStride;\
  918. for(i=0; i<h+5; i++)\
  919. {\
  920. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
  921. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
  922. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
  923. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
  924. tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
  925. tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
  926. tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
  927. tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
  928. tmp+=tmpStride;\
  929. src+=srcStride;\
  930. }\
  931. tmp -= tmpStride*(h+5-2);\
  932. for(i=0; i<w; i++)\
  933. {\
  934. const int tmpB= tmp[-2*tmpStride] - pad;\
  935. const int tmpA= tmp[-1*tmpStride] - pad;\
  936. const int tmp0= tmp[0 *tmpStride] - pad;\
  937. const int tmp1= tmp[1 *tmpStride] - pad;\
  938. const int tmp2= tmp[2 *tmpStride] - pad;\
  939. const int tmp3= tmp[3 *tmpStride] - pad;\
  940. const int tmp4= tmp[4 *tmpStride] - pad;\
  941. const int tmp5= tmp[5 *tmpStride] - pad;\
  942. const int tmp6= tmp[6 *tmpStride] - pad;\
  943. const int tmp7= tmp[7 *tmpStride] - pad;\
  944. const int tmp8= tmp[8 *tmpStride] - pad;\
  945. const int tmp9= tmp[9 *tmpStride] - pad;\
  946. const int tmp10=tmp[10*tmpStride] - pad;\
  947. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  948. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  949. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  950. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  951. OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
  952. OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
  953. OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
  954. OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
  955. dst++;\
  956. tmp++;\
  957. }\
  958. }\
  959. \
  960. static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
  961. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  962. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  963. src += 8*srcStride;\
  964. dst += 8*dstStride;\
  965. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  966. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  967. }\
  968. \
  969. static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
  970. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  971. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  972. src += 8*srcStride;\
  973. dst += 8*dstStride;\
  974. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  975. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  976. }\
  977. \
  978. static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
  979. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  980. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  981. src += 8*srcStride;\
  982. dst += 8*dstStride;\
  983. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  984. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  985. }\
  986. #define H264_MC(OPNAME, SIZE) \
  987. static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
  988. FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
  989. }\
  990. \
  991. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
  992. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  993. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  994. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  995. }\
  996. \
  997. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
  998. FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
  999. }\
  1000. \
  1001. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
  1002. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1003. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  1004. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  1005. }\
  1006. \
  1007. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
  1008. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1009. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1010. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1011. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1012. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1013. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1014. }\
  1015. \
  1016. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
  1017. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1018. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1019. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1020. FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
  1021. }\
  1022. \
  1023. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
  1024. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1025. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1026. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1027. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1028. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1029. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1030. }\
  1031. \
  1032. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
  1033. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1034. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1035. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1036. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1037. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1038. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1039. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1040. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1041. }\
  1042. \
  1043. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
  1044. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1045. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1046. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1047. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1048. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1049. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1050. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1051. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1052. }\
  1053. \
  1054. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
  1055. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1056. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1057. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1058. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1059. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1060. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1061. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1062. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1063. }\
  1064. \
  1065. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
  1066. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1067. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1068. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1069. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1070. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1071. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1072. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1073. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1074. }\
  1075. \
  1076. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
  1077. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1078. FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
  1079. }\
  1080. \
  1081. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
  1082. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1083. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1084. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1085. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1086. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1087. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1088. }\
  1089. \
  1090. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
  1091. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1092. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1093. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1094. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1095. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1096. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1097. }\
  1098. \
  1099. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
  1100. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1101. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1102. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1103. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1104. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1105. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1106. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1107. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1108. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1109. }\
  1110. \
  1111. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
  1112. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1113. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1114. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1115. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1116. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1117. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1118. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1119. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1120. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1121. }\
  1122. #define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
  1123. //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
  1124. #define op_put(a, b) a = CLIP(((b) + 16)>>5)
  1125. #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
  1126. #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
  1127. H264_LOWPASS(put_ , op_put, op2_put)
  1128. H264_LOWPASS(avg_ , op_avg, op2_avg)
  1129. H264_MC(put_, 2)
  1130. H264_MC(put_, 4)
  1131. H264_MC(put_, 8)
  1132. H264_MC(put_, 16)
  1133. H264_MC(avg_, 4)
  1134. H264_MC(avg_, 8)
  1135. H264_MC(avg_, 16)
  1136. #undef op_avg
  1137. #undef op_put
  1138. #undef op2_avg
  1139. #undef op2_put
  1140. #if BIT_DEPTH == 8
  1141. # define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
  1142. # define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
  1143. # define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
  1144. # define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
  1145. #elif BIT_DEPTH == 9
  1146. # define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
  1147. # define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
  1148. # define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
  1149. # define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
  1150. #elif BIT_DEPTH == 10
  1151. # define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
  1152. # define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
  1153. # define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
  1154. # define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
  1155. #endif
  1156. void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1157. FUNCC(put_pixels8)(dst, src, stride, 8);
  1158. }
  1159. void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1160. FUNCC(avg_pixels8)(dst, src, stride, 8);
  1161. }
  1162. void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1163. FUNCC(put_pixels16)(dst, src, stride, 16);
  1164. }
  1165. void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1166. FUNCC(avg_pixels16)(dst, src, stride, 16);
  1167. }