You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1201 lines
53KB

  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard
  4. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5. *
  6. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  7. *
  8. * This file is part of Libav.
  9. *
  10. * Libav is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * Libav is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with Libav; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * DSP utils
  27. */
  28. #include "bit_depth_template.c"
  29. static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  30. {
  31. int i;
  32. for(i=0; i<h; i++)
  33. {
  34. AV_WN2P(dst , AV_RN2P(src ));
  35. dst+=dstStride;
  36. src+=srcStride;
  37. }
  38. }
  39. static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  40. {
  41. int i;
  42. for(i=0; i<h; i++)
  43. {
  44. AV_WN4P(dst , AV_RN4P(src ));
  45. dst+=dstStride;
  46. src+=srcStride;
  47. }
  48. }
  49. static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  50. {
  51. int i;
  52. for(i=0; i<h; i++)
  53. {
  54. AV_WN4P(dst , AV_RN4P(src ));
  55. AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
  56. dst+=dstStride;
  57. src+=srcStride;
  58. }
  59. }
  60. static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
  61. {
  62. int i;
  63. for(i=0; i<h; i++)
  64. {
  65. AV_WN4P(dst , AV_RN4P(src ));
  66. AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
  67. AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
  68. AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
  69. dst+=dstStride;
  70. src+=srcStride;
  71. }
  72. }
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
/*
 * Replicate border pixels outward into the margins around the image:
 * each row's first/last pixel is copied w times to the left/right, then
 * (per the 'sides' flags) the padded top/bottom rows are duplicated h
 * times upward/downward, which also fills the corners.
 * _wrap is the line stride in bytes; w/h are the horizontal/vertical
 * edge widths in pixels.
 */
static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
{
    pixel *buf = (pixel*)_buf;
    int wrap = _wrap / sizeof(pixel);   /* byte stride -> pixel stride */
    pixel *ptr, *last_line;
    int i;

    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
#if BIT_DEPTH > 8
        /* multi-byte pixels: memset cannot replicate them, copy one by one */
        int j;
        for (j = 0; j < w; j++) {
            ptr[j-w] = ptr[0];           /* replicate leftmost pixel into left margin */
            ptr[j+width] = ptr[width-1]; /* replicate rightmost pixel into right margin */
        }
#else
        /* 8-bit pixels are single bytes, so memset replicates them directly */
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
#endif
        ptr += wrap;
    }

    /* top and bottom + corners */
    buf -= w;   /* widen the copied span to include the left/right margins just written */
    last_line = buf + (height - 1) * wrap;
    if (sides & EDGE_TOP)
        for(i = 0; i < h; i++)
            memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
    if (sides & EDGE_BOTTOM)
        for (i = 0; i < h; i++)
            memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
}
/*
 * Instantiate pixel<->DCT-coefficient helper functions for a given
 * coefficient type 'dctcoef'; 'suffix' distinguishes the 16-bit and
 * 32-bit variants. Strides are given in bytes and converted to pixel
 * units inside each function.
 */
#define DCTELEM_FUNCS(dctcoef, suffix) \
/* Read an 8x8 block of pixels into 64 consecutive coefficients. */ \
static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block, \
                                        const uint8_t *_pixels, \
                                        int line_size) \
{ \
    const pixel *pixels = (const pixel *) _pixels; \
    dctcoef *restrict block = (dctcoef *) _block; \
    int i; \
 \
    /* read the pixels */ \
    for(i=0;i<8;i++) { \
        block[0] = pixels[0]; \
        block[1] = pixels[1]; \
        block[2] = pixels[2]; \
        block[3] = pixels[3]; \
        block[4] = pixels[4]; \
        block[5] = pixels[5]; \
        block[6] = pixels[6]; \
        block[7] = pixels[7]; \
        pixels += line_size / sizeof(pixel); \
        block += 8; \
    } \
} \
 \
/* Add an 8x8 block of coefficients onto the pixels in place (no clipping here). */ \
static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \
                                         DCTELEM *_block, \
                                         int line_size) \
{ \
    int i; \
    pixel *restrict pixels = (pixel *restrict)_pixels; \
    dctcoef *block = (dctcoef*)_block; \
    line_size /= sizeof(pixel); \
 \
    for(i=0;i<8;i++) { \
        pixels[0] += block[0]; \
        pixels[1] += block[1]; \
        pixels[2] += block[2]; \
        pixels[3] += block[3]; \
        pixels[4] += block[4]; \
        pixels[5] += block[5]; \
        pixels[6] += block[6]; \
        pixels[7] += block[7]; \
        pixels += line_size; \
        block += 8; \
    } \
} \
 \
/* Add a 4x4 block of coefficients onto the pixels in place. */ \
static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \
                                         DCTELEM *_block, \
                                         int line_size) \
{ \
    int i; \
    pixel *restrict pixels = (pixel *restrict)_pixels; \
    dctcoef *block = (dctcoef*)_block; \
    line_size /= sizeof(pixel); \
 \
    for(i=0;i<4;i++) { \
        pixels[0] += block[0]; \
        pixels[1] += block[1]; \
        pixels[2] += block[2]; \
        pixels[3] += block[3]; \
        pixels += line_size; \
        block += 4; \
    } \
} \
 \
/* Zero one 64-coefficient block. */ \
static void FUNCC(clear_block ## suffix)(DCTELEM *block) \
{ \
    memset(block, 0, sizeof(dctcoef)*64); \
} \
 \
/** \
 * memset(blocks, 0, sizeof(DCTELEM)*6*64) \
 */ \
static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \
{ \
    memset(blocks, 0, sizeof(dctcoef)*6*64); \
}

DCTELEM_FUNCS(DCTELEM, _16)
#if BIT_DEPTH > 8
/* high-bit-depth builds also need a 32-bit coefficient variant */
DCTELEM_FUNCS(dctcoef, _32)
#endif
/*
 * Instantiate the whole family of pixel copy/average motion-compensation
 * primitives for one store operation OP (plain store for "put",
 * read-modify-write average for "avg").
 *
 * Naming: _pixelsN = N-pixel-wide block; _l2/_l4 = average of 2/4 source
 * lines; _x2/_y2/_xy2 = half-pel interpolation in x, y, or both;
 * _no_rnd_* = variants using truncating (no-rounding) averages.
 *
 * The 32-bit magic constants (0x03030303 / 0xFCFCFCFC / 0x0F0F0F0F)
 * implement four parallel byte-wise averages in one register: low two
 * bits and high six bits of each byte are summed separately so carries
 * cannot cross byte boundaries (8-bit only; see the FIXME notes).
 */
#define PIXOP2(OPNAME, OP) \
/* copy/average a 2-pixel-wide column of h rows */ \
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel2*)(block)), AV_RN2P(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* copy/average a 4-pixel-wide column of h rows */ \
static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block)), AV_RN4P(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* copy/average an 8-pixel-wide column of h rows (two 4-pixel groups) */ \
static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((pixel4*)(block)), AV_RN4P(pixels));\
        OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
/* full-pel copy needs no rounding, so the no_rnd variant is the same */ \
static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\
}\
\
/* 8-wide average of two sources, truncating average */ \
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                      int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), no_rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
    }\
}\
\
/* 8-wide average of two sources, rounding average */ \
static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\
    }\
}\
\
/* 4-wide average of two sources */ \
static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1]);\
        b= AV_RN4P(&src2[i*src_stride2]);\
        OP(*((pixel4*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
    }\
}\
\
/* 2-wide average of two sources */ \
static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                               int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN2P(&src1[i*src_stride1]);\
        b= AV_RN2P(&src2[i*src_stride2]);\
        OP(*((pixel2*)&dst[i*dst_stride]), rnd_avg_pixel4(a, b));\
    }\
}\
\
/* 16-wide variants are built from two 8-wide halves */ \
static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _pixels8_l2)(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                       int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
/* horizontal half-pel: average of a pixel and its right neighbour */ \
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
/* vertical half-pel: average of a pixel and the one below it */ \
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* 8-wide average of four sources with rounding (+2 before >>2) */ \
static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* 8-wide average of four sources, truncating (+1 instead of +2 bias) */ \
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH*/\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst, src1, src2, src3, src4, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
/* 2-wide xy half-pel: rolling 4-tap average of the 2x2 neighbourhood, \
 * carrying the (a0,b0) partial sums of the previous row across iterations */ \
static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    pixel *block = (pixel*)_block;\
    const pixel *pixels = (const pixel*)_pixels;\
    line_size /= sizeof(pixel);\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0]= (a1+a0)>>2; /* FIXME non put */\
        block[1]= (b1+b0)>>2;\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* 4-wide xy half-pel via the packed-byte trick; l0/h0 carry the previous \
 * row's partial sums so each source row is read only once */ \
static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    const uint32_t a= AV_RN32(pixels);\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels);\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL)\
          + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels);\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* 8-wide xy half-pel: two passes of the 4-wide kernel over the left and \
 * right halves (j loop), rewinding the pointers between halves */ \
static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels);\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels);\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels);\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* same as above with the truncating (+1) rounding bias */ \
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels);\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels);\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels);\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* 16-wide entry points are generated as two side-by-side 8-wide calls */ \
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16)    , FUNCC(OPNAME ## _pixels8)    , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16)    , FUNCC(OPNAME ## _pixels8)    , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))

/* instantiate the "avg" (read-modify-write average) and "put" (plain store) families */
#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
#define op_put(a, b) a = b
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
/* a plain copy needs no rounding, so alias the no_rnd put to the normal put */
#define put_no_rnd_pixels8_c put_pixels8_c
#define put_no_rnd_pixels16_c put_pixels16_c

/* Equal-stride convenience wrapper around the 16-wide no-rounding two-source average. */
static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h);
}

/* Equal-stride convenience wrapper around the 8-wide no-rounding two-source average. */
static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h);
}
/*
 * Instantiate the H.264 chroma motion-compensation functions (2-, 4- and
 * 8-pixel-wide) for one store operation OP. (x, y) is the eighth-pel
 * fractional offset in [0,8); the bilinear weights are
 *   A=(8-x)(8-y)  B=x(8-y)  C=(8-x)y  D=xy   (they always sum to 64).
 * When D == 0 the 2-D filter degenerates to a 1-D filter along either
 * x or y, which the else-branch exploits by folding B and C into E and
 * stepping by 1 (horizontal) or stride (vertical).
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        /* full 2-D bilinear interpolation */\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* degenerate 1-D case: step by 1 (x only) or by stride (y only) */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
    pixel *dst = (pixel*)_dst;\
    pixel *src = (pixel*)_src;\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    stride /= sizeof(pixel);\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* weights sum to 64, hence the (b + 32) >> 6 rounding normalisation */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)
H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
  690. #define H264_LOWPASS(OPNAME, OP, OP2) \
  691. static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  692. const int h=2;\
  693. INIT_CLIP\
  694. int i;\
  695. pixel *dst = (pixel*)_dst;\
  696. pixel *src = (pixel*)_src;\
  697. dstStride /= sizeof(pixel);\
  698. srcStride /= sizeof(pixel);\
  699. for(i=0; i<h; i++)\
  700. {\
  701. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  702. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  703. dst+=dstStride;\
  704. src+=srcStride;\
  705. }\
  706. }\
  707. \
  708. static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  709. const int w=2;\
  710. INIT_CLIP\
  711. int i;\
  712. pixel *dst = (pixel*)_dst;\
  713. pixel *src = (pixel*)_src;\
  714. dstStride /= sizeof(pixel);\
  715. srcStride /= sizeof(pixel);\
  716. for(i=0; i<w; i++)\
  717. {\
  718. const int srcB= src[-2*srcStride];\
  719. const int srcA= src[-1*srcStride];\
  720. const int src0= src[0 *srcStride];\
  721. const int src1= src[1 *srcStride];\
  722. const int src2= src[2 *srcStride];\
  723. const int src3= src[3 *srcStride];\
  724. const int src4= src[4 *srcStride];\
  725. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  726. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  727. dst++;\
  728. src++;\
  729. }\
  730. }\
  731. \
  732. static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
  733. const int h=2;\
  734. const int w=2;\
  735. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  736. INIT_CLIP\
  737. int i;\
  738. pixel *dst = (pixel*)_dst;\
  739. pixel *src = (pixel*)_src;\
  740. dstStride /= sizeof(pixel);\
  741. srcStride /= sizeof(pixel);\
  742. src -= 2*srcStride;\
  743. for(i=0; i<h+5; i++)\
  744. {\
  745. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  746. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  747. tmp+=tmpStride;\
  748. src+=srcStride;\
  749. }\
  750. tmp -= tmpStride*(h+5-2);\
  751. for(i=0; i<w; i++)\
  752. {\
  753. const int tmpB= tmp[-2*tmpStride] - pad;\
  754. const int tmpA= tmp[-1*tmpStride] - pad;\
  755. const int tmp0= tmp[0 *tmpStride] - pad;\
  756. const int tmp1= tmp[1 *tmpStride] - pad;\
  757. const int tmp2= tmp[2 *tmpStride] - pad;\
  758. const int tmp3= tmp[3 *tmpStride] - pad;\
  759. const int tmp4= tmp[4 *tmpStride] - pad;\
  760. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  761. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  762. dst++;\
  763. tmp++;\
  764. }\
  765. }\
  766. static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  767. const int h=4;\
  768. INIT_CLIP\
  769. int i;\
  770. pixel *dst = (pixel*)_dst;\
  771. pixel *src = (pixel*)_src;\
  772. dstStride /= sizeof(pixel);\
  773. srcStride /= sizeof(pixel);\
  774. for(i=0; i<h; i++)\
  775. {\
  776. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
  777. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
  778. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
  779. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
  780. dst+=dstStride;\
  781. src+=srcStride;\
  782. }\
  783. }\
  784. \
  785. static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  786. const int w=4;\
  787. INIT_CLIP\
  788. int i;\
  789. pixel *dst = (pixel*)_dst;\
  790. pixel *src = (pixel*)_src;\
  791. dstStride /= sizeof(pixel);\
  792. srcStride /= sizeof(pixel);\
  793. for(i=0; i<w; i++)\
  794. {\
  795. const int srcB= src[-2*srcStride];\
  796. const int srcA= src[-1*srcStride];\
  797. const int src0= src[0 *srcStride];\
  798. const int src1= src[1 *srcStride];\
  799. const int src2= src[2 *srcStride];\
  800. const int src3= src[3 *srcStride];\
  801. const int src4= src[4 *srcStride];\
  802. const int src5= src[5 *srcStride];\
  803. const int src6= src[6 *srcStride];\
  804. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  805. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  806. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  807. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  808. dst++;\
  809. src++;\
  810. }\
  811. }\
  812. \
  /* 2-D (horizontal then vertical) 6-tap qpel lowpass for a 4x4 block.        */\
  /* First pass stores unscaled horizontal sums in int16_t tmp[]; for          */\
  /* BIT_DEPTH > 9 the sums can exceed int16 range, so a constant bias "pad"   */\
  /* is added on store and subtracted on load.  OP2 does the final two-pass    */\
  /* rounding ((b)+512)>>10 -- see op2_put/op2_avg.                            */\
  813. static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
  814. const int h=4;\
  815. const int w=4;\
  816. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  817. INIT_CLIP\
  818. int i;\
  819. pixel *dst = (pixel*)_dst;\
  820. pixel *src = (pixel*)_src;\
  821. dstStride /= sizeof(pixel);\
  822. srcStride /= sizeof(pixel);\
  /* back up two rows: the vertical pass needs h+5 filtered rows */\
  823. src -= 2*srcStride;\
  824. for(i=0; i<h+5; i++)\
  825. {\
  826. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
  827. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
  828. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
  829. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
  830. tmp+=tmpStride;\
  831. src+=srcStride;\
  832. }\
  /* rewind so tmp[-2*tmpStride] is the first stored row */\
  833. tmp -= tmpStride*(h+5-2);\
  834. for(i=0; i<w; i++)\
  835. {\
  836. const int tmpB= tmp[-2*tmpStride] - pad;\
  837. const int tmpA= tmp[-1*tmpStride] - pad;\
  838. const int tmp0= tmp[0 *tmpStride] - pad;\
  839. const int tmp1= tmp[1 *tmpStride] - pad;\
  840. const int tmp2= tmp[2 *tmpStride] - pad;\
  841. const int tmp3= tmp[3 *tmpStride] - pad;\
  842. const int tmp4= tmp[4 *tmpStride] - pad;\
  843. const int tmp5= tmp[5 *tmpStride] - pad;\
  844. const int tmp6= tmp[6 *tmpStride] - pad;\
  845. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  846. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  847. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  848. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  849. dst++;\
  850. tmp++;\
  851. }\
  852. }\
  853. \
  /* Horizontal 6-tap (1,-5,20,20,-5,1) qpel lowpass, 8 pixels per row.        */\
  /* Reads src[-2]..src[10] on each row; OP rounds/clips and stores/averages.  */\
  854. static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  855. const int h=8;\
  856. INIT_CLIP\
  857. int i;\
  858. pixel *dst = (pixel*)_dst;\
  859. pixel *src = (pixel*)_src;\
  /* byte strides -> pixel strides */\
  860. dstStride /= sizeof(pixel);\
  861. srcStride /= sizeof(pixel);\
  862. for(i=0; i<h; i++)\
  863. {\
  864. OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
  865. OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
  866. OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
  867. OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
  868. OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
  869. OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
  870. OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
  871. OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
  872. dst+=dstStride;\
  873. src+=srcStride;\
  874. }\
  875. }\
  876. \
  /* Vertical 6-tap qpel lowpass over an 8-wide block: 8 output rows per       */\
  /* column, reading rows -2..10.  Same filter/rounding as the 4-wide variant. */\
  877. static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\
  878. const int w=8;\
  879. INIT_CLIP\
  880. int i;\
  881. pixel *dst = (pixel*)_dst;\
  882. pixel *src = (pixel*)_src;\
  883. dstStride /= sizeof(pixel);\
  884. srcStride /= sizeof(pixel);\
  885. for(i=0; i<w; i++)\
  886. {\
  887. const int srcB= src[-2*srcStride];\
  888. const int srcA= src[-1*srcStride];\
  889. const int src0= src[0 *srcStride];\
  890. const int src1= src[1 *srcStride];\
  891. const int src2= src[2 *srcStride];\
  892. const int src3= src[3 *srcStride];\
  893. const int src4= src[4 *srcStride];\
  894. const int src5= src[5 *srcStride];\
  895. const int src6= src[6 *srcStride];\
  896. const int src7= src[7 *srcStride];\
  897. const int src8= src[8 *srcStride];\
  898. const int src9= src[9 *srcStride];\
  899. const int src10=src[10*srcStride];\
  900. OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
  901. OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
  902. OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
  903. OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
  904. OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
  905. OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
  906. OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
  907. OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
  908. dst++;\
  909. src++;\
  910. }\
  911. }\
  912. \
  /* 2-D (horizontal then vertical) 6-tap qpel lowpass for an 8x8 block.       */\
  /* As in the 4x4 variant, intermediate sums go through int16_t tmp[] with a  */\
  /* "pad" bias for BIT_DEPTH > 9; OP2 applies the two-pass ((b)+512)>>10      */\
  /* rounding.                                                                 */\
  913. static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
  914. const int h=8;\
  915. const int w=8;\
  916. const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
  917. INIT_CLIP\
  918. int i;\
  919. pixel *dst = (pixel*)_dst;\
  920. pixel *src = (pixel*)_src;\
  921. dstStride /= sizeof(pixel);\
  922. srcStride /= sizeof(pixel);\
  /* back up two rows; vertical pass consumes h+5 filtered rows */\
  923. src -= 2*srcStride;\
  924. for(i=0; i<h+5; i++)\
  925. {\
  926. tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
  927. tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
  928. tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
  929. tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
  930. tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
  931. tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
  932. tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
  933. tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
  934. tmp+=tmpStride;\
  935. src+=srcStride;\
  936. }\
  /* rewind so tmp[-2*tmpStride] addresses the first stored row */\
  937. tmp -= tmpStride*(h+5-2);\
  938. for(i=0; i<w; i++)\
  939. {\
  940. const int tmpB= tmp[-2*tmpStride] - pad;\
  941. const int tmpA= tmp[-1*tmpStride] - pad;\
  942. const int tmp0= tmp[0 *tmpStride] - pad;\
  943. const int tmp1= tmp[1 *tmpStride] - pad;\
  944. const int tmp2= tmp[2 *tmpStride] - pad;\
  945. const int tmp3= tmp[3 *tmpStride] - pad;\
  946. const int tmp4= tmp[4 *tmpStride] - pad;\
  947. const int tmp5= tmp[5 *tmpStride] - pad;\
  948. const int tmp6= tmp[6 *tmpStride] - pad;\
  949. const int tmp7= tmp[7 *tmpStride] - pad;\
  950. const int tmp8= tmp[8 *tmpStride] - pad;\
  951. const int tmp9= tmp[9 *tmpStride] - pad;\
  952. const int tmp10=tmp[10*tmpStride] - pad;\
  953. OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
  954. OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
  955. OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
  956. OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
  957. OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
  958. OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
  959. OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
  960. OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
  961. dst++;\
  962. tmp++;\
  963. }\
  964. }\
  965. \
  /* 16x16 vertical lowpass as a 2x2 tiling of 8x8 calls; the +8*sizeof(pixel)*/\
  /* offsets are in bytes because dst/src are raw uint8_t pointers here.      */\
  966. static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
  967. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  968. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  969. src += 8*srcStride;\
  970. dst += 8*dstStride;\
  971. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
  972. FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  973. }\
  974. \
  /* 16x16 horizontal lowpass as a 2x2 tiling of 8x8 calls (byte offsets).    */\
  975. static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
  976. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  977. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  978. src += 8*srcStride;\
  979. dst += 8*dstStride;\
  980. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
  981. FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
  982. }\
  983. \
  /* 16x16 2-D lowpass as a 2x2 tiling of 8x8 calls.  tmp is an int16_t       */\
  /* array, so its right-half offset is 8 elements, not 8*sizeof(pixel).      */\
  984. static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
  985. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  986. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  987. src += 8*srcStride;\
  988. dst += 8*dstStride;\
  989. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
  990. FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
  991. }\
  /* H264_MC(OPNAME, SIZE): expands the 16 quarter-pel motion-compensation
     entry points _mcXY for a SIZE x SIZE block, X/Y being the horizontal/
     vertical quarter-sample offsets (0..3).  Half-sample planes come from
     the 6-tap *_lowpass kernels above; quarter-sample positions are the
     rounded average (pixels ## SIZE ## _l2) of two neighbouring planes.
     OPNAME selects "put" (store) or "avg" (average with existing dst). */
  992. #define H264_MC(OPNAME, SIZE) \
  /* mc00: integer position -- plain copy (or average) */\
  993. static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\
  994. FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
  995. }\
  996. \
  /* mc10 / mc30: horizontal quarter positions = average of the full-pel      */\
  /* column (src or src+1) with the horizontal half-pel plane                  */\
  997. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\
  998. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  999. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  1000. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  1001. }\
  1002. \
  /* mc20: horizontal half position -- h_lowpass straight into dst */\
  1003. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\
  1004. FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
  1005. }\
  1006. \
  1007. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\
  1008. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1009. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
  1010. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
  1011. }\
  1012. \
  /* mc01 / mc03: vertical quarter positions -- copy SIZE+5 rows (2 above,    */\
  /* 3 below) into "full", v-filter its middle, then average with full-pel    */\
  1013. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\
  1014. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1015. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1016. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1017. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1018. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1019. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1020. }\
  1021. \
  /* mc02: vertical half position */\
  1022. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\
  1023. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1024. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1025. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1026. FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
  1027. }\
  1028. \
  1029. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\
  1030. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1031. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1032. uint8_t half[SIZE*SIZE*sizeof(pixel)];\
  1033. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1034. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1035. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1036. }\
  1037. \
  /* mc11/mc31/mc13/mc33: diagonal quarter positions = average of the         */\
  /* horizontal and vertical half-pel planes nearest the target position      */\
  1038. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\
  1039. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1040. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1041. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1042. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1043. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1044. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1045. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1046. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1047. }\
  1048. \
  1049. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\
  1050. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1051. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1052. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1053. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1054. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1055. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1056. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1057. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1058. }\
  1059. \
  1060. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\
  1061. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1062. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1063. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1064. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1065. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1066. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1067. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1068. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1069. }\
  1070. \
  1071. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\
  1072. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1073. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1074. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1075. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1076. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1077. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1078. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1079. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1080. }\
  1081. \
  /* mc22: centre half-pel position -- 2-D lowpass straight into dst */\
  1082. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\
  1083. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1084. FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
  1085. }\
  1086. \
  /* mc21/mc23: average of the horizontal half-pel and 2-D half-pel planes */\
  1087. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\
  1088. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1089. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1090. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1091. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
  1092. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1093. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1094. }\
  1095. \
  1096. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\
  1097. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1098. uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
  1099. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1100. FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
  1101. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1102. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1103. }\
  1104. \
  /* mc12/mc32: average of the vertical half-pel and 2-D half-pel planes */\
  1105. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\
  1106. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1107. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1108. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1109. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1110. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1111. FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
  1112. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1113. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1114. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1115. }\
  1116. \
  1117. static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\
  1118. uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
  1119. uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
  1120. int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
  1121. uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
  1122. uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
  1123. FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
  1124. FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
  1125. FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
  1126. FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
  1127. }\
  1128. #define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
  1129. //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
  /* op_*: final rounding of one-pass sums ((b)+16)>>5 with clipping;       */
  /* op2_*: rounding of two-pass (hv) sums ((b)+512)>>10.  The *_avg forms  */
  /* additionally average with the existing dst value, rounding up.         */
  1130. #define op_put(a, b) a = CLIP(((b) + 16)>>5)
  1131. #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
  1132. #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
  /* Instantiate lowpass kernels and qpel MC entry points for both "put"    */
  /* and "avg"; the ops are #undef'd afterwards to keep the namespace clean. */
  1133. H264_LOWPASS(put_ , op_put, op2_put)
  1134. H264_LOWPASS(avg_ , op_avg, op2_avg)
  1135. H264_MC(put_, 2)
  1136. H264_MC(put_, 4)
  1137. H264_MC(put_, 8)
  1138. H264_MC(put_, 16)
  1139. H264_MC(avg_, 4)
  1140. H264_MC(avg_, 8)
  1141. H264_MC(avg_, 16)
  1142. #undef op_avg
  1143. #undef op_put
  1144. #undef op2_avg
  1145. #undef op2_put
  /* Alias the mc00 (integer-position) qpel entry points to the plain        */
  /* fixed-size pixel copy/average wrappers for the compiled bit depth.      */
  1146. #if BIT_DEPTH == 8
  1147. # define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
  1148. # define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
  1149. # define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
  1150. # define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
  1151. #elif BIT_DEPTH == 9
  1152. # define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
  1153. # define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
  1154. # define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
  1155. # define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
  1156. #elif BIT_DEPTH == 10
  1157. # define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
  1158. # define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
  1159. # define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
  1160. # define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
  1161. #endif
  /* Fixed-size wrapper: copy an 8x8 block (byte stride) via put_pixels8. */
  1162. void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1163. FUNCC(put_pixels8)(dst, src, stride, 8);
  1164. }
  /* Fixed-size wrapper: average an 8x8 block into dst via avg_pixels8. */
  1165. void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
  1166. FUNCC(avg_pixels8)(dst, src, stride, 8);
  1167. }
  /* Fixed-size wrapper: copy a 16x16 block (byte stride) via put_pixels16. */
  1168. void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1169. FUNCC(put_pixels16)(dst, src, stride, 16);
  1170. }
  /* Fixed-size wrapper: average a 16x16 block into dst via avg_pixels16. */
  1171. void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
  1172. FUNCC(avg_pixels16)(dst, src, stride, 16);
  1173. }